diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 9e634919b..0c0dfe85e 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_10:17:55 +DATE: 2024-09-15_12:24:25 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4828s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4768s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7582s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1333s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.4006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3451s - [COUNTERS] Fortran MEs ( 1 ) : 0.0555s for 81920 events => throughput is 1.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1397s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2319s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0551s for 81920 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.7234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 81920 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.507280e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.158974e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.538509e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.174895e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1339s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3479s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 81920 events => throughput is 2.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 81920 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.636885e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953684e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.726782e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.997304e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1362s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 81920 events => throughput is 3.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.379070e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.566849e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.566526e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.686525e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1338s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3709s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3467s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 81920 events => throughput is 3.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 81920 events => throughput is 2.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588844e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.680421e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.779211e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.822232e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1349s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1329s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2189s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,9 +489,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 81920 events => throughput is 4.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6871s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 81920 events => throughput is 2.00E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.812222e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.142328e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.161482e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.216839e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6609s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.1813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1726s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 events => throughput is 1.01E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.861787e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.181397e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.226375e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.783312e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.210512e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.795717e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.200044e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.159916e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 9fb352517..a35b30e9f 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512y + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_10:17:59 +DATE: 2024-09-15_12:24:44 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4763s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7477s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7400s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1397s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1337s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.4009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3455s - [COUNTERS] Fortran MEs ( 1 ) : 0.0555s for 81920 events => throughput is 1.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6615s + [COUNTERS] Fortran MEs ( 1 ) : 0.0752s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1383s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1332s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2281s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.21E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0487s for 81920 events => throughput is 1.68E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6521s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0681s for 81920 events => throughput is 1.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.738826e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.224241e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.768762e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.240015e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1358s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.97E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.15E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6563s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.02E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.262921e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.177757e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.432171e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.232799e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1358s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.35E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3495s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 81920 events => throughput is 4.43E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6517s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 81920 events => throughput is 3.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.787139e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.494973e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.973182e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.587728e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1379s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1359s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0181s for 81920 events => throughput is 4.52E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6545s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 81920 events => throughput is 3.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.786048e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.557503e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.055785e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.521678e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1357s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0013s for 8192 events => throughput is 6.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2260s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 81920 events => throughput is 6.60E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.452110e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.370852e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.832047e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.638790e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.1007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.990359e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.155049e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.169145e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.016803e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.172418e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.059728e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.911760e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.715158e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 1262c91dc..078c01c46 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cppnone +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:04 +DATE: 2024-09-15_12:25:03 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4774s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7443s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1395s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2141s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] Fortran MEs ( 1 ) : 0.0557s for 81920 events => throughput is 1.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6547s + [COUNTERS] Fortran MEs ( 1 ) : 0.0757s for 81920 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,9 +134,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.10E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.4018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3456s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0561s for 81920 events => throughput is 1.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.7244s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 81920 events => throughput is 1.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.483377e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.127620e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524685e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.149889e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1367s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 81920 events => throughput is 2.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.7028s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 81920 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.736078e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.010964e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.850472e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.042848e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 81920 events => throughput is 3.38E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6838s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.504989e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.582216e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708160e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.715007e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1356s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1330s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 81920 events => throughput is 3.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 81920 events => throughput is 2.51E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.663123e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.595546e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.862790e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.748668e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1331s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2172s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3488s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 81920 events => throughput is 4.75E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0382s for 81920 events => throughput is 2.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.855008e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.209077e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.410500e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.339473e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6512s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.0962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0880s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.884543e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.177546e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.231479e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.854991e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.235273e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.864709e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.202609e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.123678e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 3129a8923..96c9821e5 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:09 +DATE: 2024-09-15_12:25:22 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.5390s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5080s - [COUNTERS] Fortran MEs ( 1 ) : 0.0310s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8316s + [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2570s - [COUNTERS] Fortran MEs ( 1 ) : 0.0310s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4675s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4254s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2513s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9411s - [COUNTERS] Fortran MEs ( 1 ) : 0.3102s for 81920 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6157s + [COUNTERS] Fortran MEs ( 1 ) : 0.4316s for 81920 events => throughput is 1.90E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2909s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3455s for 81920 events => throughput is 2.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0844s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4617s for 81920 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.367202e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.819761e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.453712e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.831048e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 8192 events => throughput is 3.18E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1275s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1860s for 81920 events => throughput is 4.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8861s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2599s for 81920 events => throughput is 3.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.368153e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.274415e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.463371e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.309936e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4481s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0558s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 81920 events => throughput is 7.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7336s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1571s for 81920 events => throughput is 5.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.505666e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.284458e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.566560e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.379827e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2666s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0499s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1086s for 81920 events => throughput is 7.54E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1439s for 81920 events => throughput is 5.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.742435e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.785167e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.849789e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.877514e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2666s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 8192 events => throughput is 8.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0981s for 81920 events => throughput is 8.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.094051e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.669033e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.185665e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.709076e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0186s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0088s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.691011e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.112364e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.690097e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.580203e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.646974e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.911031e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.660586e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.671503e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 85dbd5f70..332313b06 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:19 +DATE: 2024-09-15_12:25:51 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.5368s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5057s - [COUNTERS] Fortran MEs ( 1 ) : 0.0311s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s + [COUNTERS] Fortran MEs ( 1 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2886s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2575s - [COUNTERS] Fortran MEs ( 1 ) : 0.0311s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4702s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4279s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2508s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9402s - [COUNTERS] Fortran MEs ( 1 ) : 0.3106s for 81920 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9993s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5780s + [COUNTERS] Fortran MEs ( 1 ) : 0.4213s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0303s for 8192 events => throughput is 2.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2430s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9392s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3036s for 81920 events => throughput is 2.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0011s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4208s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.692104e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.965272e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.760772e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.960936e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2548s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4215s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.73E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1291s for 81920 events => throughput is 6.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7527s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5796s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1729s for 81920 events => throughput is 4.74E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.360230e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.736938e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.380129e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.756905e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,9 +294,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4356s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,9 +329,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0674s for 81920 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6648s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0913s for 81920 events => throughput is 8.98E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.270736e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.164017e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.280625e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.232442e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.47E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,9 +409,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0654s for 81920 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6635s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0868s for 81920 events => throughput is 9.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.307149e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.838171e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.322628e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.798371e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2601s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 0.9950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9401s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0547s for 81920 events => throughput is 1.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7012s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1202s for 81920 events => throughput is 6.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.523094e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.784830e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.552541e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.151143e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8626s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0261s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 81920 events => throughput is 9.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.885710e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.367920e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.566831e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323685e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.551074e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.318066e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.342254e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.899112e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index a63a65b56..690f140a4 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:29 +DATE: 2024-09-15_12:26:19 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.5381s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5070s - [COUNTERS] Fortran MEs ( 1 ) : 0.0310s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8180s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2905s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s - [COUNTERS] Fortran MEs ( 1 ) : 0.0310s for 8192 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4333s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2524s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9425s - [COUNTERS] Fortran MEs ( 1 ) : 0.3098s for 81920 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9957s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5742s + [COUNTERS] Fortran MEs ( 1 ) : 0.4215s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2556s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0350s for 8192 events => throughput is 2.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2912s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3499s for 81920 events => throughput is 2.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.0415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4548s for 81920 events => throughput is 1.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352933e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.842164e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.434549e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.851851e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.30E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1270s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1858s for 81920 events => throughput is 4.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2483s for 81920 events => throughput is 3.30E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.370955e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.330961e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.525724e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.355752e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2546s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0550s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 81920 events => throughput is 7.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7262s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5697s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1562s for 81920 events => throughput is 5.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.596830e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.311307e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.669903e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.353756e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2717s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2606s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4240s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0500s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1084s for 81920 events => throughput is 7.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1415s for 81920 events => throughput is 5.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.812712e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.855269e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.899867e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.000340e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2664s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4473s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 8192 events => throughput is 3.72E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.0378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9413s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0964s for 81920 events => throughput is 8.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8042s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2217s for 81920 events => throughput is 3.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.587101e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.705483e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.597330e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.752040e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8650s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0284s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.739810e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.024140e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.672565e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.446976e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.658819e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.897824e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.677542e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.722978e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 6b50775a3..b1e24854d 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:39 +DATE: 2024-09-15_12:26:48 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s - [COUNTERS] Fortran MEs ( 1 ) : 0.2536s for 8192 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4327s + [COUNTERS] Fortran MEs ( 1 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2413s - [COUNTERS] Fortran MEs ( 1 ) : 0.2536s for 8192 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s + [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6814s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1449s - [COUNTERS] Fortran MEs ( 1 ) : 2.5365s for 81920 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2429s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9222s + [COUNTERS] Fortran MEs ( 1 ) : 3.3207s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5098s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2428s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2663s for 8192 events => throughput is 3.08E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.8179s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1525s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6646s for 81920 events => throughput is 3.07E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9232s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4640s for 81920 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177192e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.429969e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.184161e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.453551e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3810s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2426s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1379s for 8192 events => throughput is 5.94E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5849s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1821s for 8192 events => throughput is 4.50E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.5316s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1524s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3787s for 81920 events => throughput is 5.94E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 3.7388s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9277s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8104s for 81920 events => throughput is 4.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.053363e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.641238e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.098932e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.663272e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0664s for 8192 events => throughput is 1.23E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 8192 events => throughput is 9.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8144s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6625s for 81920 events => throughput is 1.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.8146s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9105s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9035s for 81920 events => throughput is 9.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.262179e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.167207e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.259946e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.243572e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3036s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0614s for 8192 events => throughput is 1.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.7665s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6126s for 81920 events => throughput is 1.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8121s for 81920 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.379710e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.039882e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.378514e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.052704e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.2861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.5198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1150s for 8192 events => throughput is 7.12E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.5866s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4344s for 81920 events => throughput is 1.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 3.0660s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9169s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1486s for 81920 events => throughput is 7.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.874300e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.941251e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924270e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.958022e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 8192 events => throughput is 8.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3784s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 81920 events => throughput is 3.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.100979e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.403743e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.183615e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.166273e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.191972e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.170848e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.176434e+06 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.659019e+06 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index dc1e5729c..985d93495 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:18:59 +DATE: 2024-09-15_12:27:32 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s - [COUNTERS] Fortran MEs ( 1 ) : 0.2536s for 8192 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4325s + [COUNTERS] Fortran MEs ( 1 ) : 0.3323s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4972s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2434s - [COUNTERS] Fortran MEs ( 1 ) : 0.2538s for 8192 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7338s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4033s + [COUNTERS] Fortran MEs ( 1 ) : 0.3306s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6821s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1462s - [COUNTERS] Fortran MEs ( 1 ) : 2.5359s for 81920 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2246s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9116s + [COUNTERS] Fortran MEs ( 1 ) : 3.3130s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2544s for 8192 events => throughput is 3.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4045s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6983s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1535s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5441s for 81920 events => throughput is 3.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.2604s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9235s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3360s for 81920 events => throughput is 2.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.293248e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530952e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.301303e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.541703e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3180s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.5085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1020s for 8192 events => throughput is 8.03E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.9202s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1509s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7690s for 81920 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.9524s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9325s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0195s for 81920 events => throughput is 8.04E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094903e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.224767e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.096860e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.232784e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.2777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2428s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 8192 events => throughput is 2.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4560s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.4980s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1517s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3461s for 81920 events => throughput is 2.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4660s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.432973e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.788351e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.436260e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.804243e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.2743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4502s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.4697s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1508s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3187s for 81920 events => throughput is 2.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.3548s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9286s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4259s for 81920 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.660651e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953582e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.665521e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.958777e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.2637s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0553s for 8192 events => throughput is 1.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.3616s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1528s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2087s for 81920 events => throughput is 3.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.4956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5632s for 81920 events => throughput is 1.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.992666e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.487964e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.024893e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.471980e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8512s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3665s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 81920 events => throughput is 6.42E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.643032e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.828890e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.215885e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.209299e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.230792e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.279914e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.122919e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244670e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 29b017a03..f09398c49 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:19:18 +DATE: 2024-09-15_12:28:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s - [COUNTERS] Fortran MEs ( 1 ) : 0.2538s for 8192 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2433s - [COUNTERS] Fortran MEs ( 1 ) : 0.2547s for 8192 events => throughput is 3.22E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] Fortran MEs ( 1 ) : 0.3336s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6818s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1461s - [COUNTERS] Fortran MEs ( 1 ) : 2.5357s for 81920 events => throughput is 3.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2380s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9161s + [COUNTERS] Fortran MEs ( 1 ) : 3.3219s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2426s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2710s for 8192 events => throughput is 3.02E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3525s for 8192 events => throughput is 2.32E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.8657s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1557s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.7093s for 81920 events => throughput is 3.02E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.4849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9415s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5423s for 81920 events => throughput is 2.31E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.127982e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.417569e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.131118e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.399198e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1368s for 8192 events => throughput is 5.99E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 8192 events => throughput is 4.58E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.5228s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1526s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3697s for 81920 events => throughput is 5.98E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 3.7197s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9173s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8018s for 81920 events => throughput is 4.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.171012e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.658424e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.189290e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.679651e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3073s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2415s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0655s for 8192 events => throughput is 1.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4035s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0912s for 8192 events => throughput is 8.98E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8088s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1533s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6552s for 81920 events => throughput is 1.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.8060s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8973s for 81920 events => throughput is 9.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.288559e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.369001e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.288102e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.292033e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2432s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0602s for 8192 events => throughput is 1.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 8192 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.7679s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1657s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6019s for 81920 events => throughput is 1.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.7172s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7928s for 81920 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.398863e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.064357e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.397119e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.063711e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.2931s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0483s for 8192 events => throughput is 1.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.5230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4057s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1167s for 8192 events => throughput is 7.02E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.6202s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1655s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4543s for 81920 events => throughput is 1.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 3.1033s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9248s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 81920 events => throughput is 6.96E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.842584e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.025965e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.846890e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.997215e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 8192 events => throughput is 8.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.075089e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.284373e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.113955e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.151597e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.103058e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.161281e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.097615e+06 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.632131e+06 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 8cb2c7795..aa79e7413 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:19:38 +DATE: 2024-09-15_12:28:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.6389s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1856s - [COUNTERS] Fortran MEs ( 1 ) : 3.4533s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3217s + [COUNTERS] Fortran MEs ( 1 ) : 4.3011s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.6370s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1841s - [COUNTERS] Fortran MEs ( 1 ) : 3.4530s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s + [COUNTERS] Fortran MEs ( 1 ) : 4.2911s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.8417s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2807s - [COUNTERS] Fortran MEs ( 1 ) : 34.5609s for 81920 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.1984s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1366s + [COUNTERS] Fortran MEs ( 1 ) : 43.0618s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.6163s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1858s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4238s for 8192 events => throughput is 2.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0067s + [COUNTERS] PROGRAM TOTAL : 4.7927s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4709s for 8192 events => throughput is 1.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.5344s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2858s - [COUNTERS] CudaCpp MEs ( 2 ) : 34.2418s for 81920 events => throughput is 2.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0068s + [COUNTERS] PROGRAM TOTAL : 46.9448s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.8103s for 81920 events => throughput is 1.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.465377e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.891751e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.466598e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.888267e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.9921s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8034s for 8192 events => throughput is 4.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s + [COUNTERS] PROGRAM TOTAL : 2.6844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3118s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3678s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 19.3435s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2813s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.0587s for 81920 events => throughput is 4.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s + [COUNTERS] PROGRAM TOTAL : 25.9231s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1299s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7883s for 81920 events => throughput is 3.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.772852e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.557169e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.776874e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.534554e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9300s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1866s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7418s for 8192 events => throughput is 1.10E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 1.3455s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3111s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0319s for 8192 events => throughput is 7.94E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 8.7116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.4266s for 81920 events => throughput is 1.10E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 12.5238s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1221s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3993s for 81920 events => throughput is 7.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.141756e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.957412e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.139650e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.987255e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6652s for 8192 events => throughput is 1.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 1.2470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9271s for 8192 events => throughput is 8.84E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.9428s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2816s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.6596s for 81920 events => throughput is 1.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 11.3142s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1103s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.2017s for 81920 events => throughput is 8.90E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245759e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.281561e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.259340e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.268453e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.5982s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4120s for 8192 events => throughput is 1.99E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.4928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 8192 events => throughput is 6.96E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 5.4000s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2858s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1131s for 81920 events => throughput is 1.99E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 13.9855s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1285s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8543s for 81920 events => throughput is 6.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.031722e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.050033e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.022570e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.018224e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7522s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 8192 events => throughput is 2.10E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.9166s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 81920 events => throughput is 2.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.134189e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335722e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.124137e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119614e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.173416e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119318e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.423891e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 69b8f70a1..4deca2cf0 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:22:01 +DATE: 2024-09-15_12:32:48 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.6375s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1857s - [COUNTERS] Fortran MEs ( 1 ) : 3.4517s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s + [COUNTERS] Fortran MEs ( 1 ) : 4.3014s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.6392s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1859s - [COUNTERS] Fortran MEs ( 1 ) : 3.4533s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3142s + [COUNTERS] Fortran MEs ( 1 ) : 4.2980s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.8488s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2802s - [COUNTERS] Fortran MEs ( 1 ) : 34.5686s for 81920 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.2847s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1346s + [COUNTERS] Fortran MEs ( 1 ) : 43.1501s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.5758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1853s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3839s for 8192 events => throughput is 2.42E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0066s + [COUNTERS] PROGRAM TOTAL : 4.6745s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3511s for 8192 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -157,7 +162,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -165,10 +170,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.1018s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2803s - [COUNTERS] CudaCpp MEs ( 2 ) : 33.8149s for 81920 events => throughput is 2.42E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0066s + [COUNTERS] PROGRAM TOTAL : 45.6898s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1650s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.5163s for 81920 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +184,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512008e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.944338e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513779e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.941508e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +207,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,10 +215,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.0417s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8548s for 8192 events => throughput is 9.58E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.4963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3103s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1836s for 8192 events => throughput is 6.92E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -237,7 +242,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -245,10 +250,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 9.8475s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2839s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.5618s for 81920 events => throughput is 9.57E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 14.1993s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1207s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0762s for 81920 events => throughput is 6.78E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -259,14 +264,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.751456e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.996317e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.769230e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.839536e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -282,7 +287,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -290,10 +295,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.5655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1856s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3790s for 8192 events => throughput is 2.16E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.8442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3095s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,7 +322,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -325,10 +330,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 5.0875s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2821s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8046s for 81920 events => throughput is 2.15E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 7.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1115s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2532s for 81920 events => throughput is 1.56E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,14 +344,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.244229e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.604925e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.243048e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.604030e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,7 +367,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -370,10 +375,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.5250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3390s for 8192 events => throughput is 2.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.7886s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4739s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -397,7 +402,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -405,10 +410,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 4.6709s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2800s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3901s for 81920 events => throughput is 2.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 6.8107s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7015s for 81920 events => throughput is 1.74E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -419,14 +424,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.486418e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.805124e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.487427e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.796242e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -442,7 +447,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -450,10 +455,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.3911s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1860s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2045s for 8192 events => throughput is 4.01E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.8988s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5852s for 8192 events => throughput is 1.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -477,7 +482,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -485,10 +490,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 3.3221s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2826s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0389s for 81920 events => throughput is 4.02E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 8.0322s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1227s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9080s for 81920 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -499,17 +504,125 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.103869e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.400554e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.115301e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.422518e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.7991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7473s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 8192 events => throughput is 3.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.7864s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2329s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0245s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.087446e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.366930e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.074769e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.213607e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.112109e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.224381e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.063345e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.392863e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 43695b146..abdc3e698 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:23:59 +DATE: 2024-09-15_12:35:52 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.6403s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1861s - [COUNTERS] Fortran MEs ( 1 ) : 3.4543s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6491s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3163s + [COUNTERS] Fortran MEs ( 1 ) : 4.3328s for 8192 events => throughput is 1.89E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.6393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1849s - [COUNTERS] Fortran MEs ( 1 ) : 3.4544s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6308s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3143s + [COUNTERS] Fortran MEs ( 1 ) : 4.3165s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.8371s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s - [COUNTERS] Fortran MEs ( 1 ) : 34.5599s for 81920 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.2197s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1385s + [COUNTERS] Fortran MEs ( 1 ) : 43.0812s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.6408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1868s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4472s for 8192 events => throughput is 2.38E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0068s + [COUNTERS] PROGRAM TOTAL : 4.8483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5249s for 8192 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 35.7748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2842s - [COUNTERS] CudaCpp MEs ( 2 ) : 34.4838s for 81920 events => throughput is 2.38E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0068s + [COUNTERS] PROGRAM TOTAL : 47.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1281s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.3807s for 81920 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.445570e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.872546e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.445685e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.871624e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.0235s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1854s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8343s for 8192 events => throughput is 4.47E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s + [COUNTERS] PROGRAM TOTAL : 2.7061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3887s for 8192 events => throughput is 3.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 19.6344s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2821s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.3486s for 81920 events => throughput is 4.46E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s + [COUNTERS] PROGRAM TOTAL : 26.0597s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1243s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.9304s for 81920 events => throughput is 3.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.600154e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.500917e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.606807e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.521414e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7361s for 8192 events => throughput is 1.11E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 1.3556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0411s for 8192 events => throughput is 7.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 8.6530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2833s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.3680s for 81920 events => throughput is 1.11E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 12.5367s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1235s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.4107s for 81920 events => throughput is 7.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.148415e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.110548e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146884e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.032234e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8515s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1848s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6652s for 8192 events => throughput is 1.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 1.2161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9010s for 8192 events => throughput is 9.09E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.9411s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2850s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.6545s for 81920 events => throughput is 1.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 11.1682s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1205s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.0456s for 81920 events => throughput is 9.06E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.264067e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.335833e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.263632e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.254922e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.5992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4120s for 8192 events => throughput is 1.99E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.5079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1930s for 8192 events => throughput is 6.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 5.4127s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2871s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1246s for 81920 events => throughput is 1.99E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 14.0414s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1292s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9095s for 81920 events => throughput is 6.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.019924e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.939493e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.012964e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.983445e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8281s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.9070s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5436s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3289s for 81920 events => throughput is 2.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0345s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.146362e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.341729e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.115354e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.164001e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119722e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.169184e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.126511e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.426812e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index e93f033db..22c56c63c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z - -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_10:26:59 +DATE: 2024-09-15_12:41:22 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.1636s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3301s - [COUNTERS] Fortran MEs ( 1 ) : 84.8335s for 8192 events => throughput is 9.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5630s + [COUNTERS] Fortran MEs ( 1 ) : 101.6461s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.4874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3332s - [COUNTERS] Fortran MEs ( 1 ) : 85.1541s for 8192 events => throughput is 9.62E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5557s + [COUNTERS] Fortran MEs ( 1 ) : 101.7508s for 8192 events => throughput is 8.05E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 853.8954s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8093s - [COUNTERS] Fortran MEs ( 1 ) : 851.0861s for 81920 events => throughput is 9.63E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1015.5197s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6072s + [COUNTERS] Fortran MEs ( 1 ) : 1010.9125s for 81920 events => throughput is 8.10E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.2299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s - [COUNTERS] CudaCpp MEs ( 2 ) : 96.7375s for 8192 events => throughput is 8.47E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1609s + [COUNTERS] PROGRAM TOTAL : 121.0029s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5478s + [COUNTERS] CudaCpp MEs ( 2 ) : 120.2529s for 8192 events => throughput is 6.81E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2022s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,36 +161,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633778E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 970.4783s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7979s - [COUNTERS] CudaCpp MEs ( 2 ) : 967.5190s for 81920 events => throughput is 8.47E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1614s + [COUNTERS] PROGRAM TOTAL : 1237.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5156s + [COUNTERS] CudaCpp MEs ( 2 ) : 1232.9854s for 81920 events => throughput is 6.64E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2079s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633778E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.972116e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.635196e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.975555e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.680081e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 51.3031s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s - [COUNTERS] CudaCpp MEs ( 2 ) : 50.8866s for 8192 events => throughput is 1.61E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0855s + [COUNTERS] PROGRAM TOTAL : 62.7563s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5430s + [COUNTERS] CudaCpp MEs ( 2 ) : 62.1127s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 510.1480s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8000s - [COUNTERS] CudaCpp MEs ( 2 ) : 507.2631s for 81920 events => throughput is 1.61E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0849s + [COUNTERS] PROGRAM TOTAL : 620.1444s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 615.5209s for 81920 events => throughput is 1.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1011s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.889075e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.588919e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.885260e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.592321e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 23.6143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.2446s for 8192 events => throughput is 3.52E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0382s + [COUNTERS] PROGRAM TOTAL : 29.4160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5466s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.8206s for 8192 events => throughput is 2.84E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0487s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 235.9347s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7916s - [COUNTERS] CudaCpp MEs ( 2 ) : 233.1048s for 81920 events => throughput is 3.51E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0383s + [COUNTERS] PROGRAM TOTAL : 297.3512s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5469s + [COUNTERS] CudaCpp MEs ( 2 ) : 292.7572s for 81920 events => throughput is 2.80E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.226799e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.275338e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.217375e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.265860e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 21.0572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.6933s for 8192 events => throughput is 3.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s + [COUNTERS] PROGRAM TOTAL : 27.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9932s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.1100s for 8192 events => throughput is 3.14E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0432s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 209.8953s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7916s - [COUNTERS] CudaCpp MEs ( 2 ) : 207.0704s for 81920 events => throughput is 3.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s + [COUNTERS] PROGRAM TOTAL : 264.6969s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5790s + [COUNTERS] CudaCpp MEs ( 2 ) : 260.0758s for 81920 events => throughput is 3.15E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0421s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.857162e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.866714e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.854648e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.924843e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 11.8094s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3305s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4599s for 8192 events => throughput is 7.15E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0191s + [COUNTERS] PROGRAM TOTAL : 26.5115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9718s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.4924s for 8192 events => throughput is 3.21E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0472s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 118.0329s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7967s - [COUNTERS] CudaCpp MEs ( 2 ) : 115.2172s for 81920 events => throughput is 7.11E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0191s + [COUNTERS] PROGRAM TOTAL : 259.7252s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4963s + [COUNTERS] CudaCpp MEs ( 2 ) : 255.1820s for 81920 events => throughput is 3.21E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.531017e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.450525e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.556231e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.456130e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 3.2899s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0907s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1026s for 8192 events => throughput is 7.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0966s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 16.9895s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0379s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8621s for 81920 events => throughput is 7.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0895s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.484119e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.258297e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.228572e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.575774e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.255424e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.453521e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.234529e+03 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.253800e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index e1ff2cc6a..ecfc5d7b2 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_11:27:19 +DATE: 2024-09-15_13:59:30 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.4150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s - [COUNTERS] Fortran MEs ( 1 ) : 85.0852s for 8192 events => throughput is 9.63E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.5343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5565s + [COUNTERS] Fortran MEs ( 1 ) : 100.9778s for 8192 events => throughput is 8.11E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.1963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3321s - [COUNTERS] Fortran MEs ( 1 ) : 84.8642s for 8192 events => throughput is 9.65E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.8885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5530s + [COUNTERS] Fortran MEs ( 1 ) : 101.3354s for 8192 events => throughput is 8.08E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 855.8242s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8112s - [COUNTERS] Fortran MEs ( 1 ) : 853.0129s for 81920 events => throughput is 9.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1019.5275s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6107s + [COUNTERS] Fortran MEs ( 1 ) : 1014.9167s for 81920 events => throughput is 8.07E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -122,22 +127,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575849458995581E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 95.0312s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s - [COUNTERS] CudaCpp MEs ( 2 ) : 94.5438s for 8192 events => throughput is 8.66E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1564s + [COUNTERS] PROGRAM TOTAL : 113.9851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5716s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.2257s for 8192 events => throughput is 7.24E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1878s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849458995581E-007) differ by less than 4E-4 (0.000139480289658378) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,36 +163,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845954415798603E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 946.5156s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7954s - [COUNTERS] CudaCpp MEs ( 2 ) : 943.5635s for 81920 events => throughput is 8.68E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1567s + [COUNTERS] PROGRAM TOTAL : 1127.1766s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5299s + [COUNTERS] CudaCpp MEs ( 2 ) : 1122.4594s for 81920 events => throughput is 7.30E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1874s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954415798603E-007) differ by less than 4E-4 (0.0001418964616179963) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.025814e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.606058e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024607e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.616772e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,7 +209,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -212,10 +217,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 23.0583s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3311s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.6890s for 8192 events => throughput is 3.61E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0383s + [COUNTERS] PROGRAM TOTAL : 28.4233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5609s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.8152s for 8192 events => throughput is 2.95E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -240,7 +245,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -248,10 +253,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 229.7905s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7946s - [COUNTERS] CudaCpp MEs ( 2 ) : 226.9576s for 81920 events => throughput is 3.61E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0383s + [COUNTERS] PROGRAM TOTAL : 283.7955s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5114s + [COUNTERS] CudaCpp MEs ( 2 ) : 279.2380s for 81920 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -262,14 +267,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.194420e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.393625e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.194510e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397500e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,7 +291,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,10 +299,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 11.9493s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.5988s for 8192 events => throughput is 7.06E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0192s + [COUNTERS] PROGRAM TOTAL : 14.8014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5481s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.2298s for 8192 events => throughput is 5.76E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0235s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -322,7 +327,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -330,10 +335,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 118.8359s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7884s - [COUNTERS] CudaCpp MEs ( 2 ) : 116.0284s for 81920 events => throughput is 7.06E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0192s + [COUNTERS] PROGRAM TOTAL : 148.1207s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4972s + [COUNTERS] CudaCpp MEs ( 2 ) : 143.5998s for 81920 events => throughput is 5.70E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0237s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -344,14 +349,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.460886e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.850524e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.472482e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.880069e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -368,7 +373,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -376,10 +381,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 10.7288s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3311s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3808s for 8192 events => throughput is 7.89E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0169s + [COUNTERS] PROGRAM TOTAL : 13.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5589s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0709s for 8192 events => throughput is 6.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -404,7 +409,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -412,10 +417,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 106.7217s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7934s - [COUNTERS] CudaCpp MEs ( 2 ) : 103.9115s for 81920 events => throughput is 7.88E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0168s + [COUNTERS] PROGRAM TOTAL : 135.4503s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5206s + [COUNTERS] CudaCpp MEs ( 2 ) : 130.9090s for 81920 events => throughput is 6.26E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0207s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -426,14 +431,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.658562e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.634471e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.654315e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.589322e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -450,7 +455,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -458,10 +463,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 6.1095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3307s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7691s for 8192 events => throughput is 1.42E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0096s + [COUNTERS] PROGRAM TOTAL : 13.3326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5808s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7254s for 8192 events => throughput is 6.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0265s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -486,7 +491,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -494,10 +499,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 59.8894s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7905s - [COUNTERS] CudaCpp MEs ( 2 ) : 57.0891s for 81920 events => throughput is 1.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0097s + [COUNTERS] PROGRAM TOTAL : 131.7116s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5046s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.1838s for 81920 events => throughput is 6.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0233s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -508,17 +513,125 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.704046e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.895916e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.702005e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.878269e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.1535s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5385s for 8192 events => throughput is 1.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5593s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 10.9212s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0072s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3407s for 81920 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5732s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.542523e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.546779e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.146798e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.138507e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.102140e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.148371e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.144042e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.992297e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 7c77c3b01..f052f133a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + +make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - +make USEBUILDDIR=1 BACKEND=cpp512z make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:15:22 +DATE: 2024-09-15_14:59:11 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.5598s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s - [COUNTERS] Fortran MEs ( 1 ) : 85.2288s for 8192 events => throughput is 9.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5531s + [COUNTERS] Fortran MEs ( 1 ) : 101.2028s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 85.3601s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3311s - [COUNTERS] Fortran MEs ( 1 ) : 85.0289s for 8192 events => throughput is 9.63E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s + [COUNTERS] Fortran MEs ( 1 ) : 101.6626s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 855.9979s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8058s - [COUNTERS] Fortran MEs ( 1 ) : 853.1920s for 81920 events => throughput is 9.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1016.2987s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6111s + [COUNTERS] Fortran MEs ( 1 ) : 1011.6876s for 81920 events => throughput is 8.10E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 98.1116s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3305s - [COUNTERS] CudaCpp MEs ( 2 ) : 97.6185s for 8192 events => throughput is 8.39E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1626s + [COUNTERS] PROGRAM TOTAL : 121.8737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5730s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.0897s for 8192 events => throughput is 6.77E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2110s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 979.4974s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7958s - [COUNTERS] CudaCpp MEs ( 2 ) : 976.5386s for 81920 events => throughput is 8.39E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1631s + [COUNTERS] PROGRAM TOTAL : 1278.2990s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5229s + [COUNTERS] CudaCpp MEs ( 2 ) : 1273.5682s for 81920 events => throughput is 6.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2078s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.842043e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.297738e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.852946e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.358504e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 49.1998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3326s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.7860s for 8192 events => throughput is 1.68E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0813s + [COUNTERS] PROGRAM TOTAL : 67.0430s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 66.3507s for 8192 events => throughput is 1.23E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1079s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 491.2214s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8070s - [COUNTERS] CudaCpp MEs ( 2 ) : 488.3332s for 81920 events => throughput is 1.68E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0813s + [COUNTERS] PROGRAM TOTAL : 651.0804s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5257s + [COUNTERS] CudaCpp MEs ( 2 ) : 646.4467s for 81920 events => throughput is 1.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1080s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.980645e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.537162e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.980273e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.537881e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 22.8660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.4978s for 8192 events => throughput is 3.64E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0367s + [COUNTERS] PROGRAM TOTAL : 28.5211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5700s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9053s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 227.8033s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7920s - [COUNTERS] CudaCpp MEs ( 2 ) : 224.9746s for 81920 events => throughput is 3.64E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0367s + [COUNTERS] PROGRAM TOTAL : 283.4125s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4921s + [COUNTERS] CudaCpp MEs ( 2 ) : 278.8756s for 81920 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0448s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.403307e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.574406e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.396211e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.562894e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 20.3039s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.9415s for 8192 events => throughput is 4.11E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0318s + [COUNTERS] PROGRAM TOTAL : 24.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5658s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.2646s for 8192 events => throughput is 3.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 202.0128s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7903s - [COUNTERS] CudaCpp MEs ( 2 ) : 199.1907s for 81920 events => throughput is 4.11E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0318s + [COUNTERS] PROGRAM TOTAL : 248.6338s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5381s + [COUNTERS] CudaCpp MEs ( 2 ) : 244.0566s for 81920 events => throughput is 3.36E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.082786e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.158059e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.088481e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.143650e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 11.6414s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3337s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2891s for 8192 events => throughput is 7.26E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0186s + [COUNTERS] PROGRAM TOTAL : 26.3376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5632s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7273s for 8192 events => throughput is 3.18E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 115.3496s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7927s - [COUNTERS] CudaCpp MEs ( 2 ) : 112.5383s for 81920 events => throughput is 7.28E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0186s + [COUNTERS] PROGRAM TOTAL : 261.5729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5211s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.0034s for 81920 events => throughput is 3.19E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0484s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.706905e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.487285e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.777672e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.483613e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.8854s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1263s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8776s for 8192 events => throughput is 9.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8815s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 14.5688s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0388s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6484s for 81920 events => throughput is 9.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8816s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.426842e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.075881e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106694e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.154223e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.105665e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.108609e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106895e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.678273e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 9b4756980..ae3de12b0 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512z make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_10:26:22 +DATE: 2024-09-15_12:39:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.3406s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2864s - [COUNTERS] Fortran MEs ( 1 ) : 0.0542s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4781s + [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2555s - [COUNTERS] Fortran MEs ( 1 ) : 0.0543s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4263s + [COUNTERS] Fortran MEs ( 1 ) : 0.0717s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1547s - [COUNTERS] Fortran MEs ( 1 ) : 0.5412s for 81920 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6426s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s + [COUNTERS] Fortran MEs ( 1 ) : 0.7199s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2607s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0588s for 8192 events => throughput is 1.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4265s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7368s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5873s for 81920 events => throughput is 1.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.7326s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7853s for 81920 events => throughput is 1.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.405875e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.069256e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433248e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.071686e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2879s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.4673s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3198s for 81920 events => throughput is 2.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.3706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9391s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4311s for 81920 events => throughput is 1.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540167e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.920806e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.600681e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.914858e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2546s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0180s for 8192 events => throughput is 4.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4266s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3285s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1489s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1792s for 81920 events => throughput is 4.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2505s for 81920 events => throughput is 3.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.679850e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.315064e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.716867e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.353486e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4550s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3189s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1699s for 81920 events => throughput is 4.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9556s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2263s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.914901e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.736761e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.966388e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.761611e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2712s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2563s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4645s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.2980s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1453s for 81920 events => throughput is 5.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.2885s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3420s for 81920 events => throughput is 2.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.552302e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.383703e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.645393e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.334002e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8655s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3721s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.97E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.721561e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.065390e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.156601e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.991698e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.159535e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.254663e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.182535e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.650805e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index a50b6d093..c9aade28c 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_10:26:34 +DATE: 2024-09-15_12:40:18 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.3398s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2855s - [COUNTERS] Fortran MEs ( 1 ) : 0.0542s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4755s + [COUNTERS] Fortran MEs ( 1 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2567s - [COUNTERS] Fortran MEs ( 1 ) : 0.0543s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s + [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6952s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1544s - [COUNTERS] Fortran MEs ( 1 ) : 0.5408s for 81920 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9299s + [COUNTERS] Fortran MEs ( 1 ) : 0.7210s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0545s for 8192 events => throughput is 1.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0741s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1547s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5447s for 81920 events => throughput is 1.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.6726s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7339s for 81920 events => throughput is 1.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.525358e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.135662e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.535190e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.136549e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0203s for 8192 events => throughput is 4.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4543s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 8192 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3515s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2016s for 81920 events => throughput is 4.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.2055s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2678s for 81920 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.014062e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.039729e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.042770e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.985614e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2651s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 8192 events => throughput is 8.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4288s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.2528s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0975s for 81920 events => throughput is 8.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0714s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1346s for 81920 events => throughput is 6.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.658437e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.180640e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.714170e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.307549e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4399s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4274s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.2458s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0923s for 81920 events => throughput is 8.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0732s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1245s for 81920 events => throughput is 6.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.159291e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.792178e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.221232e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.753332e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2637s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.2242s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0736s for 81920 events => throughput is 1.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.1153s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 81920 events => throughput is 4.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.123575e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.847714e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143652e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.878988e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3785s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3688s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.25E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.041241e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.251569e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.842862e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.410741e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.813185e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.550006e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522713e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.198390e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 7deef0a07..e0b5569f2 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_10:26:46 +DATE: 2024-09-15_12:40:49 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.3399s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2857s - [COUNTERS] Fortran MEs ( 1 ) : 0.0542s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4774s + [COUNTERS] Fortran MEs ( 1 ) : 0.0737s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2579s - [COUNTERS] Fortran MEs ( 1 ) : 0.0542s for 8192 events => throughput is 1.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5034s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4306s + [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6971s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1564s - [COUNTERS] Fortran MEs ( 1 ) : 0.5407s for 81920 events => throughput is 1.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6463s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9275s + [COUNTERS] Fortran MEs ( 1 ) : 0.7189s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0586s for 8192 events => throughput is 1.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4367s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7397s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5857s for 81920 events => throughput is 1.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.7101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7758s for 81920 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.420154e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.061169e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429870e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.068944e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 8192 events => throughput is 2.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.4649s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3161s for 81920 events => throughput is 2.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.3778s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9488s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4286s for 81920 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.556307e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.907975e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.600180e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.932977e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 8192 events => throughput is 4.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4311s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3260s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1754s for 81920 events => throughput is 4.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9356s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2456s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.749492e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.338992e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.781150e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.300070e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 8192 events => throughput is 4.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3177s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1668s for 81920 events => throughput is 4.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1685s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2194s for 81920 events => throughput is 3.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.030150e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.788360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.082730e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.844368e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.2711s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.3033s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1525s for 81920 events => throughput is 5.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.2959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3513s for 81920 events => throughput is 2.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.411093e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.305023e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.446448e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.367510e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3800s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.726178e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.175370e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.180169e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.148445e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.160013e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.247186e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.158935e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.649343e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 41392dddd..a0e790e59 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512z make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +make USEBUILDDIR=1 BACKEND=cpp512y + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:09 +DATE: 2024-09-15_16:18:04 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.6193s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5833s - [COUNTERS] Fortran MEs ( 1 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0034s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9555s + [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2588s - [COUNTERS] Fortran MEs ( 1 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.2950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9342s - [COUNTERS] Fortran MEs ( 1 ) : 0.3608s for 81920 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5991s + [COUNTERS] Fortran MEs ( 1 ) : 0.4774s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0517s for 8192 events => throughput is 1.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.3332s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3941s for 81920 events => throughput is 2.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1070s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5123s for 81920 events => throughput is 1.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.075357e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.652048e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.155341e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.659409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4727s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0298s for 8192 events => throughput is 2.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.1465s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2055s for 81920 events => throughput is 3.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8851s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2793s for 81920 events => throughput is 2.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.847326e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.998650e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.995207e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.029895e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4534s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4359s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0607s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1204s for 81920 events => throughput is 6.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 81920 events => throughput is 4.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.998221e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.936700e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.066655e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.961392e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2680s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 7.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1158s for 81920 events => throughput is 7.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7471s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5932s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 81920 events => throughput is 5.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.285596e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.479225e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.346621e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.515474e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2559s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9406s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1009s for 81920 events => throughput is 8.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2327s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.876374e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.498096e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.056657e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.645446e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.8754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8716s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0505s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.85E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.883515e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.305682e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.805842e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.100377e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.801291e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.430840e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.808875e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.522017e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 6bd5f6bd4..e348b5e95 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cppnone +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:19 +DATE: 2024-09-15_16:18:33 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.6202s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5841s - [COUNTERS] Fortran MEs ( 1 ) : 0.0362s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9307s + [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2568s - [COUNTERS] Fortran MEs ( 1 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] Fortran MEs ( 1 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.2963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9346s - [COUNTERS] Fortran MEs ( 1 ) : 0.3617s for 81920 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6127s + [COUNTERS] Fortran MEs ( 1 ) : 0.4771s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.2907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2563s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 8192 events => throughput is 2.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4982s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4507s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0471s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -140,7 +145,7 @@ OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ b *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! -diff /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 < 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index aa2e2fc4b..aee293444 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cpp512y - -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cpp512y + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:22 +DATE: 2024-09-15_16:18:40 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.6167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5805s - [COUNTERS] Fortran MEs ( 1 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9342s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2933s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s - [COUNTERS] Fortran MEs ( 1 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4267s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.2964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9356s - [COUNTERS] Fortran MEs ( 1 ) : 0.3608s for 81920 events => throughput is 2.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0803s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6006s + [COUNTERS] Fortran MEs ( 1 ) : 0.4796s for 81920 events => throughput is 1.71E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -122,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -130,10 +135,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2942s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.10E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0513s for 8192 events => throughput is 1.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +163,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,10 +171,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.3255s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9370s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3883s for 81920 events => throughput is 2.11E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.0975s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5083s for 81920 events => throughput is 1.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,15 +187,15 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.017575e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.547214e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.024423e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.555047e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -207,7 +212,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -215,10 +220,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,7 +248,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -251,9 +256,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.1447s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9358s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2086s for 81920 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2785s for 81920 events => throughput is 2.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -267,15 +272,15 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761464e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.808635e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.809289e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.832759e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,7 +297,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -300,10 +305,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2682s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4521s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -328,7 +333,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -336,10 +341,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0628s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1226s for 81920 events => throughput is 6.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7695s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1725s for 81920 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -352,15 +357,15 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.729146e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.778867e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.809933e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.765875e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -377,7 +382,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -385,10 +390,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2563s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4553s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4389s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -413,7 +418,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -421,10 +426,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1189s for 81920 events => throughput is 6.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7602s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1565s for 81920 events => throughput is 5.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -437,15 +442,15 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.976266e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.160486e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.058014e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.247106e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -462,7 +467,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,10 +475,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.2665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2556s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4605s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4361s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -498,7 +503,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -506,10 +511,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.0492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1073s for 81920 events => throughput is 7.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5959s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2414s for 81920 events => throughput is 3.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -522,18 +527,126 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.243526e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.114993e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.339222e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.218557e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.8691s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0649s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 81920 events => throughput is 8.92E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.889005e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.160378e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.798951e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.069218e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.788436e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.498432e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.792905e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.500899e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 9fe2df4a7..394424817 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:16:31 +DATE: 2024-09-15_16:21:57 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.9883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2239s - [COUNTERS] Fortran MEs ( 1 ) : 1.7644s for 8192 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4036s + [COUNTERS] Fortran MEs ( 1 ) : 2.3801s for 8192 events => throughput is 3.44E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.9899s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2245s - [COUNTERS] Fortran MEs ( 1 ) : 1.7654s for 8192 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3886s + [COUNTERS] Fortran MEs ( 1 ) : 2.3571s for 8192 events => throughput is 3.48E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 18.8926s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2465s - [COUNTERS] Fortran MEs ( 1 ) : 17.6461s for 81920 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.3649s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1183s + [COUNTERS] Fortran MEs ( 1 ) : 23.2466s for 81920 events => throughput is 3.52E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1597s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2257s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9301s for 8192 events => throughput is 4.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0039s + [COUNTERS] PROGRAM TOTAL : 2.8935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3895s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4988s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.5559s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.3122s for 81920 events => throughput is 4.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s + [COUNTERS] PROGRAM TOTAL : 27.0952s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1181s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9720s for 81920 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.458613e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.400030e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.467785e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.433414e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.1880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9610s for 8192 events => throughput is 8.52E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 1.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3094s for 8192 events => throughput is 6.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 10.8494s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6108s for 81920 events => throughput is 8.52E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 15.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1129s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0999s for 81920 events => throughput is 6.25E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.816952e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.519678e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.777616e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.485759e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6398s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2237s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4150s for 8192 events => throughput is 1.97E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.9814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5898s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.3920s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2408s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1502s for 81920 events => throughput is 1.97E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.9019s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1015s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7989s for 81920 events => throughput is 1.41E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.040281e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.452031e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.035560e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.449039e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6064s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3800s for 8192 events => throughput is 2.16E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 0.9097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5136s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.0331s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2359s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7962s for 81920 events => throughput is 2.16E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 7.2541s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0919s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1608s for 81920 events => throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.229478e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.652584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.230790e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.662622e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.4714s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2462s for 8192 events => throughput is 3.33E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.0779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6792s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.7038s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2420s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4611s for 81920 events => throughput is 3.33E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 8.8889s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0984s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7887s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.439534e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.222969e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.438661e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.228238e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7854s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5891s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1766s for 81920 events => throughput is 4.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.196050e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.528954e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.664076e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.233050e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.853541e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.202405e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.857146e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.686465e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 36706036a..e9deddff7 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:17:53 +DATE: 2024-09-15_16:24:19 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.9858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2227s - [COUNTERS] Fortran MEs ( 1 ) : 1.7632s for 8192 events => throughput is 4.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [COUNTERS] Fortran MEs ( 1 ) : 2.3254s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.9884s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s - [COUNTERS] Fortran MEs ( 1 ) : 1.7635s for 8192 events => throughput is 4.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s + [COUNTERS] Fortran MEs ( 1 ) : 2.3208s for 8192 events => throughput is 3.53E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 18.8786s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2427s - [COUNTERS] Fortran MEs ( 1 ) : 17.6359s for 81920 events => throughput is 4.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.2760s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s + [COUNTERS] Fortran MEs ( 1 ) : 23.1679s for 81920 events => throughput is 3.54E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8981s for 8192 events => throughput is 4.32E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s + [COUNTERS] PROGRAM TOTAL : 2.8693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4694s for 8192 events => throughput is 3.32E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.2032s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2366s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.9628s for 81920 events => throughput is 4.32E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s + [COUNTERS] PROGRAM TOTAL : 26.6707s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1091s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5567s for 81920 events => throughput is 3.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.508230e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.460698e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.512236e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.449222e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4833s for 8192 events => throughput is 1.70E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.1130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4027s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7087s for 8192 events => throughput is 1.16E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.0798s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2320s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8467s for 81920 events => throughput is 1.69E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 9.0988s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.9716s for 81920 events => throughput is 1.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.742043e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.222480e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.753376e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.223984e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.4403s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2255s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2142s for 8192 events => throughput is 3.82E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2945s for 8192 events => throughput is 2.78E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.3792s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2366s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.1420s for 81920 events => throughput is 3.82E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 5.0576s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0983s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9584s for 81920 events => throughput is 2.77E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.894356e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.831089e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.894533e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.778084e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.4232s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2255s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1971s for 8192 events => throughput is 4.16E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6632s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2682s for 8192 events => throughput is 3.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.2092s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2392s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9694s for 81920 events => throughput is 4.16E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 4.7861s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0881s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6971s for 81920 events => throughput is 3.04E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.309060e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.155559e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.311057e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.172155e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.3487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2241s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1242s for 8192 events => throughput is 6.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.7427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4008s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3409s for 8192 events => throughput is 2.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.4835s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2401s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2429s for 81920 events => throughput is 6.59E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 5.5358s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1001s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4347s for 81920 events => throughput is 2.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.764745e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.419635e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.819106e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.430312e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7207s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5435s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1599s for 81920 events => throughput is 5.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.203327e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.444470e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.300793e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.322450e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.296148e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323252e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.292944e+06 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.656200e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 331a37633..2926bfb0a 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:19:02 +DATE: 2024-09-15_16:26:16 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.9873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2231s - [COUNTERS] Fortran MEs ( 1 ) : 1.7642s for 8192 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3810s + [COUNTERS] Fortran MEs ( 1 ) : 2.3260s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.9898s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2260s - [COUNTERS] Fortran MEs ( 1 ) : 1.7638s for 8192 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s + [COUNTERS] Fortran MEs ( 1 ) : 2.3120s for 8192 events => throughput is 3.54E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 18.8806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2408s - [COUNTERS] Fortran MEs ( 1 ) : 17.6398s for 81920 events => throughput is 4.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.2391s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1065s + [COUNTERS] Fortran MEs ( 1 ) : 23.1326s for 81920 events => throughput is 3.54E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9302s for 8192 events => throughput is 4.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0039s + [COUNTERS] PROGRAM TOTAL : 2.9243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3965s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5226s for 8192 events => throughput is 3.25E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.5518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2421s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.3059s for 81920 events => throughput is 4.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0039s + [COUNTERS] PROGRAM TOTAL : 27.2773s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1266s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1455s for 81920 events => throughput is 3.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.447138e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.405812e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.442236e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.414011e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.1647s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2250s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9376s for 8192 events => throughput is 8.74E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 1.6740s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3954s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2759s for 8192 events => throughput is 6.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 10.6160s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2386s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3753s for 81920 events => throughput is 8.74E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 14.9334s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1011s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8296s for 81920 events => throughput is 6.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.033965e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.728250e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.993164e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.779541e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4167s for 8192 events => throughput is 1.97E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.9679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5743s for 8192 events => throughput is 1.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.4111s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2419s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1681s for 81920 events => throughput is 1.97E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.8646s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1024s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7607s for 81920 events => throughput is 1.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.038754e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.464043e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.034438e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.467432e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2267s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3771s for 8192 events => throughput is 2.17E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 0.8994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5073s for 8192 events => throughput is 1.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.0144s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2433s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7702s for 81920 events => throughput is 2.17E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 7.1742s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0971s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.0757s for 81920 events => throughput is 1.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.240649e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.629585e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.238634e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.667414e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.4697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2441s for 8192 events => throughput is 3.36E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.0767s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6858s for 8192 events => throughput is 1.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.6782s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4397s for 81920 events => throughput is 3.36E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 8.9698s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0925s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.8755s for 81920 events => throughput is 1.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431834e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.207955e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.424290e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.210184e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8633s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7183s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5211s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 81920 events => throughput is 4.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.181769e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.501330e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.823476e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.163740e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.836976e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.201797e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.682707e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.670380e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 88c262ecb..9ce56a2c2 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_13:16:03 +DATE: 2024-09-15_16:20:35 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.4344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7270s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7185s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2641s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4349s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.46E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9519s - [COUNTERS] Fortran MEs ( 1 ) : 0.0589s for 81920 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6091s + [COUNTERS] Fortran MEs ( 1 ) : 0.0846s for 81920 events => throughput is 9.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2696s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4481s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0624s for 81920 events => throughput is 1.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.297710e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.003482e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.317070e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.010577e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9918s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 81920 events => throughput is 2.54E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6661s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6195s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 81920 events => throughput is 1.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609267e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.895351e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.797169e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.970667e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2641s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2619s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.07E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 81920 events => throughput is 4.24E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 81920 events => throughput is 2.80E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.735077e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.076046e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.025403e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.350533e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2636s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9761s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0183s for 81920 events => throughput is 4.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.911825e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.308950e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.291778e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.410024e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2644s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9582s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 81920 events => throughput is 5.99E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 81920 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.610786e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.879828e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.396666e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.100504e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8776s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0680s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.097292e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.375155e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.411895e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.863144e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.479845e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.815670e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.473337e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.228599e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 11e091766..dbd145c06 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cppnone +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +make USEBUILDDIR=1 BACKEND=cpp512y + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_13:16:12 +DATE: 2024-09-15_16:21:03 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.4343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4284s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6996s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2591s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4322s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9521s - [COUNTERS] Fortran MEs ( 1 ) : 0.0584s for 81920 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6080s + [COUNTERS] Fortran MEs ( 1 ) : 0.0850s for 81920 events => throughput is 9.64E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2614s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,9 +169,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9564s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0585s for 81920 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0829s for 81920 events => throughput is 9.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.415800e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.007261e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.511889e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.017605e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +249,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 81920 events => throughput is 4.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6362s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6083s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 81920 events => throughput is 2.96E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.500506e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.284018e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.706372e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.432203e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,9 +329,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9684s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 81920 events => throughput is 6.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6266s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.266412e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.063003e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.723195e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.292038e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,9 +374,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0013s for 8192 events => throughput is 6.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.40E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,9 +409,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 81920 events => throughput is 6.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6259s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.57E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.549932e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.244642e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.077024e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.614839e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2625s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2613s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9671s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 81920 events => throughput is 8.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6554s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6328s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 81920 events => throughput is 3.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.476480e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.412735e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.051290e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.638041e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0625s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0542s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.154441e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.353273e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.331367e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.058204e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.463227e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.109516e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.155108e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.640171e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 0a0b7aaff..2e41ca2cb 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_13:16:21 +DATE: 2024-09-15_16:21:30 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4287s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4410s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0087s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9503s - [COUNTERS] Fortran MEs ( 1 ) : 0.0585s for 81920 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7106s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6251s + [COUNTERS] Fortran MEs ( 1 ) : 0.0855s for 81920 events => throughput is 9.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9569s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0635s for 81920 events => throughput is 1.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7066s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 81920 events => throughput is 9.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302363e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.753908e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.314011e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.000196e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.51E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4497s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9906s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 81920 events => throughput is 2.57E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6016s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s for 81920 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.676881e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.842568e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.789784e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.939712e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2639s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.32E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4475s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 81920 events => throughput is 4.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 81920 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.952018e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.319543e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.335025e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.469317e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2632s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4549s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 81920 events => throughput is 4.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6340s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 81920 events => throughput is 3.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.124183e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.335525e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.539297e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.608974e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.2631s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 6.05E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4504s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 0.9730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 81920 events => throughput is 6.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 81920 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.996477e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.036868e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.682677e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.195892e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0629s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0544s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.079478e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.384918e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.482107e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.823345e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.501214e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.930940e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.488831e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.231647e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 7a2d1c41d..62e2554b8 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +make USEBUILDDIR=1 BACKEND=cpp512y + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:33 +DATE: 2024-09-15_16:19:09 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.5400s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5088s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2990s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2678s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4237s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9404s - [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 81920 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9963s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5726s + [COUNTERS] Fortran MEs ( 1 ) : 0.4237s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 8192 events => throughput is 2.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0465s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3452s for 81920 events => throughput is 2.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4490s for 81920 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.380878e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.850829e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.441518e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.847409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1860s for 81920 events => throughput is 4.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2530s for 81920 events => throughput is 3.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.373620e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.295503e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.531095e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.383060e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2598s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.25E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0591s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1125s for 81920 events => throughput is 7.28E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7403s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1576s for 81920 events => throughput is 5.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.517915e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.346054e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.584313e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.315504e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2606s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.42E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4533s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0611s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9512s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1097s for 81920 events => throughput is 7.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7255s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5809s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1443s for 81920 events => throughput is 5.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.756295e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.745425e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.836044e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.928976e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2709s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2608s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0995s for 81920 events => throughput is 8.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8170s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2290s for 81920 events => throughput is 3.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.235475e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.544976e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.564838e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.602571e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0328s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.708123e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.136288e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.859628e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.597476e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.838367e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.970647e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.822341e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.671790e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 71cd41a7d..5ea9a274c 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cppnone +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:43 +DATE: 2024-09-15_16:19:38 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.5395s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5082s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8639s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8212s + [COUNTERS] Fortran MEs ( 1 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2577s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4259s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2539s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9416s - [COUNTERS] Fortran MEs ( 1 ) : 0.3123s for 81920 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5755s + [COUNTERS] Fortran MEs ( 1 ) : 0.4243s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2909s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0304s for 8192 events => throughput is 2.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2494s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3036s for 81920 events => throughput is 2.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4232s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.673944e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.962000e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.745842e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.971836e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1307s for 81920 events => throughput is 6.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.288994e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.761360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.469921e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.758225e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,9 +294,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2668s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2598s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4469s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,9 +329,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0137s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0672s for 81920 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5720s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0911s for 81920 events => throughput is 8.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.269021e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.095324e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.281030e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.260486e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4530s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4440s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,9 +409,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0656s for 81920 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0866s for 81920 events => throughput is 9.46E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.304545e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.845166e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.319188e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.869743e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2659s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4622s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,9 +489,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0000s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9449s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 81920 events => throughput is 1.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1211s for 81920 events => throughput is 6.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.532921e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.814469e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.542761e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.887986e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8659s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0160s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.873489e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.285658e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.913803e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.363404e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.939202e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.391621e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.636477e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.005101e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index adf2726ef..a80fb58d5 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_13:15:53 +DATE: 2024-09-15_16:20:06 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.5393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5080s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2559s - [COUNTERS] Fortran MEs ( 1 ) : 0.0313s for 8192 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4245s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2503s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9379s - [COUNTERS] Fortran MEs ( 1 ) : 0.3125s for 81920 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5937s + [COUNTERS] Fortran MEs ( 1 ) : 0.4248s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,10 +134,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0350s for 8192 events => throughput is 2.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4381s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -156,7 +161,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -164,10 +169,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2921s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3496s for 81920 events => throughput is 2.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0381s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4534s for 81920 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -178,14 +183,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.415983e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.855837e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.412532e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.855798e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 8192 events => throughput is 4.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4621s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +241,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,10 +249,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1318s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1864s for 81920 events => throughput is 4.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2495s for 81920 events => throughput is 3.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,14 +263,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.333531e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.330553e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.467628e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.338265e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2717s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +321,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9467s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1112s for 81920 events => throughput is 7.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 81920 events => throughput is 5.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -338,14 +343,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.595426e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.295263e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.689413e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.300898e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.54E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4549s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4403s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -396,7 +401,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -404,10 +409,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1080s for 81920 events => throughput is 7.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7287s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1430s for 81920 events => throughput is 5.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -418,14 +423,14 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.830329e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.880539e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.926762e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.012183e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2596s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4599s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -476,7 +481,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -484,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.0415s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0964s for 81920 events => throughput is 8.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8085s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2211s for 81920 events => throughput is 3.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -498,17 +503,125 @@ OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.739420e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.675547e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.892254e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.724298e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0197s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.935715e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.223055e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.837385e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.518211e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.850328e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.872009e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.860604e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.713800e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index a141f1256..1292ed24b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:41:19 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:08:03 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.330379e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.527996e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788543e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.812638 sec +INFO: No Floating Point Exceptions have been reported + 2,711,766,628 cycles # 2.867 GHz + 4,239,903,132 instructions # 1.56 insn per cycle + 1.138564764 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.371707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.576793e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.576793e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.204274 sec -INFO: No Floating Point Exceptions have been reported - 18,215,562,202 cycles # 3.499 GHz - 46,199,987,570 instructions # 2.54 insn per cycle - 5.207081237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.032481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205909e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.541164 sec +INFO: No Floating Point Exceptions have been reported + 19,214,248,144 cycles # 2.933 GHz + 46,179,436,349 instructions # 2.40 insn per cycle + 6.552095575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.197351e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.817550e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.817550e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.470479 sec -INFO: No Floating Point Exceptions have been reported - 12,150,819,694 cycles # 3.499 GHz - 31,609,060,137 instructions # 2.60 insn per cycle - 3.473422786 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.566602e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.052859e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052859e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.470271 sec +INFO: No Floating Point Exceptions have been reported + 13,145,357,361 cycles # 2.934 GHz + 31,720,883,797 instructions # 2.41 insn per cycle + 4.481479023 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.649260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.559584e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.559584e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.950152 sec -INFO: No Floating Point Exceptions have been reported - 9,977,976,212 cycles # 3.379 GHz - 19,490,400,043 instructions # 1.95 insn per cycle - 2.953010911 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.961947e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.743984e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.743984e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.647893 sec +INFO: No Floating Point Exceptions have been reported + 10,212,054,728 cycles # 2.792 GHz + 19,686,910,587 instructions # 1.93 insn per cycle + 3.658422867 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.762428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763089e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763089e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.866460 sec -INFO: No Floating Point Exceptions have been reported - 9,699,578,571 cycles # 3.381 GHz - 18,952,306,246 instructions # 1.95 insn per cycle - 2.869364942 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.012892e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.837375e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.837375e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.568887 sec +INFO: No Floating Point Exceptions have been reported + 10,042,390,879 cycles # 2.806 GHz + 19,342,891,969 instructions # 1.93 insn per cycle + 3.579550757 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.516010e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.271150e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.271150e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.384411 sec -INFO: No Floating Point Exceptions have been reported - 8,106,156,122 cycles # 3.396 GHz - 15,415,430,610 instructions # 1.90 insn per cycle - 2.387299574 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.687611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233598e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233598e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.179012 sec +INFO: No Floating Point Exceptions have been reported + 8,766,087,418 cycles # 2.093 GHz + 15,826,503,490 instructions # 1.81 insn per cycle + 4.190350116 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 9d9c23d11..656f6e2f9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,48 +1,113 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:00:23 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:45:05 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.206256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682542e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682542e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.457281 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,617,652,543 cycles # 2.830 GHz + 12,995,599,451 instructions # 1.71 insn per cycle + 2.778749807 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.348136e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.542679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.542679e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.366864 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.578808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.114252e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114252e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 7.297311 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 18,787,551,677 cycles # 3.499 GHz - 46,447,894,705 instructions # 2.47 insn per cycle - 5.370415603 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 20,775,260,342 cycles # 2.853 GHz + 46,581,102,942 instructions # 2.24 insn per cycle + 7.320240357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -50,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.081037e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634663e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.634663e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.793926 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.428539e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.841895e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.841895e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.145544 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 13,279,063,909 cycles # 3.497 GHz - 32,431,245,077 instructions # 2.44 insn per cycle - 3.798027455 seconds time elapsed + 14,656,531,148 cycles # 2.850 GHz + 32,719,868,481 instructions # 2.23 insn per cycle + 5.168221991 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -84,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.494167e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.289434e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.289434e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.264788 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.782197e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.265326 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 11,076,320,539 cycles # 3.389 GHz - 20,830,702,186 instructions # 1.88 insn per cycle - 3.269093260 seconds time elapsed + 11,667,988,214 cycles # 2.728 GHz + 21,208,810,330 instructions # 1.82 insn per cycle + 4.287433901 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -118,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.593003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.471851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471851e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.164450 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.824257e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.501728e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.501728e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.177936 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,745,998,429 cycles # 3.392 GHz - 20,293,306,176 instructions # 1.89 insn per cycle - 3.167946509 seconds time elapsed + 11,439,960,592 cycles # 2.728 GHz + 20,869,154,642 instructions # 1.82 insn per cycle + 4.198165961 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -152,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.238315e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.669091e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.669091e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.696586 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.555883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.022583e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.022583e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.778023 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,204,050,047 cycles # 3.409 GHz - 16,541,143,990 instructions # 1.80 insn per cycle - 2.700156871 seconds time elapsed + 10,270,194,102 cycles # 2.143 GHz + 17,125,695,085 instructions # 1.67 insn per cycle + 4.797944534 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -186,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 671a673e4..c883b5b3b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:05:19 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:57:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.026987e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683583e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.857936e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.462390 sec +INFO: No Floating Point Exceptions have been reported + 4,897,119,539 cycles # 2.892 GHz + 7,502,819,293 instructions # 1.53 insn per cycle + 1.751403177 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.373400e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.575479e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.575479e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029304e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.201451e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.201451e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.195888 sec +TOTAL : 7.039422 sec INFO: No Floating Point Exceptions have been reported - 18,185,706,552 cycles # 3.499 GHz - 46,200,456,028 instructions # 2.54 insn per cycle - 5.198467473 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 20,621,489,655 cycles # 2.926 GHz + 46,653,049,885 instructions # 2.26 insn per cycle + 7.049549267 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.195424e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.814656e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.814656e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.563131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.046448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.046448e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.475797 sec +TOTAL : 4.950021 sec INFO: No Floating Point Exceptions have been reported - 12,168,911,570 cycles # 3.499 GHz - 31,609,104,906 instructions # 2.60 insn per cycle - 3.478491025 seconds time elapsed + 14,503,935,115 cycles # 2.925 GHz + 32,091,166,775 instructions # 2.21 insn per cycle + 4.960008276 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.649433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560014e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560014e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.961733e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.745410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745410e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.949164 sec +TOTAL : 4.125348 sec INFO: No Floating Point Exceptions have been reported - 9,972,259,572 cycles # 3.379 GHz - 19,490,713,386 instructions # 1.95 insn per cycle - 2.951776441 seconds time elapsed + 11,625,768,108 cycles # 2.812 GHz + 19,969,403,537 instructions # 1.72 insn per cycle + 4.135325887 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.763676e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.760841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.760841e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.013648e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.832922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.832922e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.861385 sec +TOTAL : 4.042328 sec INFO: No Floating Point Exceptions have been reported - 9,686,062,110 cycles # 3.382 GHz - 18,941,409,972 instructions # 1.96 insn per cycle - 2.864022207 seconds time elapsed + 11,410,026,502 cycles # 2.817 GHz + 19,423,165,232 instructions # 1.70 insn per cycle + 4.052144921 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.511948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.252768e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.252768e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.712486e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.267405e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.267405e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.377933 sec +TOTAL : 4.597004 sec INFO: No Floating Point Exceptions have been reported - 8,086,254,658 cycles # 3.397 GHz - 15,415,427,547 instructions # 1.91 insn per cycle - 2.380573049 seconds time elapsed + 10,166,885,638 cycles # 2.209 GHz + 15,890,691,650 instructions # 1.56 insn per cycle + 4.606740948 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 33ead1e19..be71099ca 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:50:58 +DATE: 2024-09-15_11:54:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.500094e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.517548e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.743547e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.198136e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.691819e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.863296e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.980790 sec +TOTAL : 0.975349 sec INFO: No Floating Point Exceptions have been reported - 3,560,791,476 cycles # 2.941 GHz - 7,160,309,799 instructions # 2.01 insn per cycle - 1.268112972 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,491,422,658 cycles # 2.884 GHz + 6,978,748,855 instructions # 2.00 insn per cycle + 1.269405520 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.057662e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.232825e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.232825e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.025076e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.196021e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.351364 sec +TOTAL : 6.587511 sec INFO: No Floating Point Exceptions have been reported - 19,028,703,027 cycles # 2.994 GHz - 46,088,094,034 instructions # 2.42 insn per cycle - 6.356654129 seconds time elapsed + 19,326,799,797 cycles # 2.930 GHz + 46,201,035,622 instructions # 2.39 insn per cycle + 6.597179522 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.610727e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.099544e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.099544e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.561064e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.042427e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042427e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.309172 sec +TOTAL : 4.487009 sec INFO: No Floating Point Exceptions have been reported - 12,915,279,178 cycles # 2.994 GHz - 31,622,192,060 instructions # 2.45 insn per cycle - 4.314454195 seconds time elapsed + 13,122,812,956 cycles # 2.919 GHz + 31,726,329,549 instructions # 2.42 insn per cycle + 4.497261752 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.964702e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.728204e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.728204e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.964017e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.742650e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742650e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.611197 sec +TOTAL : 3.652673 sec INFO: No Floating Point Exceptions have been reported - 10,072,475,407 cycles # 2.786 GHz - 19,603,807,541 instructions # 1.95 insn per cycle - 3.616483385 seconds time elapsed + 10,238,914,237 cycles # 2.798 GHz + 19,709,087,829 instructions # 1.92 insn per cycle + 3.662709498 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.073817e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.916083e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.916083e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.987318e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.812393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.812393e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.432142 sec +TOTAL : 3.613935 sec INFO: No Floating Point Exceptions have been reported - 9,841,961,259 cycles # 2.864 GHz - 19,266,632,762 instructions # 1.96 insn per cycle - 3.437636381 seconds time elapsed + 10,087,966,707 cycles # 2.786 GHz + 19,370,511,659 instructions # 1.92 insn per cycle + 3.624175302 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.789510e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.380802e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380802e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.729073e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.292942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292942e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.918568 sec +TOTAL : 4.086984 sec INFO: No Floating Point Exceptions have been reported - 8,610,059,548 cycles # 2.195 GHz - 15,735,305,253 instructions # 1.83 insn per cycle - 3.924031077 seconds time elapsed + 8,757,365,492 cycles # 2.138 GHz + 15,836,859,575 instructions # 1.81 insn per cycle + 4.097082998 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index d1d5094eb..1574c6c3c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,46 +1,104 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:03:51 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:51:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.996142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.634832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.796610e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.900321 sec +INFO: No Floating Point Exceptions have been reported + 6,197,801,426 cycles # 2.910 GHz + 11,411,789,503 instructions # 1.84 insn per cycle + 2.187318710 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.393333e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.601446e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.601446e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.130354 sec -INFO: No Floating Point Exceptions have been reported - 17,957,918,408 cycles # 3.499 GHz - 46,199,808,889 instructions # 2.57 insn per cycle - 5.133046363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.025883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199963e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.587430 sec +INFO: No Floating Point Exceptions have been reported + 19,281,256,149 cycles # 2.928 GHz + 46,192,094,635 instructions # 2.40 insn per cycle + 6.597986195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.190684e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.807600e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.807600e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.482551 sec -INFO: No Floating Point Exceptions have been reported - 12,189,819,446 cycles # 3.498 GHz - 31,609,313,793 instructions # 2.59 insn per cycle - 3.485227668 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.548679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019540e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.521143 sec +INFO: No Floating Point Exceptions have been reported + 13,278,378,525 cycles # 2.931 GHz + 31,736,760,460 instructions # 2.39 insn per cycle + 4.531295049 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.650686e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.556675e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.556675e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.946291 sec -INFO: No Floating Point Exceptions have been reported - 9,962,849,375 cycles # 3.379 GHz - 19,490,420,245 instructions # 1.96 insn per cycle - 2.948882900 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.952371e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.722642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.722642e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.672885 sec +INFO: No Floating Point Exceptions have been reported + 10,228,932,843 cycles # 2.778 GHz + 19,706,958,837 instructions # 1.93 insn per cycle + 3.682647007 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.757765e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.762663e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.762663e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.874720 sec -INFO: No Floating Point Exceptions have been reported - 9,727,997,634 cycles # 3.381 GHz - 18,952,289,125 instructions # 1.95 insn per cycle - 2.877321520 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.003393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.813864e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813864e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.587790 sec +INFO: No Floating Point Exceptions have been reported + 10,033,694,863 cycles # 2.790 GHz + 19,370,562,804 instructions # 1.93 insn per cycle + 3.597832664 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.513889e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.281259e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.281259e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.379322 sec -INFO: No Floating Point Exceptions have been reported - 8,090,440,999 cycles # 3.397 GHz - 15,415,659,785 instructions # 1.91 insn per cycle - 2.381954871 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.709775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272417e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272417e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.131241 sec +INFO: No Floating Point Exceptions have been reported + 8,787,275,470 cycles # 2.123 GHz + 15,836,849,319 instructions # 1.80 insn per cycle + 4.141346630 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 02d193c2e..3b02782d4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:41:37 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:08:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.564401e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.700588e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875805e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.684524 sec +INFO: No Floating Point Exceptions have been reported + 2,669,304,279 cycles # 2.860 GHz + 4,081,785,887 instructions # 1.53 insn per cycle + 0.991787338 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.369826e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.571666e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.571666e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.209413 sec -INFO: No Floating Point Exceptions have been reported - 18,236,578,223 cycles # 3.499 GHz - 46,161,391,936 instructions # 2.53 insn per cycle - 5.212237683 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.030249e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204005e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.555775 sec +INFO: No Floating Point Exceptions have been reported + 19,264,579,235 cycles # 2.934 GHz + 46,142,725,089 instructions # 2.40 insn per cycle + 6.567398103 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.211145e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.843707e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.843707e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.451760 sec -INFO: No Floating Point Exceptions have been reported - 12,081,796,496 cycles # 3.498 GHz - 31,582,837,123 instructions # 2.61 insn per cycle - 3.454572570 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.561230e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.041001e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041001e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.486484 sec +INFO: No Floating Point Exceptions have been reported + 13,180,735,522 cycles # 2.931 GHz + 31,698,753,932 instructions # 2.40 insn per cycle + 4.497601224 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1649) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.641922e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.553612e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.553612e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.963547 sec -INFO: No Floating Point Exceptions have been reported - 10,017,592,760 cycles # 3.377 GHz - 19,470,985,067 instructions # 1.94 insn per cycle - 2.966484857 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.959717e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.742175e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742175e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.661759 sec +INFO: No Floating Point Exceptions have been reported + 10,296,251,645 cycles # 2.804 GHz + 19,686,624,933 instructions # 1.91 insn per cycle + 3.673145773 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1895) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.759408e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.761689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.761689e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.867725 sec -INFO: No Floating Point Exceptions have been reported - 9,705,911,202 cycles # 3.382 GHz - 18,967,030,089 instructions # 1.95 insn per cycle - 2.870545716 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.002735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826033e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.593177 sec +INFO: No Floating Point Exceptions have been reported + 10,082,197,083 cycles # 2.798 GHz + 19,384,360,663 instructions # 1.92 insn per cycle + 3.604587412 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.546143e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.327979e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.327979e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.362590 sec -INFO: No Floating Point Exceptions have been reported - 8,029,597,321 cycles # 3.395 GHz - 15,285,748,356 instructions # 1.90 insn per cycle - 2.365517830 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.753760e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344313e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344313e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.042847 sec +INFO: No Floating Point Exceptions have been reported + 8,657,274,459 cycles # 2.136 GHz + 15,708,080,882 instructions # 1.81 insn per cycle + 4.054289402 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 834) (512y: 156) (512z: 1237) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index e24c5e9c3..9adc226af 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:54:27 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:33:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.203471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505439e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.793875e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.692895 sec +INFO: No Floating Point Exceptions have been reported + 2,667,104,303 cycles # 2.870 GHz + 4,197,568,068 instructions # 1.57 insn per cycle + 0.991023921 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.158880e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.704828e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.704828e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.528801 sec -INFO: No Floating Point Exceptions have been reported - 12,352,680,894 cycles # 3.498 GHz - 32,659,629,837 instructions # 2.64 insn per cycle - 3.531526438 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.609773e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.065122e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.065122e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.323744 sec +INFO: No Floating Point Exceptions have been reported + 12,680,899,676 cycles # 2.930 GHz + 32,573,373,461 instructions # 2.57 insn per cycle + 4.329822925 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.873378e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.030687e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.030687e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.793698 sec -INFO: No Floating Point Exceptions have been reported - 9,781,165,302 cycles # 3.498 GHz - 24,647,922,556 instructions # 2.52 insn per cycle - 2.796450977 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.020941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.881765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.881765e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.528541 sec +INFO: No Floating Point Exceptions have been reported + 10,343,960,768 cycles # 2.928 GHz + 24,660,363,232 instructions # 2.38 insn per cycle + 3.534351751 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.993178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.210358e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.210358e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.675935 sec -INFO: No Floating Point Exceptions have been reported - 9,066,942,153 cycles # 3.385 GHz - 16,816,785,033 instructions # 1.85 insn per cycle - 2.678491592 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1614) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.219408e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.252697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.252697e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.248651 sec +INFO: No Floating Point Exceptions have been reported + 9,122,079,188 cycles # 2.804 GHz + 16,949,443,243 instructions # 1.86 insn per cycle + 3.254977824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1616) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.297139e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.834117e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.834117e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.495870 sec -INFO: No Floating Point Exceptions have been reported - 8,734,985,783 cycles # 3.496 GHz - 16,044,259,969 instructions # 1.84 insn per cycle - 2.498871546 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.281631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372235e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372235e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.167737 sec +INFO: No Floating Point Exceptions have been reported + 8,922,630,281 cycles # 2.812 GHz + 16,368,012,425 instructions # 1.83 insn per cycle + 3.174211351 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1352) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.708072e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.704812e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.704812e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.285428 sec -INFO: No Floating Point Exceptions have been reported - 7,777,012,165 cycles # 3.399 GHz - 14,262,132,406 instructions # 1.83 insn per cycle - 2.288386307 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.953386e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.685128e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.685128e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.635921 sec +INFO: No Floating Point Exceptions have been reported + 7,907,839,436 cycles # 2.172 GHz + 14,593,864,068 instructions # 1.85 insn per cycle + 3.642895717 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 955) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index ee63fcf1e..a111e191c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:54:42 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:34:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.369824e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600320e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.803756e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.688866 sec +INFO: No Floating Point Exceptions have been reported + 2,687,883,365 cycles # 2.879 GHz + 4,137,672,828 instructions # 1.54 insn per cycle + 0.991982760 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.978299e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.127356e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.127356e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.729610 sec -INFO: No Floating Point Exceptions have been reported - 9,549,079,075 cycles # 3.497 GHz - 25,595,217,248 instructions # 2.68 insn per cycle - 2.732225799 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.085552e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924035e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924035e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.430840 sec +INFO: No Floating Point Exceptions have been reported + 10,018,256,596 cycles # 2.916 GHz + 25,507,694,274 instructions # 2.55 insn per cycle + 3.436494229 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.502945e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.421522e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.421522e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.400035 sec -INFO: No Floating Point Exceptions have been reported - 8,402,694,834 cycles # 3.498 GHz - 21,455,745,451 instructions # 2.55 insn per cycle - 2.402782234 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.371301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.639987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.639987e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.073954 sec +INFO: No Floating Point Exceptions have been reported + 9,025,495,783 cycles # 2.931 GHz + 21,478,170,721 instructions # 2.38 insn per cycle + 3.080490687 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.366238e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.988258e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.988258e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.443390 sec -INFO: No Floating Point Exceptions have been reported - 8,554,759,654 cycles # 3.498 GHz - 15,782,102,704 instructions # 1.84 insn per cycle - 2.446098201 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.348565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.529213e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529213e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.091062 sec +INFO: No Floating Point Exceptions have been reported + 8,721,037,733 cycles # 2.816 GHz + 15,901,191,500 instructions # 1.82 insn per cycle + 3.097416237 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1489) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.584374e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.475687e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.475687e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.342649 sec -INFO: No Floating Point Exceptions have been reported - 8,200,971,563 cycles # 3.497 GHz - 15,302,173,377 instructions # 1.87 insn per cycle - 2.345623202 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.428348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.696607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.696607e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.003427 sec +INFO: No Floating Point Exceptions have been reported + 8,472,649,935 cycles # 2.816 GHz + 15,622,192,614 instructions # 1.84 insn per cycle + 3.009695803 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.860548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.067560e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.067560e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.215960 sec -INFO: No Floating Point Exceptions have been reported - 7,546,957,343 cycles # 3.402 GHz - 13,972,929,248 instructions # 1.85 insn per cycle - 2.218509052 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.053185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.879921e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879921e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.481285 sec +INFO: No Floating Point Exceptions have been reported + 7,632,139,448 cycles # 2.189 GHz + 14,304,829,590 instructions # 1.87 insn per cycle + 3.488200715 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1032) (512y: 164) (512z: 877) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 8485d81a4..bc5233a5b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:41:55 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:09:06 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.192132e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.336696e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.300693e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.585451 sec +INFO: No Floating Point Exceptions have been reported + 2,336,870,734 cycles # 2.880 GHz + 3,644,936,097 instructions # 1.56 insn per cycle + 0.870296260 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.529171e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.820324e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.820324e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.712952 sec -INFO: No Floating Point Exceptions have been reported - 16,485,220,944 cycles # 3.496 GHz - 45,120,207,116 instructions # 2.74 insn per cycle - 4.715622382 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 415) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.072651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.269381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269381e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.255061 sec +INFO: No Floating Point Exceptions have been reported + 18,355,246,050 cycles # 2.931 GHz + 45,043,077,667 instructions # 2.45 insn per cycle + 6.263286658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.097480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.693610e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.693610e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.620028 sec -INFO: No Floating Point Exceptions have been reported - 9,174,027,170 cycles # 3.498 GHz - 22,340,923,303 instructions # 2.44 insn per cycle - 2.622709100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1960) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.241713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.430745e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430745e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.200925 sec +INFO: No Floating Point Exceptions have been reported + 9,386,491,422 cycles # 2.926 GHz + 22,329,398,339 instructions # 2.38 insn per cycle + 3.208910381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.369000e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.963087e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.963087e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.426987 sec -INFO: No Floating Point Exceptions have been reported - 8,231,967,150 cycles # 3.389 GHz - 15,657,340,491 instructions # 1.90 insn per cycle - 2.429619388 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.404117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697492e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697492e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.006682 sec +INFO: No Floating Point Exceptions have been reported + 8,484,958,572 cycles # 2.815 GHz + 15,797,352,563 instructions # 1.86 insn per cycle + 3.014624816 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.422729e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.075478e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.075478e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.408657 sec -INFO: No Floating Point Exceptions have been reported - 8,176,347,784 cycles # 3.391 GHz - 15,310,652,466 instructions # 1.87 insn per cycle - 2.411285336 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.426806e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765840e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.982618 sec +INFO: No Floating Point Exceptions have been reported + 8,401,165,701 cycles # 2.811 GHz + 15,653,777,374 instructions # 1.86 insn per cycle + 2.990373231 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.519738e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.079037e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.079037e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.963725 sec -INFO: No Floating Point Exceptions have been reported - 6,708,366,667 cycles # 3.412 GHz - 12,566,215,783 instructions # 1.87 insn per cycle - 1.966351531 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1699) (512y: 5) (512z: 1440) +EvtsPerSec[Rmb+ME] (23) = ( 2.426463e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.722731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722731e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.987827 sec +INFO: No Floating Point Exceptions have been reported + 6,753,744,387 cycles # 2.255 GHz + 12,906,211,238 instructions # 1.91 insn per cycle + 2.995926915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index f5c7fefa8..95b868152 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,48 +1,113 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:00:43 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:45:42 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.378449e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.944337 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,200,765,179 cycles # 2.831 GHz + 10,073,714,089 instructions # 1.62 insn per cycle + 2.274311561 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.509331e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.793589e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.793589e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.812918 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.013177e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.196081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.753252 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 16,838,806,069 cycles # 3.496 GHz - 45,271,938,603 instructions # 2.69 insn per cycle - 4.816701893 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 415) (avx2: 0) (512y: 0) (512z: 0) + 19,140,260,721 cycles # 2.851 GHz + 45,281,984,182 instructions # 2.37 insn per cycle + 6.770979415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -50,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.997929e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.418682e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.418682e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.782160 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.075255e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.101362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.101362e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.587325 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,741,398,345 cycles # 3.497 GHz - 23,678,484,700 instructions # 2.43 insn per cycle - 2.786007725 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1960) (avx2: 0) (512y: 0) (512z: 0) + 10,243,661,246 cycles # 2.856 GHz + 23,736,113,257 instructions # 2.32 insn per cycle + 3.601313820 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -84,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.220334e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.654086e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.654086e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.603829 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.208611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299964e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.407535 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 8,855,066,227 cycles # 3.397 GHz - 16,780,902,123 instructions # 1.90 insn per cycle - 2.607149423 seconds time elapsed + 9,325,104,909 cycles # 2.739 GHz + 16,992,883,294 instructions # 1.82 insn per cycle + 3.420829574 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -118,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.270925e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.750140e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.750140e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.584696 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.223475e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.357947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.357947e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.391442 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 8,795,389,119 cycles # 3.399 GHz - 16,433,900,723 instructions # 1.87 insn per cycle - 2.587968687 seconds time elapsed + 9,279,675,392 cycles # 2.737 GHz + 16,862,711,706 instructions # 1.82 insn per cycle + 3.405369019 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -152,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.209188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.186431e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.186431e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.162814 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.268012e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359028e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359028e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.328677 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 7,406,558,991 cycles # 3.419 GHz - 13,776,766,135 instructions # 1.86 insn per cycle - 2.166693177 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1699) (512y: 5) (512z: 1440) + 7,543,875,114 cycles # 2.260 GHz + 14,180,005,728 instructions # 1.88 insn per cycle + 3.340981281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -186,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 9e5d47f7f..15fa7d311 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:05:37 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:58:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.336762e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.510278e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.479828e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.311031 sec +INFO: No Floating Point Exceptions have been reported + 4,430,467,531 cycles # 2.889 GHz + 6,960,795,222 instructions # 1.57 insn per cycle + 1.590735457 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.528174e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.818976e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.818976e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.075223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270769e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.713586 sec +TOTAL : 6.687798 sec INFO: No Floating Point Exceptions have been reported - 16,499,352,454 cycles # 3.499 GHz - 45,120,371,859 instructions # 2.73 insn per cycle - 4.716102408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 415) (avx2: 0) (512y: 0) (512z: 0) + 19,629,378,634 cycles # 2.933 GHz + 45,588,143,016 instructions # 2.32 insn per cycle + 6.694819566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.123655e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.692354e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.692354e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.248280e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.433718e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.433718e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.599672 sec +TOTAL : 3.634348 sec INFO: No Floating Point Exceptions have been reported - 9,101,674,408 cycles # 3.498 GHz - 22,341,026,349 instructions # 2.45 insn per cycle - 2.602087129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1960) (avx2: 0) (512y: 0) (512z: 0) + 10,674,006,930 cycles # 2.932 GHz + 22,771,305,471 instructions # 2.13 insn per cycle + 3.641620548 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.364034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.958009e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.958009e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.377112e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647798e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.427714 sec +TOTAL : 3.480978 sec INFO: No Floating Point Exceptions have been reported - 8,236,322,762 cycles # 3.390 GHz - 15,658,005,595 instructions # 1.90 insn per cycle - 2.430068353 seconds time elapsed + 9,770,888,729 cycles # 2.802 GHz + 16,055,948,307 instructions # 1.64 insn per cycle + 3.487814115 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.420388e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.074630e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.074630e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.421430e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770475e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.409980 sec +TOTAL : 3.434581 sec INFO: No Floating Point Exceptions have been reported - 8,178,366,004 cycles # 3.391 GHz - 15,311,025,955 instructions # 1.87 insn per cycle - 2.412392974 seconds time elapsed + 9,740,474,003 cycles # 2.831 GHz + 15,722,386,015 instructions # 1.61 insn per cycle + 3.441655213 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.512869e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.095268e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.095268e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.441505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.731463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731463e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.963386 sec +TOTAL : 3.419856 sec INFO: No Floating Point Exceptions have been reported - 6,710,290,885 cycles # 3.414 GHz - 12,566,628,127 instructions # 1.87 insn per cycle - 1.965805671 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1699) (512y: 5) (512z: 1440) + 8,031,724,309 cycles # 2.344 GHz + 12,960,768,751 instructions # 1.61 insn per cycle + 3.427508239 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 8c6a0f3af..cd1edf8b0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:51:28 +DATE: 2024-09-15_11:55:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.262526e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.390328e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.387630e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.341129e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.656597e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.613096e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.856218 sec +TOTAL : 0.860917 sec INFO: No Floating Point Exceptions have been reported - 3,161,544,131 cycles # 2.937 GHz - 6,517,835,009 instructions # 2.06 insn per cycle - 1.134957109 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,166,806,232 cycles # 2.886 GHz + 6,390,531,049 instructions # 2.02 insn per cycle + 1.154020491 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.096959e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.295586e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.295586e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.067801e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267766e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267766e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.095656 sec +TOTAL : 6.288953 sec INFO: No Floating Point Exceptions have been reported - 18,241,874,835 cycles # 2.990 GHz - 45,006,676,387 instructions # 2.47 insn per cycle - 6.100863007 seconds time elapsed + 18,410,332,076 cycles # 2.925 GHz + 45,054,816,993 instructions # 2.45 insn per cycle + 6.296062030 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.302660e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506502e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.250758e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.437877e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.437877e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.102726 sec +TOTAL : 3.189367 sec INFO: No Floating Point Exceptions have been reported - 9,285,265,734 cycles # 2.989 GHz - 22,287,225,294 instructions # 2.40 insn per cycle - 3.107827990 seconds time elapsed + 9,348,605,615 cycles # 2.926 GHz + 22,330,060,575 instructions # 2.39 insn per cycle + 3.196473636 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.467927e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.777181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777181e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.398564e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676038e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.912326 sec +TOTAL : 3.014863 sec INFO: No Floating Point Exceptions have been reported - 8,373,944,597 cycles # 2.872 GHz - 15,754,624,876 instructions # 1.88 insn per cycle - 2.917669267 seconds time elapsed + 8,484,963,512 cycles # 2.808 GHz + 15,797,579,651 instructions # 1.86 insn per cycle + 3.022063297 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.499171e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.843892e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.843892e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.428672e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765186e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.880358 sec +TOTAL : 2.974553 sec INFO: No Floating Point Exceptions have been reported - 8,253,904,909 cycles # 2.861 GHz - 15,614,471,146 instructions # 1.89 insn per cycle - 2.885437824 seconds time elapsed + 8,386,796,916 cycles # 2.814 GHz + 15,657,891,969 instructions # 1.87 insn per cycle + 2.981555653 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.523885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854007e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854007e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.445697e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.728486e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728486e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.856554 sec +TOTAL : 2.962981 sec INFO: No Floating Point Exceptions have been reported - 6,625,726,004 cycles # 2.316 GHz - 12,865,251,919 instructions # 1.94 insn per cycle - 2.861676497 seconds time elapsed + 6,681,340,868 cycles # 2.251 GHz + 12,907,012,003 instructions # 1.93 insn per cycle + 2.970184514 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index a53a22491..6589d6b6f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,46 +1,104 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_10:04:09 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:52:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.003631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.641927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.671119e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.488823 sec +INFO: No Floating Point Exceptions have been reported + 4,967,613,517 cycles # 2.899 GHz + 9,171,831,308 instructions # 1.85 insn per cycle + 1.769936667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.530486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.822206e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.822206e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.707228 sec -INFO: No Floating Point Exceptions have been reported - 16,476,190,138 cycles # 3.499 GHz - 45,120,500,761 instructions # 2.74 insn per cycle - 4.709680460 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 415) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.073262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267974e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.250612 sec +INFO: No Floating Point Exceptions have been reported + 18,322,110,309 cycles # 2.929 GHz + 45,051,388,062 instructions # 2.46 insn per cycle + 6.257775360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.115607e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.696364e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.696364e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.608021 sec -INFO: No Floating Point Exceptions have been reported - 9,130,129,462 cycles # 3.498 GHz - 22,341,232,731 instructions # 2.45 insn per cycle - 2.610480018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1960) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.247339e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424409e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.190677 sec +INFO: No Floating Point Exceptions have been reported + 9,359,387,266 cycles # 2.928 GHz + 22,331,498,291 instructions # 2.39 insn per cycle + 3.197654484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.362847e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.962010e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.962010e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.431116 sec -INFO: No Floating Point Exceptions have been reported - 8,246,783,426 cycles # 3.389 GHz - 15,656,977,176 instructions # 1.90 insn per cycle - 2.433931557 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.363785e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679448e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.047896 sec +INFO: No Floating Point Exceptions have been reported + 8,583,132,130 cycles # 2.811 GHz + 15,806,350,534 instructions # 1.84 insn per cycle + 3.054826008 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.421996e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.074309e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.074309e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.408255 sec -INFO: No Floating Point Exceptions have been reported - 8,173,732,939 cycles # 3.391 GHz - 15,316,165,057 instructions # 1.87 insn per cycle - 2.410753438 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.430638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.771124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.771124e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.974287 sec +INFO: No Floating Point Exceptions have been reported + 8,401,059,167 cycles # 2.818 GHz + 15,651,581,046 instructions # 1.86 insn per cycle + 2.981875735 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.525748e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.088744e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.088744e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.959226 sec -INFO: No Floating Point Exceptions have been reported - 6,687,341,475 cycles # 3.413 GHz - 12,566,490,219 instructions # 1.88 insn per cycle - 1.961667895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1699) (512y: 5) (512z: 1440) +EvtsPerSec[Rmb+ME] (23) = ( 2.438697e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736996e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.973534 sec +INFO: No Floating Point Exceptions have been reported + 6,722,109,548 cycles # 2.256 GHz + 12,906,606,049 instructions # 1.92 insn per cycle + 2.981153680 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index ae42fc422..bed528f6e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:42:11 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:09:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.185244e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.645179e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.802907e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.581560 sec +INFO: No Floating Point Exceptions have been reported + 2,321,713,730 cycles # 2.869 GHz + 3,648,873,879 instructions # 1.57 insn per cycle + 0.865547071 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.527178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.817518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.817518e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.716186 sec -INFO: No Floating Point Exceptions have been reported - 16,508,980,588 cycles # 3.499 GHz - 45,094,295,346 instructions # 2.73 insn per cycle - 4.718797963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 401) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.067642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.262437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.262437e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.278398 sec +INFO: No Floating Point Exceptions have been reported + 18,394,901,899 cycles # 2.927 GHz + 45,013,341,285 instructions # 2.45 insn per cycle + 6.286516700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.116749e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.679507e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.679507e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.607708 sec -INFO: No Floating Point Exceptions have been reported - 9,126,539,816 cycles # 3.497 GHz - 22,308,706,665 instructions # 2.44 insn per cycle - 2.610294990 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1943) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.249815e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.432877e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432877e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.188261 sec +INFO: No Floating Point Exceptions have been reported + 9,382,779,388 cycles # 2.937 GHz + 22,291,184,899 instructions # 2.38 insn per cycle + 3.196123670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1939) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.357584e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.955753e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.955753e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.432420 sec -INFO: No Floating Point Exceptions have been reported - 8,250,814,494 cycles # 3.389 GHz - 15,650,712,226 instructions # 1.90 insn per cycle - 2.435062865 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.394804e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683014e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.016616 sec +INFO: No Floating Point Exceptions have been reported + 8,501,260,075 cycles # 2.812 GHz + 15,791,303,131 instructions # 1.86 insn per cycle + 3.024850695 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2539) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.403856e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.068697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.068697e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.420547 sec -INFO: No Floating Point Exceptions have been reported - 8,214,345,891 cycles # 3.390 GHz - 15,292,647,944 instructions # 1.86 insn per cycle - 2.423327603 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.433401e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784502e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.973425 sec +INFO: No Floating Point Exceptions have been reported + 8,414,276,106 cycles # 2.823 GHz + 15,633,261,481 instructions # 1.86 insn per cycle + 2.981340876 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.531179e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.109760e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.109760e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.960305 sec -INFO: No Floating Point Exceptions have been reported - 6,693,113,307 cycles # 3.410 GHz - 12,545,094,081 instructions # 1.87 insn per cycle - 1.963008201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 6) (512z: 1428) +EvtsPerSec[Rmb+ME] (23) = ( 2.449856e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.750896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.750896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.962493 sec +INFO: No Floating Point Exceptions have been reported + 6,702,761,235 cycles # 2.257 GHz + 12,885,740,598 instructions # 1.92 insn per cycle + 2.970728824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1667) (512y: 18) (512z: 1428) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 595c7c378..711fbf3a5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:54:55 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:34:45 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.272057e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.453757e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.411368e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.577320 sec +INFO: No Floating Point Exceptions have been reported + 2,322,367,280 cycles # 2.881 GHz + 3,616,476,077 instructions # 1.56 insn per cycle + 0.862621614 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.223119e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.896152e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.896152e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.428285 sec -INFO: No Floating Point Exceptions have been reported - 12,002,417,308 cycles # 3.499 GHz - 32,462,338,273 instructions # 2.70 insn per cycle - 3.430884773 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.635657e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129532e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129532e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.218771 sec +INFO: No Floating Point Exceptions have been reported + 12,191,913,623 cycles # 2.887 GHz + 32,293,306,178 instructions # 2.65 insn per cycle + 4.224304323 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.707841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.157515e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.157515e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.288783 sec -INFO: No Floating Point Exceptions have been reported - 8,007,569,874 cycles # 3.496 GHz - 18,789,517,322 instructions # 2.35 insn per cycle - 2.291538020 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.654215e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.464911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.464911e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.746425 sec +INFO: No Floating Point Exceptions have been reported + 8,013,864,577 cycles # 2.914 GHz + 18,725,751,725 instructions # 2.34 insn per cycle + 2.751635696 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.883958e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.203609e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.203609e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.176617 sec -INFO: No Floating Point Exceptions have been reported - 7,408,185,784 cycles # 3.400 GHz - 14,166,813,612 instructions # 1.91 insn per cycle - 2.179197921 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.734762e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.516819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.516819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.676523 sec +INFO: No Floating Point Exceptions have been reported + 7,476,186,846 cycles # 2.791 GHz + 14,257,923,546 instructions # 1.91 insn per cycle + 2.682062632 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.115855e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.760908e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.760908e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.097997 sec -INFO: No Floating Point Exceptions have been reported - 7,334,996,617 cycles # 3.492 GHz - 13,668,400,743 instructions # 1.86 insn per cycle - 2.100556039 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.834242e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.778618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778618e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.592675 sec +INFO: No Floating Point Exceptions have been reported + 7,344,696,907 cycles # 2.828 GHz + 13,952,931,831 instructions # 1.90 insn per cycle + 2.598198803 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.509034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.088779e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.088779e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.965872 sec -INFO: No Floating Point Exceptions have been reported - 6,715,050,815 cycles # 3.411 GHz - 13,140,776,396 instructions # 1.96 insn per cycle - 1.968672320 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.491060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.875896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.875896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.903149 sec +INFO: No Floating Point Exceptions have been reported + 6,571,286,194 cycles # 2.260 GHz + 13,433,545,963 instructions # 2.04 insn per cycle + 2.908820313 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 1) (512z: 1199) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052562326775E-002 Relative difference = 1.997440588685788e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index b638646c0..6fc527ffa 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:55:09 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:35:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.289380e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.618768e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.817817e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.580301 sec +INFO: No Floating Point Exceptions have been reported + 2,323,629,755 cycles # 2.848 GHz + 3,593,641,981 instructions # 1.55 insn per cycle + 0.873274895 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.031434e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.437242e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.437242e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 2.672442 sec -INFO: No Floating Point Exceptions have been reported - 9,356,064,998 cycles # 3.498 GHz - 25,877,878,335 instructions # 2.77 insn per cycle - 2.674908403 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.209448e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.220446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.220446e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.220192 sec +INFO: No Floating Point Exceptions have been reported + 9,366,774,041 cycles # 2.905 GHz + 25,702,432,609 instructions # 2.74 insn per cycle + 3.225730639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039838495897E-002 Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.313162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.027611e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.027611e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.050449 sec -INFO: No Floating Point Exceptions have been reported - 7,172,558,370 cycles # 3.493 GHz - 16,960,397,750 instructions # 2.36 insn per cycle - 2.053556990 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.014104e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.557363e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.557363e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.462960 sec +INFO: No Floating Point Exceptions have been reported + 7,216,847,131 cycles # 2.925 GHz + 16,891,846,951 instructions # 2.34 insn per cycle + 2.468502980 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.261989e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.279942e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.279942e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.032982 sec -INFO: No Floating Point Exceptions have been reported - 7,108,736,371 cycles # 3.493 GHz - 13,544,695,392 instructions # 1.91 insn per cycle - 2.035471238 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.924187e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.020326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.020326e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.522608 sec +INFO: No Floating Point Exceptions have been reported + 7,150,122,380 cycles # 2.829 GHz + 13,633,449,373 instructions # 1.91 insn per cycle + 2.528205937 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.381952e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.505595e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.505595e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.005906 sec -INFO: No Floating Point Exceptions have been reported - 7,012,128,945 cycles # 3.492 GHz - 13,158,386,951 instructions # 1.88 insn per cycle - 2.008383123 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.976818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.175866e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.175866e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.485267 sec +INFO: No Floating Point Exceptions have been reported + 7,047,642,186 cycles # 2.830 GHz + 13,442,931,038 instructions # 1.91 insn per cycle + 2.490839699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1940) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.649521e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.539875e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.539875e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.922685 sec -INFO: No Floating Point Exceptions have been reported - 6,575,449,089 cycles # 3.416 GHz - 12,870,465,015 instructions # 1.96 insn per cycle - 1.925188165 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.604837e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.103202e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.103202e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.790744 sec +INFO: No Floating Point Exceptions have been reported + 6,349,721,778 cycles # 2.272 GHz + 13,164,680,615 instructions # 2.07 insn per cycle + 2.796235299 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2033) (512y: 1) (512z: 1085) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052536860923E-002 Relative difference = 1.977588895209662e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 05a098516..caa67d1a4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:42:26 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:10:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.610039e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.567106e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.762593e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.688497 sec +INFO: No Floating Point Exceptions have been reported + 2,665,329,968 cycles # 2.845 GHz + 4,055,682,627 instructions # 1.52 insn per cycle + 0.995154695 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.348782e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.543232e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.543232e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.281137 sec -INFO: No Floating Point Exceptions have been reported - 18,484,484,247 cycles # 3.498 GHz - 46,389,198,851 instructions # 2.51 insn per cycle - 5.284033404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.002881e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.168467e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.726627 sec +INFO: No Floating Point Exceptions have been reported + 19,724,365,546 cycles # 2.928 GHz + 46,388,641,620 instructions # 2.35 insn per cycle + 6.737968541 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.263266e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.925413e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.925413e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.385176 sec -INFO: No Floating Point Exceptions have been reported - 11,851,095,587 cycles # 3.498 GHz - 31,377,407,346 instructions # 2.65 insn per cycle - 3.387974553 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1725) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.617185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.143896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.347629 sec +INFO: No Floating Point Exceptions have been reported + 12,771,945,524 cycles # 2.931 GHz + 31,577,972,239 instructions # 2.47 insn per cycle + 4.359278192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.728365e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.705136e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.705136e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.880423 sec -INFO: No Floating Point Exceptions have been reported - 9,745,012,452 cycles # 3.380 GHz - 19,342,015,984 instructions # 1.98 insn per cycle - 2.883193305 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.943978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.720569e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.720569e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.688531 sec +INFO: No Floating Point Exceptions have been reported + 10,322,037,008 cycles # 2.790 GHz + 19,570,801,424 instructions # 1.90 insn per cycle + 3.699996508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.812623e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.856125e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.856125e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.823063 sec -INFO: No Floating Point Exceptions have been reported - 9,558,353,623 cycles # 3.383 GHz - 18,881,988,531 instructions # 1.98 insn per cycle - 2.825771584 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1788) (512y: 189) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.986657e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.789520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789520e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.618212 sec +INFO: No Floating Point Exceptions have been reported + 10,149,499,266 cycles # 2.797 GHz + 19,312,096,557 instructions # 1.90 insn per cycle + 3.629679706 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.615424e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.488606e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.488606e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.328192 sec -INFO: No Floating Point Exceptions have been reported - 7,919,315,034 cycles # 3.398 GHz - 14,649,546,517 instructions # 1.85 insn per cycle - 2.331100870 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.777391e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.385382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.385382e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.992684 sec +INFO: No Floating Point Exceptions have been reported + 8,588,251,503 cycles # 2.146 GHz + 15,161,251,122 instructions # 1.77 insn per cycle + 4.003805537 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 954) (512y: 154) (512z: 1322) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index c389f9cd3..ce1b16067 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_09:42:44 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +DATE: 2024-09-15_11:10:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.695377e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.640031e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.828039e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.676129 sec +INFO: No Floating Point Exceptions have been reported + 2,632,225,960 cycles # 2.883 GHz + 4,132,384,248 instructions # 1.57 insn per cycle + 0.970493332 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.342163e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537359e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537359e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.305019 sec -INFO: No Floating Point Exceptions have been reported - 18,562,035,875 cycles # 3.497 GHz - 46,325,962,703 instructions # 2.50 insn per cycle - 5.307741235 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.005676e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176899e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.735465 sec +INFO: No Floating Point Exceptions have been reported + 19,720,225,593 cycles # 2.924 GHz + 46,326,489,596 instructions # 2.35 insn per cycle + 6.746197968 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.264537e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.928390e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.928390e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.383743 sec -INFO: No Floating Point Exceptions have been reported - 11,843,398,676 cycles # 3.498 GHz - 31,326,604,350 instructions # 2.65 insn per cycle - 3.386465222 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1717) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.574820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.144081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.144081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.449866 sec +INFO: No Floating Point Exceptions have been reported + 13,065,779,841 cycles # 2.930 GHz + 31,555,443,434 instructions # 2.42 insn per cycle + 4.460852067 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1711) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.730673e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.711929e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.711929e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.878183 sec -INFO: No Floating Point Exceptions have been reported - 9,738,984,624 cycles # 3.381 GHz - 19,315,525,724 instructions # 1.98 insn per cycle - 2.880952054 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.952135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.730440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.677079 sec +INFO: No Floating Point Exceptions have been reported + 10,320,566,663 cycles # 2.800 GHz + 19,557,785,526 instructions # 1.90 insn per cycle + 3.688245631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2026) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.799677e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.826248e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.826248e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.833572 sec -INFO: No Floating Point Exceptions have been reported - 9,590,732,308 cycles # 3.382 GHz - 18,945,760,365 instructions # 1.98 insn per cycle - 2.836512276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1782) (512y: 189) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.981919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782784e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782784e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.626089 sec +INFO: No Floating Point Exceptions have been reported + 10,150,645,903 cycles # 2.793 GHz + 19,388,040,023 instructions # 1.91 insn per cycle + 3.637342012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1779) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.668190e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.605971e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.605971e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.302372 sec -INFO: No Floating Point Exceptions have been reported - 7,833,485,268 cycles # 3.399 GHz - 14,557,358,248 instructions # 1.86 insn per cycle - 2.305218493 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.806136e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.449559e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.931921 sec +INFO: No Floating Point Exceptions have been reported + 8,442,748,276 cycles # 2.150 GHz + 15,068,523,446 instructions # 1.78 insn per cycle + 3.943167549 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 947) (512y: 156) (512z: 1306) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index d95f5806e..aeadfaae6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:43:02 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:11:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.391981e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.330443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949573e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.534465 sec +INFO: No Floating Point Exceptions have been reported + 2,210,485,527 cycles # 2.869 GHz + 3,136,829,588 instructions # 1.42 insn per cycle + 0.828432932 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.389312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.443915e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.443915e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.515596 sec -INFO: No Floating Point Exceptions have been reported - 15,805,747,752 cycles # 3.498 GHz - 46,009,061,225 instructions # 2.91 insn per cycle - 4.518391816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.818281e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865529e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.911600 sec +INFO: No Floating Point Exceptions have been reported + 17,389,649,504 cycles # 2.935 GHz + 46,036,709,188 instructions # 2.65 insn per cycle + 5.925127688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.334009e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.530059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.530059e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.543744 sec -INFO: No Floating Point Exceptions have been reported - 8,905,161,760 cycles # 3.497 GHz - 27,766,078,865 instructions # 3.12 insn per cycle - 2.546514702 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.165855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325075e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.453922 sec +INFO: No Floating Point Exceptions have been reported + 10,171,046,914 cycles # 2.936 GHz + 27,937,548,503 instructions # 2.75 insn per cycle + 3.465600263 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.025173e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.538989e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.538989e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.612872 sec -INFO: No Floating Point Exceptions have been reported - 5,373,981,708 cycles # 3.327 GHz - 12,508,193,951 instructions # 2.33 insn per cycle - 1.615725551 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.967598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.358232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.358232e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.252729 sec +INFO: No Floating Point Exceptions have been reported + 6,219,848,194 cycles # 2.748 GHz + 12,677,070,824 instructions # 2.04 insn per cycle + 2.263945260 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.242778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.784348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.784348e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.569547 sec -INFO: No Floating Point Exceptions have been reported - 5,230,919,018 cycles # 3.327 GHz - 12,042,680,237 instructions # 2.30 insn per cycle - 1.572354897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2408) (512y: 124) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.478036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.948706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.948706e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.055945 sec +INFO: No Floating Point Exceptions have been reported + 5,693,440,562 cycles # 2.756 GHz + 12,116,317,958 instructions # 2.13 insn per cycle + 2.067013514 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.021885e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.679262e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.679262e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.428603 sec -INFO: No Floating Point Exceptions have been reported - 4,762,622,093 cycles # 3.328 GHz - 8,162,562,308 instructions # 1.71 insn per cycle - 1.431360119 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1434) (512y: 100) (512z: 1800) +EvtsPerSec[Rmb+ME] (23) = ( 3.483396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.667202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.667202e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.151048 sec +INFO: No Floating Point Exceptions have been reported + 5,836,401,977 cycles # 1.846 GHz + 8,391,475,751 instructions # 1.44 insn per cycle + 3.162234928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index ad5a13e48..9022013b0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,48 +1,113 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:00:59 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:46:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.381047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782856e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782856e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.840023 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,108,426,192 cycles # 2.861 GHz + 4,770,924,698 instructions # 1.53 insn per cycle + 1.146594198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.379234e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433848e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.433848e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.575375 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.806159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.852377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.852377e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 6.067320 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 16,014,621,465 cycles # 3.498 GHz - 46,077,344,584 instructions # 2.88 insn per cycle - 4.578943753 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) + 17,856,813,573 cycles # 2.936 GHz + 46,243,571,751 instructions # 2.59 insn per cycle + 6.083398130 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -50,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.276961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.470086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.470086e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.633136 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.134236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.289007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.289007e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.611222 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,220,909,924 cycles # 3.498 GHz - 27,947,793,718 instructions # 3.03 insn per cycle - 2.636696639 seconds time elapsed + 10,640,342,598 cycles # 2.934 GHz + 28,274,377,614 instructions # 2.66 insn per cycle + 3.627839941 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -84,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.939030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.438340e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.438340e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.685451 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.918119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295386e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295386e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.398079 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,629,964,174 cycles # 3.334 GHz - 12,792,824,955 instructions # 2.27 insn per cycle - 1.688966429 seconds time elapsed + 6,685,318,792 cycles # 2.770 GHz + 13,122,453,026 instructions # 1.96 insn per cycle + 2.414568983 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -118,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.138931e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.669541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.669541e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.648630 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.370026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.821205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.821205e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.218649 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,508,527,860 cycles # 3.335 GHz - 12,327,450,790 instructions # 2.24 insn per cycle - 1.652183387 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2408) (512y: 124) (512z: 0) + 6,189,711,182 cycles # 2.770 GHz + 12,557,371,407 instructions # 2.03 insn per cycle + 2.235322482 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -152,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.853351e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.481609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.481609e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.514137 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.469847e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.650575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650575e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.290950 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,063,485,088 cycles # 3.337 GHz - 8,404,195,842 instructions # 1.66 insn per cycle - 1.517689030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1434) (512y: 100) (512z: 1800) + 6,320,487,640 cycles # 1.913 GHz + 8,791,643,966 instructions # 1.39 insn per cycle + 3.307886654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -186,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index e0584de0e..85f95aac4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:05:52 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:58:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.424217e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466051e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011391e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 0.646400 sec +INFO: No Floating Point Exceptions have been reported + 2,528,248,856 cycles # 2.879 GHz + 3,688,196,917 instructions # 1.46 insn per cycle + 0.934986871 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.386492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.441570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.441570e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.819736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.867152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.867152e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.521365 sec +TOTAL : 5.985044 sec INFO: No Floating Point Exceptions have been reported - 15,819,167,567 cycles # 3.497 GHz - 46,012,035,541 instructions # 2.91 insn per cycle - 4.524387280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) + 17,600,111,411 cycles # 2.936 GHz + 46,124,554,790 instructions # 2.62 insn per cycle + 5.995774241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.326626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.523995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.523995e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.168097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.326675e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.549609 sec +TOTAL : 3.533047 sec INFO: No Floating Point Exceptions have been reported - 8,926,266,937 cycles # 3.497 GHz - 27,765,923,573 instructions # 3.11 insn per cycle - 2.552638543 seconds time elapsed + 10,405,596,477 cycles # 2.937 GHz + 28,016,084,485 instructions # 2.69 insn per cycle + 3.543840924 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.064803e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.587566e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.587566e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.000915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.392912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.392912e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.605186 sec +TOTAL : 2.319991 sec INFO: No Floating Point Exceptions have been reported - 5,345,902,615 cycles # 3.325 GHz - 12,508,926,379 instructions # 2.34 insn per cycle - 1.608234295 seconds time elapsed + 6,443,899,577 cycles # 2.766 GHz + 12,743,367,354 instructions # 1.98 insn per cycle + 2.330731733 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.265996e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.815884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.815884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.453355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.917864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.917864e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.566342 sec +TOTAL : 2.148765 sec INFO: No Floating Point Exceptions have been reported - 5,219,813,903 cycles # 3.327 GHz - 12,042,361,917 instructions # 2.31 insn per cycle - 1.569330498 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2408) (512y: 124) (512z: 0) + 5,965,699,512 cycles # 2.764 GHz + 12,146,978,501 instructions # 2.04 insn per cycle + 2.159407986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.979942e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.630626e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.630626e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.499391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683844e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.436870 sec +TOTAL : 3.221377 sec INFO: No Floating Point Exceptions have been reported - 4,787,289,926 cycles # 3.325 GHz - 8,162,057,125 instructions # 1.70 insn per cycle - 1.439928518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1434) (512y: 100) (512z: 1800) + 6,080,803,082 cycles # 1.882 GHz + 8,423,087,351 instructions # 1.39 insn per cycle + 3.232264502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index f1a7279f4..a207bf696 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:51:54 +DATE: 2024-09-15_11:55:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.273434e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.280412e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.968161e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.567312e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.455238e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007252e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.562544 sec +TOTAL : 0.563061 sec INFO: No Floating Point Exceptions have been reported - 2,322,125,439 cycles # 2.928 GHz - 3,645,722,192 instructions # 1.57 insn per cycle - 0.850249349 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,285,403,688 cycles # 2.879 GHz + 3,587,140,832 instructions # 1.57 insn per cycle + 0.850548380 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.858859e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905786e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905786e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.818247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865618e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.747389 sec +TOTAL : 5.910910 sec INFO: No Floating Point Exceptions have been reported - 17,237,233,259 cycles # 2.997 GHz - 45,935,224,991 instructions # 2.66 insn per cycle - 5.752803841 seconds time elapsed + 17,366,644,102 cycles # 2.933 GHz + 46,048,599,584 instructions # 2.65 insn per cycle + 5.921509733 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.234441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.394817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.394817e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165029e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.323362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.323362e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.347494 sec +TOTAL : 3.462860 sec INFO: No Floating Point Exceptions have been reported - 10,060,095,556 cycles # 3.001 GHz - 27,840,812,105 instructions # 2.77 insn per cycle - 3.352886847 seconds time elapsed + 10,189,100,613 cycles # 2.933 GHz + 27,958,354,355 instructions # 2.74 insn per cycle + 3.474989469 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.050411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.453973e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.453973e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.979916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.371493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.371493e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.184644 sec +TOTAL : 2.251622 sec INFO: No Floating Point Exceptions have been reported - 6,160,511,677 cycles # 2.816 GHz - 12,582,281,977 instructions # 2.04 insn per cycle - 2.190146485 seconds time elapsed + 6,239,671,348 cycles # 2.759 GHz + 12,698,985,692 instructions # 2.04 insn per cycle + 2.262379977 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.619096e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.102203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.102203e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.471204e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.935728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.935728e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.971941 sec +TOTAL : 2.061624 sec INFO: No Floating Point Exceptions have been reported - 5,581,458,565 cycles # 2.824 GHz - 12,016,750,817 instructions # 2.15 insn per cycle - 1.977376660 seconds time elapsed + 5,721,552,525 cycles # 2.762 GHz + 12,134,935,244 instructions # 2.12 insn per cycle + 2.072280515 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.649577e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841164e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841164e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.500472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684328e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.978936 sec +TOTAL : 3.142449 sec INFO: No Floating Point Exceptions have been reported - 5,694,372,557 cycles # 1.909 GHz - 8,293,954,373 instructions # 1.46 insn per cycle - 2.984356046 seconds time elapsed + 5,837,185,756 cycles # 1.853 GHz + 8,411,639,410 instructions # 1.44 insn per cycle + 3.153001098 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 3cab8a384..fa08fbada 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,46 +1,104 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:04:25 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:52:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.726431e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.401767e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003069e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.719407 sec +INFO: No Floating Point Exceptions have been reported + 2,718,962,196 cycles # 2.853 GHz + 4,261,744,999 instructions # 1.57 insn per cycle + 1.009279183 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.388417e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.443133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.443133e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.517887 sec -INFO: No Floating Point Exceptions have been reported - 15,810,043,704 cycles # 3.498 GHz - 46,009,474,578 instructions # 2.91 insn per cycle - 4.520527280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.804236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.851320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851320e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.958188 sec +INFO: No Floating Point Exceptions have been reported + 17,386,557,790 cycles # 2.923 GHz + 46,053,036,463 instructions # 2.65 insn per cycle + 5.968882862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.331411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.528535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.528535e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.546600 sec -INFO: No Floating Point Exceptions have been reported - 8,914,723,677 cycles # 3.497 GHz - 27,766,387,576 instructions # 3.11 insn per cycle - 2.549256003 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.150158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308857e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.477756 sec +INFO: No Floating Point Exceptions have been reported + 10,179,732,087 cycles # 2.919 GHz + 27,956,952,229 instructions # 2.75 insn per cycle + 3.488776572 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.047011e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.566002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.566002e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.609179 sec -INFO: No Floating Point Exceptions have been reported - 5,361,825,286 cycles # 3.327 GHz - 12,508,369,103 instructions # 2.33 insn per cycle - 1.611926548 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.931728e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.313640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.313640e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.272950 sec +INFO: No Floating Point Exceptions have been reported + 6,250,280,118 cycles # 2.738 GHz + 12,699,256,189 instructions # 2.03 insn per cycle + 2.283692354 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.262434e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.812559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.812559e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.566539 sec -INFO: No Floating Point Exceptions have been reported - 5,219,949,127 cycles # 3.327 GHz - 12,043,061,166 instructions # 2.31 insn per cycle - 1.569233016 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2408) (512y: 124) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.454076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.923276e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.923276e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.069149 sec +INFO: No Floating Point Exceptions have been reported + 5,725,799,884 cycles # 2.754 GHz + 12,135,179,967 instructions # 2.12 insn per cycle + 2.079783939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.986938e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.641655e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.641655e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.435190 sec -INFO: No Floating Point Exceptions have been reported - 4,780,402,139 cycles # 3.325 GHz - 8,162,971,020 instructions # 1.71 insn per cycle - 1.437874193 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1434) (512y: 100) (512z: 1800) +EvtsPerSec[Rmb+ME] (23) = ( 3.414550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592343e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.218110 sec +INFO: No Floating Point Exceptions have been reported + 5,959,079,250 cycles # 1.847 GHz + 8,422,189,176 instructions # 1.41 insn per cycle + 3.228977501 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 7eeea088a..328467ef6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:43:16 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:11:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.368553e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.328924e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.965685e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.531897 sec +INFO: No Floating Point Exceptions have been reported + 2,204,349,816 cycles # 2.864 GHz + 3,169,634,690 instructions # 1.44 insn per cycle + 0.825649601 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.438430e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.495812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.495812e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.428339 sec -INFO: No Floating Point Exceptions have been reported - 15,496,090,887 cycles # 3.497 GHz - 44,984,942,856 instructions # 2.90 insn per cycle - 4.431181750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.870268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919754e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.735202 sec +INFO: No Floating Point Exceptions have been reported + 16,847,713,247 cycles # 2.933 GHz + 44,981,738,957 instructions # 2.67 insn per cycle + 5.744748484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.530795e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.746195e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.746195e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.438783 sec -INFO: No Floating Point Exceptions have been reported - 8,531,978,132 cycles # 3.497 GHz - 26,607,320,687 instructions # 3.12 insn per cycle - 2.441589762 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.300705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.473658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473658e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.304655 sec +INFO: No Floating Point Exceptions have been reported + 9,659,083,497 cycles # 2.916 GHz + 26,749,720,361 instructions # 2.77 insn per cycle + 3.314062418 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2328) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.392634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.821875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.821875e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.760695 sec -INFO: No Floating Point Exceptions have been reported - 5,859,888,224 cycles # 3.323 GHz - 14,075,396,388 instructions # 2.40 insn per cycle - 1.763491701 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2720) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.583161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.907950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.907950e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.415079 sec +INFO: No Floating Point Exceptions have been reported + 6,675,336,151 cycles # 2.753 GHz + 14,174,925,457 instructions # 2.12 insn per cycle + 2.425332683 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.660825e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.119844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.119844e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.696359 sec -INFO: No Floating Point Exceptions have been reported - 5,649,544,498 cycles # 3.325 GHz - 13,634,874,854 instructions # 2.41 insn per cycle - 1.699147037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2366) (512y: 297) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.674600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.017717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.017717e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.378233 sec +INFO: No Floating Point Exceptions have been reported + 6,574,155,578 cycles # 2.754 GHz + 13,789,180,928 instructions # 2.10 insn per cycle + 2.388565062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.906986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.558374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.558374e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.449484 sec -INFO: No Floating Point Exceptions have been reported - 4,827,231,514 cycles # 3.324 GHz - 9,985,414,709 instructions # 2.07 insn per cycle - 1.452343991 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.383516e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.554686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.554686e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.229855 sec +INFO: No Floating Point Exceptions have been reported + 5,994,389,719 cycles # 1.851 GHz + 10,123,629,860 instructions # 1.69 insn per cycle + 3.240029027 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 8c7ea4c9e..2da881e2b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:55:21 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:35:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.302471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.316570e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001729e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.535501 sec +INFO: No Floating Point Exceptions have been reported + 2,215,078,191 cycles # 2.874 GHz + 3,154,679,095 instructions # 1.42 insn per cycle + 0.829146554 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.191460e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.289542e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.289542e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.411969 sec -INFO: No Floating Point Exceptions have been reported - 11,945,708,704 cycles # 3.499 GHz - 34,353,065,389 instructions # 2.88 insn per cycle - 3.414756802 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.351910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.430033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.430033e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.571525 sec +INFO: No Floating Point Exceptions have been reported + 13,035,401,690 cycles # 2.848 GHz + 34,355,905,973 instructions # 2.64 insn per cycle + 4.578322526 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.690055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.833019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.833019e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.966412 sec -INFO: No Floating Point Exceptions have been reported - 10,381,423,527 cycles # 3.497 GHz - 24,005,773,058 instructions # 2.31 insn per cycle - 2.969097608 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.974205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.110138e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.110138e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.639722 sec +INFO: No Floating Point Exceptions have been reported + 10,720,308,622 cycles # 2.941 GHz + 24,027,850,859 instructions # 2.24 insn per cycle + 3.646936507 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.933977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.295813e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.295813e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.886823 sec -INFO: No Floating Point Exceptions have been reported - 6,277,430,439 cycles # 3.323 GHz - 12,326,992,871 instructions # 1.96 insn per cycle - 1.889576906 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.622097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.949479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.949479e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.385273 sec +INFO: No Floating Point Exceptions have been reported + 6,607,425,584 cycles # 2.762 GHz + 12,368,604,074 instructions # 1.87 insn per cycle + 2.392729796 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3103) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.431964e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.857069e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.857069e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.752330 sec -INFO: No Floating Point Exceptions have been reported - 5,834,160,394 cycles # 3.325 GHz - 11,516,187,156 instructions # 1.97 insn per cycle - 1.755080119 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.850394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.302393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.302393e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.279711 sec +INFO: No Floating Point Exceptions have been reported + 6,291,615,830 cycles # 2.752 GHz + 11,595,311,145 instructions # 1.84 insn per cycle + 2.287442889 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2648) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.900093e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.544893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.544893e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.448953 sec -INFO: No Floating Point Exceptions have been reported - 4,824,521,164 cycles # 3.324 GHz - 9,224,489,231 instructions # 1.91 insn per cycle - 1.451728622 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.743982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.952525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.952525e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.921108 sec +INFO: No Floating Point Exceptions have been reported + 5,423,773,794 cycles # 1.852 GHz + 9,310,782,229 instructions # 1.72 insn per cycle + 2.929084188 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 282) (512z: 1955) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 1ebea6be1..86df224c9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:55:33 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:35:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.255241e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.245059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949681e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.533151 sec +INFO: No Floating Point Exceptions have been reported + 2,204,451,822 cycles # 2.864 GHz + 3,124,206,943 instructions # 1.42 insn per cycle + 0.826948420 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.499542e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.618079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.618079e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.123823 sec -INFO: No Floating Point Exceptions have been reported - 10,929,932,947 cycles # 3.496 GHz - 34,920,947,871 instructions # 3.19 insn per cycle - 3.126856338 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.566402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.659710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.659710e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.199423 sec +INFO: No Floating Point Exceptions have been reported + 12,339,181,649 cycles # 2.934 GHz + 34,922,451,175 instructions # 2.83 insn per cycle + 4.206438418 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.695427e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.837464e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.837464e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.963065 sec -INFO: No Floating Point Exceptions have been reported - 10,375,088,589 cycles # 3.498 GHz - 23,007,376,294 instructions # 2.22 insn per cycle - 2.965933364 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.977481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.113067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.113067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.637534 sec +INFO: No Floating Point Exceptions have been reported + 10,710,920,290 cycles # 2.939 GHz + 23,032,620,692 instructions # 2.15 insn per cycle + 3.644897421 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2340) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.522081e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.961774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.961774e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.727688 sec -INFO: No Floating Point Exceptions have been reported - 5,752,613,331 cycles # 3.325 GHz - 11,935,478,883 instructions # 2.07 insn per cycle - 1.730486549 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.909385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.279214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.279214e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.252873 sec +INFO: No Floating Point Exceptions have been reported + 6,212,002,997 cycles # 2.749 GHz + 11,978,645,016 instructions # 1.93 insn per cycle + 2.260347594 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.780684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.254891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.254891e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.668546 sec -INFO: No Floating Point Exceptions have been reported - 5,557,603,877 cycles # 3.326 GHz - 11,074,611,396 instructions # 1.99 insn per cycle - 1.671321875 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.039032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.423530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.423530e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.196002 sec +INFO: No Floating Point Exceptions have been reported + 6,062,556,643 cycles # 2.753 GHz + 11,146,456,018 instructions # 1.84 insn per cycle + 2.203425956 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2103) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.300012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.020242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.020242e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.385171 sec -INFO: No Floating Point Exceptions have been reported - 4,612,980,314 cycles # 3.324 GHz - 8,963,395,665 instructions # 1.94 insn per cycle - 1.388009873 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.871031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091641e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.826553 sec +INFO: No Floating Point Exceptions have been reported + 5,265,704,314 cycles # 1.859 GHz + 9,046,022,125 instructions # 1.72 insn per cycle + 2.834187629 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1571) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index eea2d8771..e1d11759a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:43:29 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:11:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.179768e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.708203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827426e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.491224 sec +INFO: No Floating Point Exceptions have been reported + 2,056,714,115 cycles # 2.865 GHz + 2,916,773,309 instructions # 1.42 insn per cycle + 0.776029796 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.687672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.765570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.765570e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.015623 sec -INFO: No Floating Point Exceptions have been reported - 14,056,200,879 cycles # 3.498 GHz - 45,269,818,111 instructions # 3.22 insn per cycle - 4.018235515 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.918838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972832e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.552302 sec +INFO: No Floating Point Exceptions have been reported + 16,247,282,670 cycles # 2.924 GHz + 45,328,928,537 instructions # 2.79 insn per cycle + 5.557963082 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.017192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.433292e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.433292e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.855639 sec -INFO: No Floating Point Exceptions have been reported - 6,499,332,107 cycles # 3.498 GHz - 17,757,054,967 instructions # 2.73 insn per cycle - 1.858319166 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.529514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866293e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.402922 sec +INFO: No Floating Point Exceptions have been reported + 7,055,912,070 cycles # 2.931 GHz + 17,768,218,222 instructions # 2.52 insn per cycle + 2.408607319 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.141914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.283715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.283715e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.026271 sec -INFO: No Floating Point Exceptions have been reported - 3,432,573,962 cycles # 3.337 GHz - 8,233,650,198 instructions # 2.40 insn per cycle - 1.029019263 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.299822e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.410195e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.410195e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.350007 sec +INFO: No Floating Point Exceptions have been reported + 3,747,874,250 cycles # 2.767 GHz + 8,260,976,747 instructions # 2.20 insn per cycle + 1.355686963 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.175210e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323704e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323704e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.003176 sec -INFO: No Floating Point Exceptions have been reported - 3,354,038,167 cycles # 3.335 GHz - 7,926,545,145 instructions # 2.36 insn per cycle - 1.005907570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3263) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.794382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005480e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278305 sec +INFO: No Floating Point Exceptions have been reported + 3,550,706,297 cycles # 2.767 GHz + 7,915,681,558 instructions # 2.23 insn per cycle + 1.284036639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.377885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.592890e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.592890e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.871268 sec -INFO: No Floating Point Exceptions have been reported - 2,919,726,698 cycles # 3.342 GHz - 6,018,022,045 instructions # 2.06 insn per cycle - 0.873984580 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2252) (512y: 0) (512z: 2155) +EvtsPerSec[Rmb+ME] (23) = ( 6.489307e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.134354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.134354e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.705925 sec +INFO: No Floating Point Exceptions have been reported + 3,272,576,419 cycles # 1.913 GHz + 6,103,138,487 instructions # 1.86 insn per cycle + 1.712010321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 33f6ad0c2..4785fec17 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,48 +1,113 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:01:12 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:46:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.985126e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.401873e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.401873e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.682896 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,622,267,762 cycles # 2.874 GHz + 4,057,326,622 instructions # 1.55 insn per cycle + 0.970912293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.681724e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.759343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.759343e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.046094 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.900194e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953379e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.681942 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 14,165,255,595 cycles # 3.498 GHz - 45,316,310,337 instructions # 3.20 insn per cycle - 4.049378254 seconds time elapsed + 16,671,667,536 cycles # 2.929 GHz + 45,497,192,820 instructions # 2.73 insn per cycle + 5.692941265 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -50,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.973168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.381809e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.381809e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.901091 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.487658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829841e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.506047 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,654,586,526 cycles # 3.495 GHz - 18,040,027,931 instructions # 2.71 insn per cycle - 1.904347949 seconds time elapsed + 7,392,710,973 cycles # 2.938 GHz + 18,162,302,699 instructions # 2.46 insn per cycle + 2.517172183 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -84,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.126146e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262211e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262211e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.072323 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.160433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.262111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.262111e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.454756 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,597,996,290 cycles # 3.344 GHz - 8,472,949,687 instructions # 2.35 insn per cycle - 1.076249874 seconds time elapsed + 4,077,667,192 cycles # 2.783 GHz + 8,611,395,195 instructions # 2.11 insn per cycle + 1.466232607 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -118,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.154507e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.297075e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.297075e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.052391 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.644244e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.901065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.901065e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.382676 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,530,366,208 cycles # 3.343 GHz - 8,165,687,935 instructions # 2.31 insn per cycle - 1.056257388 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3263) (512y: 0) (512z: 0) + 3,878,507,500 cycles # 2.784 GHz + 8,265,873,907 instructions # 2.13 insn per cycle + 1.393862906 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -152,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.351147e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.558201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.558201e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.919559 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.387022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022226e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.814510 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,090,552,850 cycles # 3.350 GHz - 6,275,653,453 instructions # 2.03 insn per cycle - 0.922791907 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2252) (512y: 0) (512z: 2155) + 3,594,832,163 cycles # 1.970 GHz + 6,462,220,806 instructions # 1.80 insn per cycle + 1.825958297 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -186,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index ee1938c69..ff1a0d1a3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:06:05 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:58:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.402978e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.823028e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958083e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 +TOTAL : 0.595970 sec +INFO: No Floating Point Exceptions have been reported + 2,351,354,548 cycles # 2.876 GHz + 3,444,506,671 instructions # 1.46 insn per cycle + 0.875001238 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.685862e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.764265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.764265e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.915967e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970379e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.018620 sec +TOTAL : 5.649395 sec INFO: No Floating Point Exceptions have been reported - 14,065,707,057 cycles # 3.498 GHz - 45,270,051,663 instructions # 3.22 insn per cycle - 4.021150985 seconds time elapsed + 16,580,294,518 cycles # 2.931 GHz + 45,471,304,380 instructions # 2.74 insn per cycle + 5.657307639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.074327e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.502922e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.502922e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.539990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.880360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.880360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.838711 sec +TOTAL : 2.485589 sec INFO: No Floating Point Exceptions have been reported - 6,436,775,742 cycles # 3.497 GHz - 17,757,014,588 instructions # 2.76 insn per cycle - 1.841187730 seconds time elapsed + 7,332,359,110 cycles # 2.941 GHz + 17,888,678,821 instructions # 2.44 insn per cycle + 2.493593708 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.141501e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281232e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281232e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.267444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.414148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.414148e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.026220 sec +TOTAL : 1.445011 sec INFO: No Floating Point Exceptions have been reported - 3,431,593,988 cycles # 3.337 GHz - 8,233,834,239 instructions # 2.40 insn per cycle - 1.028688902 seconds time elapsed + 4,026,136,405 cycles # 2.773 GHz + 8,355,233,205 instructions # 2.08 insn per cycle + 1.452615562 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.173633e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.321271e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321271e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.729960e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.002290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002290e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.003274 sec +TOTAL : 1.378766 sec INFO: No Floating Point Exceptions have been reported - 3,356,378,152 cycles # 3.338 GHz - 7,926,839,516 instructions # 2.36 insn per cycle - 1.005729867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3263) (512y: 0) (512z: 0) + 3,842,420,700 cycles # 2.773 GHz + 7,976,068,469 instructions # 2.08 insn per cycle + 1.386498519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.378254e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.595151e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.595151e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.464492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.118413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118413e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.870727 sec +TOTAL : 1.801449 sec INFO: No Floating Point Exceptions have been reported - 2,917,877,792 cycles # 3.343 GHz - 6,017,978,398 instructions # 2.06 insn per cycle - 0.873214987 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2252) (512y: 0) (512z: 2155) + 3,544,493,993 cycles # 1.960 GHz + 6,155,712,678 instructions # 1.74 insn per cycle + 1.809068044 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index a26ce1355..d6446e740 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:52:19 +DATE: 2024-09-15_11:56:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.300334e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.753955e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878936e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.563506e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.776392e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.890773e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.515714 sec +TOTAL : 0.519623 sec INFO: No Floating Point Exceptions have been reported - 2,131,743,033 cycles # 2.892 GHz - 3,363,258,452 instructions # 1.58 insn per cycle - 0.793874595 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,142,971,519 cycles # 2.865 GHz + 3,298,601,891 instructions # 1.54 insn per cycle + 0.804804703 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.955916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.010652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.010652e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.976206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976206e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.445137 sec +TOTAL : 5.559284 sec INFO: No Floating Point Exceptions have been reported - 16,242,922,108 cycles # 2.981 GHz - 45,332,069,683 instructions # 2.79 insn per cycle - 5.450297847 seconds time elapsed + 16,314,564,993 cycles # 2.932 GHz + 45,379,598,894 instructions # 2.78 insn per cycle + 5.566847007 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.593016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.934269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.934269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.507753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.846242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846242e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.367261 sec +TOTAL : 2.429783 sec INFO: No Floating Point Exceptions have been reported - 7,054,204,460 cycles # 2.974 GHz - 17,770,139,491 instructions # 2.52 insn per cycle - 2.372459853 seconds time elapsed + 7,122,810,590 cycles # 2.924 GHz + 17,820,548,090 instructions # 2.50 insn per cycle + 2.437485624 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.502328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.661035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.661035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.248249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.390772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.390772e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.317120 sec +TOTAL : 1.373977 sec INFO: No Floating Point Exceptions have been reported - 3,741,592,813 cycles # 2.831 GHz - 8,261,772,173 instructions # 2.21 insn per cycle - 1.322339821 seconds time elapsed + 3,800,696,374 cycles # 2.752 GHz + 8,311,474,483 instructions # 2.19 insn per cycle + 1.381547073 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.964572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.026851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.026851e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.515740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.758099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.758099e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.253784 sec +TOTAL : 1.334783 sec INFO: No Floating Point Exceptions have been reported - 3,555,905,791 cycles # 2.826 GHz - 7,915,891,992 instructions # 2.23 insn per cycle - 1.259045889 seconds time elapsed + 3,623,427,363 cycles # 2.701 GHz + 7,966,643,374 instructions # 2.20 insn per cycle + 1.342318524 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.676750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.352514e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.352514e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.461308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.125295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.125295e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.655409 sec +TOTAL : 1.726295 sec INFO: No Floating Point Exceptions have been reported - 3,257,600,602 cycles # 1.963 GHz - 6,097,719,225 instructions # 1.87 insn per cycle - 1.660515865 seconds time elapsed + 3,316,188,727 cycles # 1.914 GHz + 6,146,469,798 instructions # 1.85 insn per cycle + 1.733694711 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 235e36e42..a7d3a3bca 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,46 +1,104 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:04:38 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:53:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.603732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.769962e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882903e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.632677 sec +INFO: No Floating Point Exceptions have been reported + 2,459,588,784 cycles # 2.873 GHz + 3,830,411,115 instructions # 1.56 insn per cycle + 0.912203024 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.684127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.761978e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.761978e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.021106 sec -INFO: No Floating Point Exceptions have been reported - 14,066,477,920 cycles # 3.498 GHz - 45,268,690,595 instructions # 3.22 insn per cycle - 4.023405206 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.920423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975843e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.564298 sec +INFO: No Floating Point Exceptions have been reported + 16,322,815,770 cycles # 2.930 GHz + 45,379,862,622 instructions # 2.78 insn per cycle + 5.571986399 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.071710e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.500544e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.500544e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.839574 sec -INFO: No Floating Point Exceptions have been reported - 6,440,900,212 cycles # 3.497 GHz - 17,756,931,376 instructions # 2.76 insn per cycle - 1.842074691 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.531426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.870727e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870727e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.417527 sec +INFO: No Floating Point Exceptions have been reported + 7,119,927,533 cycles # 2.937 GHz + 17,819,194,741 instructions # 2.50 insn per cycle + 2.425172051 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.122355e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.257259e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.257259e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.041977 sec -INFO: No Floating Point Exceptions have been reported - 3,482,636,990 cycles # 3.335 GHz - 8,233,933,043 instructions # 2.36 insn per cycle - 1.044457638 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.302342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447707e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.366211 sec +INFO: No Floating Point Exceptions have been reported + 3,809,953,710 cycles # 2.774 GHz + 8,311,255,796 instructions # 2.18 insn per cycle + 1.374030854 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.172679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320188e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.320188e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.003898 sec -INFO: No Floating Point Exceptions have been reported - 3,358,001,558 cycles # 3.338 GHz - 7,925,637,916 instructions # 2.36 insn per cycle - 1.006392375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3263) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.793036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010397e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.299199 sec +INFO: No Floating Point Exceptions have been reported + 3,629,527,941 cycles # 2.777 GHz + 7,964,563,950 instructions # 2.19 insn per cycle + 1.308355970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.375185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.588554e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.588554e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.873448 sec -INFO: No Floating Point Exceptions have been reported - 2,920,336,880 cycles # 3.334 GHz - 6,017,562,336 instructions # 2.06 insn per cycle - 0.876305751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2252) (512y: 0) (512z: 2155) +EvtsPerSec[Rmb+ME] (23) = ( 6.490800e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.161305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.161305e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.720915 sec +INFO: No Floating Point Exceptions have been reported + 3,334,705,650 cycles # 1.929 GHz + 6,144,839,228 instructions # 1.84 insn per cycle + 1.729518385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index a74a1314f..1a7a19dcf 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:43:39 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:12:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.207213e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857795e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487469 sec +INFO: No Floating Point Exceptions have been reported + 2,053,225,612 cycles # 2.869 GHz + 2,963,841,312 instructions # 1.44 insn per cycle + 0.771871582 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.730576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.811192e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.811192e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.954835 sec -INFO: No Floating Point Exceptions have been reported - 13,844,770,572 cycles # 3.499 GHz - 44,379,740,428 instructions # 3.21 insn per cycle - 3.957529560 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.964254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021824e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021824e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.438732 sec +INFO: No Floating Point Exceptions have been reported + 16,004,474,289 cycles # 2.939 GHz + 44,480,990,455 instructions # 2.78 insn per cycle + 5.447104045 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.276981e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.893763e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.893763e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.553626 sec -INFO: No Floating Point Exceptions have been reported - 5,441,485,020 cycles # 3.497 GHz - 17,062,888,093 instructions # 3.14 insn per cycle - 1.556246584 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.270854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.739248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.739248e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.093047 sec +INFO: No Floating Point Exceptions have been reported + 6,146,902,561 cycles # 2.927 GHz + 17,124,330,277 instructions # 2.79 insn per cycle + 2.101358753 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.558806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.330596e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.330596e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.332994 sec -INFO: No Floating Point Exceptions have been reported - 4,445,086,763 cycles # 3.329 GHz - 10,197,507,843 instructions # 2.29 insn per cycle - 1.335656964 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.010634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.590643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.590643e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.845256 sec +INFO: No Floating Point Exceptions have been reported + 5,092,053,198 cycles # 2.749 GHz + 10,266,716,383 instructions # 2.02 insn per cycle + 1.853336006 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.597371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.374546e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.374546e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.329643 sec -INFO: No Floating Point Exceptions have been reported - 4,433,469,454 cycles # 3.328 GHz - 9,932,784,402 instructions # 2.24 insn per cycle - 1.332277414 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.078233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.679535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.679535e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.828978 sec +INFO: No Floating Point Exceptions have been reported + 5,049,904,876 cycles # 2.749 GHz + 10,046,122,437 instructions # 1.99 insn per cycle + 1.837563375 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.599609e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.061123e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061123e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.202376 sec -INFO: No Floating Point Exceptions have been reported - 4,004,558,751 cycles # 3.324 GHz - 8,385,306,315 instructions # 2.09 insn per cycle - 1.205025535 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.649805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.982891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.982891e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.359308 sec +INFO: No Floating Point Exceptions have been reported + 4,441,902,341 cycles # 1.877 GHz + 8,494,262,942 instructions # 1.91 insn per cycle + 2.367884804 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index a96c48e64..7bcb20b10 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:55:46 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:36:18 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.281805e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.729157e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845967e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487987 sec +INFO: No Floating Point Exceptions have been reported + 2,055,281,728 cycles # 2.872 GHz + 2,956,266,509 instructions # 1.44 insn per cycle + 0.772297546 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.290059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408490e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408490e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.302751 sec -INFO: No Floating Point Exceptions have been reported - 11,549,378,598 cycles # 3.494 GHz - 34,633,358,524 instructions # 3.00 insn per cycle - 3.305596204 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.496211e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.588163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.588163e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.287732 sec +INFO: No Floating Point Exceptions have been reported + 12,584,199,997 cycles # 2.932 GHz + 34,606,962,286 instructions # 2.75 insn per cycle + 4.293417398 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.007295e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.576024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.576024e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.609185 sec -INFO: No Floating Point Exceptions have been reported - 5,635,188,283 cycles # 3.497 GHz - 14,856,875,442 instructions # 2.64 insn per cycle - 1.611754747 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.317872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.783743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.783743e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.058410 sec +INFO: No Floating Point Exceptions have been reported + 6,058,288,486 cycles # 2.936 GHz + 14,847,536,122 instructions # 2.45 insn per cycle + 2.064093895 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193414453417 Relative difference = 1.6829758681196702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.394720e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.032269e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.032269e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.224012 sec -INFO: No Floating Point Exceptions have been reported - 4,084,707,787 cycles # 3.331 GHz - 9,036,794,041 instructions # 2.21 insn per cycle - 1.226594192 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.131335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.950373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.950373e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.556955 sec +INFO: No Floating Point Exceptions have been reported + 4,316,973,163 cycles # 2.764 GHz + 9,053,302,579 instructions # 2.10 insn per cycle + 1.562583378 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4460) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181974319741 Relative difference = 9.731379272303266e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.823155e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.083746e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.083746e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.176993 sec -INFO: No Floating Point Exceptions have been reported - 3,929,688,937 cycles # 3.332 GHz - 8,611,370,853 instructions # 2.19 insn per cycle - 1.179525467 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.308917e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.178890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.178890e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.521257 sec +INFO: No Floating Point Exceptions have been reported + 4,205,210,775 cycles # 2.756 GHz + 8,662,511,141 instructions # 2.06 insn per cycle + 1.526851661 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4225) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181974319741 Relative difference = 9.731379272303266e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.075397e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202852e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.084815 sec -INFO: No Floating Point Exceptions have been reported - 3,622,373,581 cycles # 3.332 GHz - 7,752,858,352 instructions # 2.14 insn per cycle - 1.087451199 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.372555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809988e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.038768 sec +INFO: No Floating Point Exceptions have been reported + 3,837,026,814 cycles # 1.878 GHz + 7,805,330,859 instructions # 2.03 insn per cycle + 2.044464874 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index bf3281c33..69afb6ef9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:55:55 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:36:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.211144e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.722563e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857000e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487617 sec +INFO: No Floating Point Exceptions have been reported + 2,060,698,152 cycles # 2.879 GHz + 2,912,783,409 instructions # 1.41 insn per cycle + 0.772686787 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.629012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.772511e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.772511e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.003868 sec -INFO: No Floating Point Exceptions have been reported - 10,508,602,693 cycles # 3.497 GHz - 35,099,441,292 instructions # 3.34 insn per cycle - 3.006336302 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.666171e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.771083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771083e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.019199 sec +INFO: No Floating Point Exceptions have been reported + 11,827,953,010 cycles # 2.940 GHz + 35,076,444,454 instructions # 2.97 insn per cycle + 4.024833106 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.235614e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.843933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.843933e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.562034 sec -INFO: No Floating Point Exceptions have been reported - 5,470,908,331 cycles # 3.497 GHz - 14,477,502,515 instructions # 2.65 insn per cycle - 1.564699545 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.415928e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902704e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.023749 sec +INFO: No Floating Point Exceptions have been reported + 5,953,573,213 cycles # 2.935 GHz + 14,468,346,196 instructions # 2.43 insn per cycle + 2.029398775 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.005032e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111753e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111753e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.151412 sec -INFO: No Floating Point Exceptions have been reported - 3,845,608,314 cycles # 3.332 GHz - 8,863,271,515 instructions # 2.30 insn per cycle - 1.154254912 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.382613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.271116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.271116e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.507601 sec +INFO: No Floating Point Exceptions have been reported + 4,170,609,924 cycles # 2.758 GHz + 8,881,070,721 instructions # 2.13 insn per cycle + 1.513291878 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3570) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182104704902 Relative difference = 1.0374044905426431e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.894010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093260e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093260e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.170379 sec -INFO: No Floating Point Exceptions have been reported - 3,909,061,749 cycles # 3.333 GHz - 8,356,083,971 instructions # 2.14 insn per cycle - 1.173274758 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.441367e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.356283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.356283e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.495607 sec +INFO: No Floating Point Exceptions have been reported + 4,129,531,699 cycles # 2.752 GHz + 8,406,651,679 instructions # 2.04 insn per cycle + 1.501280641 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182104704902 Relative difference = 1.0374044905426431e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.092998e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.224310e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.224310e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.069987 sec -INFO: No Floating Point Exceptions have been reported - 3,570,204,329 cycles # 3.329 GHz - 7,645,688,528 instructions # 2.14 insn per cycle - 1.072889742 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.439343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.891063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.891063e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.014863 sec +INFO: No Floating Point Exceptions have been reported + 3,794,076,081 cycles # 1.879 GHz + 7,699,347,303 instructions # 2.03 insn per cycle + 2.020593600 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index ab79953e6..5fcfefd8b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:43:50 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:12:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.360888e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.282446e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.948861e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.535947 sec +INFO: No Floating Point Exceptions have been reported + 2,206,968,400 cycles # 2.863 GHz + 3,177,366,447 instructions # 1.44 insn per cycle + 0.829346124 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.362750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.416399e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.416399e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.565810 sec -INFO: No Floating Point Exceptions have been reported - 15,975,939,926 cycles # 3.497 GHz - 46,160,424,236 instructions # 2.89 insn per cycle - 4.568737077 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.801369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.847904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.847904e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.967015 sec +INFO: No Floating Point Exceptions have been reported + 17,524,709,788 cycles # 2.932 GHz + 46,191,860,900 instructions # 2.64 insn per cycle + 5.978935443 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.326288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.523607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.523607e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.549267 sec -INFO: No Floating Point Exceptions have been reported - 8,919,543,225 cycles # 3.495 GHz - 27,517,697,576 instructions # 3.09 insn per cycle - 2.552183549 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.140467e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299172e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.487825 sec +INFO: No Floating Point Exceptions have been reported + 10,261,674,067 cycles # 2.934 GHz + 27,722,537,189 instructions # 2.70 insn per cycle + 3.498978005 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.128964e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.659185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.659185e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.591539 sec -INFO: No Floating Point Exceptions have been reported - 5,303,158,619 cycles # 3.327 GHz - 12,385,058,181 instructions # 2.34 insn per cycle - 1.594459861 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.032137e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.431710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.431710e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.231910 sec +INFO: No Floating Point Exceptions have been reported + 6,175,976,175 cycles # 2.753 GHz + 12,601,670,185 instructions # 2.04 insn per cycle + 2.244594220 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.357708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.920767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.920767e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.548259 sec -INFO: No Floating Point Exceptions have been reported - 5,158,680,427 cycles # 3.326 GHz - 11,916,875,674 instructions # 2.31 insn per cycle - 1.551227238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2570) (512y: 126) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.534616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.015525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.015525e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.042072 sec +INFO: No Floating Point Exceptions have been reported + 5,669,805,165 cycles # 2.764 GHz + 12,036,562,183 instructions # 2.12 insn per cycle + 2.054181107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.191115e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.883409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.883409e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.401980 sec -INFO: No Floating Point Exceptions have been reported - 4,673,759,870 cycles # 3.328 GHz - 7,934,553,207 instructions # 1.70 insn per cycle - 1.404853021 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1664) (512y: 104) (512z: 1861) +EvtsPerSec[Rmb+ME] (23) = ( 3.587962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783361e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.070999 sec +INFO: No Floating Point Exceptions have been reported + 5,754,052,644 cycles # 1.867 GHz + 8,225,264,257 instructions # 1.43 insn per cycle + 3.082911381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1863) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 416a2eb5e..f3ccad174 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_09:44:03 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_11:13:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.403841e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.350229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.960040e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.532145 sec +INFO: No Floating Point Exceptions have been reported + 2,202,225,096 cycles # 2.860 GHz + 3,140,327,784 instructions # 1.43 insn per cycle + 0.826897706 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.403784e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.459249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.459249e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.489030 sec -INFO: No Floating Point Exceptions have been reported - 15,712,921,742 cycles # 3.498 GHz - 45,169,333,438 instructions # 2.87 insn per cycle - 4.491856240 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 569) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.849779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.898891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898891e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.812131 sec +INFO: No Floating Point Exceptions have been reported + 17,080,273,912 cycles # 2.934 GHz + 45,215,696,703 instructions # 2.65 insn per cycle + 5.823642020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.486857e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.698065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.698065e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.461843 sec -INFO: No Floating Point Exceptions have been reported - 8,618,714,834 cycles # 3.497 GHz - 26,136,812,498 instructions # 3.03 insn per cycle - 2.464680844 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.353337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.532203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.532203e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.272970 sec +INFO: No Floating Point Exceptions have been reported + 9,622,203,648 cycles # 2.930 GHz + 26,352,115,115 instructions # 2.74 insn per cycle + 3.284795843 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.340287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.756748e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.756748e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.774806 sec -INFO: No Floating Point Exceptions have been reported - 5,907,298,061 cycles # 3.323 GHz - 13,915,464,478 instructions # 2.36 insn per cycle - 1.777716707 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.499939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814164e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.448542 sec +INFO: No Floating Point Exceptions have been reported + 6,760,703,277 cycles # 2.754 GHz + 14,051,302,777 instructions # 2.08 insn per cycle + 2.455916079 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.541372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.980671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.980671e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.724932 sec -INFO: No Floating Point Exceptions have been reported - 5,743,174,831 cycles # 3.324 GHz - 13,416,452,333 instructions # 2.34 insn per cycle - 1.727848614 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 302) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.725371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.062901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.062901e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.336085 sec +INFO: No Floating Point Exceptions have been reported + 6,433,871,158 cycles # 2.746 GHz + 13,544,684,713 instructions # 2.11 insn per cycle + 2.343643276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.400406e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.141039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.141039e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.370439 sec -INFO: No Floating Point Exceptions have been reported - 4,565,287,298 cycles # 3.325 GHz - 9,073,811,838 instructions # 1.99 insn per cycle - 1.373379843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1452) (512y: 212) (512z: 2060) +EvtsPerSec[Rmb+ME] (23) = ( 3.546154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.730746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.730746e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.074398 sec +INFO: No Floating Point Exceptions have been reported + 5,684,367,008 cycles # 1.845 GHz + 9,231,965,840 instructions # 1.62 insn per cycle + 3.081687192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 0986f6067..0fe4cfc92 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:44:16 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:13:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.646703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.903323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.008440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.472515 sec +INFO: No Floating Point Exceptions have been reported + 1,976,395,629 cycles # 2.864 GHz + 2,853,369,004 instructions # 1.44 insn per cycle + 0.746437756 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.044065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240372e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.613454 sec +INFO: No Floating Point Exceptions have been reported + 2,457,306,952 cycles # 2.873 GHz + 3,760,458,763 instructions # 1.53 insn per cycle + 0.914581816 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.176105e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.190693e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.190693e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.182832 sec -INFO: No Floating Point Exceptions have been reported - 18,128,600,339 cycles # 3.497 GHz - 59,972,722,547 instructions # 3.31 insn per cycle - 5.184432203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1200) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.423163e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435086e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435086e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.782254 sec +INFO: No Floating Point Exceptions have been reported + 19,933,068,888 cycles # 2.938 GHz + 59,910,639,029 instructions # 3.01 insn per cycle + 6.786428407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.045335e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.097650e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.097650e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.733343 sec -INFO: No Floating Point Exceptions have been reported - 9,565,199,258 cycles # 3.498 GHz - 30,994,708,884 instructions # 3.24 insn per cycle - 2.734935121 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.574256e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615519e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615519e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.601510 sec +INFO: No Floating Point Exceptions have been reported + 10,564,249,920 cycles # 2.931 GHz + 31,083,049,027 instructions # 2.94 insn per cycle + 3.605720194 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.245672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.266338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266338e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.337811 sec -INFO: No Floating Point Exceptions have been reported - 4,424,930,671 cycles # 3.304 GHz - 11,351,171,945 instructions # 2.57 insn per cycle - 1.339405963 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.109445e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.271688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.271688e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.818340 sec +INFO: No Floating Point Exceptions have been reported + 4,995,758,651 cycles # 2.742 GHz + 11,404,411,821 instructions # 2.28 insn per cycle + 1.822613950 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.359543e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.384504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.384504e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.228183 sec -INFO: No Floating Point Exceptions have been reported - 4,063,224,559 cycles # 3.305 GHz - 10,666,694,039 instructions # 2.63 insn per cycle - 1.229791553 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4461) (512y: 57) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.031962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.052518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052518e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.607425 sec +INFO: No Floating Point Exceptions have been reported + 4,440,179,427 cycles # 2.756 GHz + 10,663,032,994 instructions # 2.40 insn per cycle + 1.611644858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.887968e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.936321e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936321e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.891066 sec -INFO: No Floating Point Exceptions have been reported - 2,948,076,016 cycles # 3.303 GHz - 5,900,042,287 instructions # 2.00 insn per cycle - 0.892682068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1621) (512y: 63) (512z: 3574) +EvtsPerSec[Rmb+ME] (23) = ( 7.095641e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.193407e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.193407e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.331432 sec +INFO: No Floating Point Exceptions have been reported + 4,128,663,715 cycles # 1.768 GHz + 5,965,561,050 instructions # 1.44 insn per cycle + 2.335809030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index d4c3a83ff..555f99fae 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_10:01:23 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:47:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.462205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.092748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.092748e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.504368 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,053,925,551 cycles # 2.862 GHz + 3,120,835,610 instructions # 1.52 insn per cycle + 0.775288198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.695563e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.383097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.383097e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.834031 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,122,984,951 cycles # 2.884 GHz + 5,028,895,726 instructions # 1.61 insn per cycle + 1.144542739 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.169322e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.183933e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.183933e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.198378 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.420974e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433351e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433351e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.797187 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 18,183,225,836 cycles # 3.497 GHz - 59,980,140,750 instructions # 3.30 insn per cycle - 5.200348647 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1200) (avx2: 0) (512y: 0) (512z: 0) + 19,924,578,475 cycles # 2.930 GHz + 59,919,807,490 instructions # 3.01 insn per cycle + 6.801426045 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.042265e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.094495e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.094495e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.738994 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.519993e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.562146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.562146e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.653302 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,585,610,612 cycles # 3.498 GHz - 31,043,101,993 instructions # 3.24 insn per cycle - 2.740784958 seconds time elapsed + 10,746,732,815 cycles # 2.939 GHz + 31,134,499,346 instructions # 2.90 insn per cycle + 3.657616586 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.244687e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.265247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265247e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.343858 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.065515e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.233179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233179e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.835810 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,446,886,614 cycles # 3.305 GHz - 11,403,196,618 instructions # 2.56 insn per cycle - 1.345842705 seconds time elapsed + 5,036,150,788 cycles # 2.739 GHz + 11,457,434,104 instructions # 2.28 insn per cycle + 1.839969686 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.356113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.380826e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.380826e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.236158 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.012086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033024e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.647583 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,091,571,017 cycles # 3.305 GHz - 10,718,360,877 instructions # 2.62 insn per cycle - 1.238120523 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4461) (512y: 57) (512z: 0) + 4,499,476,819 cycles # 2.725 GHz + 10,716,818,624 instructions # 2.38 insn per cycle + 1.651828196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.884048e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.932845e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.932845e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.897891 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.046506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.146814e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.146814e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.355729 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,971,968,391 cycles # 3.304 GHz - 5,940,185,208 instructions # 2.00 insn per cycle - 0.899723225 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1621) (512y: 63) (512z: 3574) + 4,171,753,505 cycles # 1.769 GHz + 6,006,835,350 instructions # 1.44 insn per cycle + 2.359914843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index f20d2fd15..df418c0c5 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:44:29 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:13:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.819441e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940165e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.036270e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473171 sec +INFO: No Floating Point Exceptions have been reported + 1,973,905,564 cycles # 2.864 GHz + 2,835,389,936 instructions # 1.44 insn per cycle + 0.747859769 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.045923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.239053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.249723e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.613723 sec +INFO: No Floating Point Exceptions have been reported + 2,468,512,324 cycles # 2.879 GHz + 3,722,507,305 instructions # 1.51 insn per cycle + 0.915283019 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.164481e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.179267e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.179267e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.202657 sec -INFO: No Floating Point Exceptions have been reported - 18,191,121,765 cycles # 3.496 GHz - 60,204,990,543 instructions # 3.31 insn per cycle - 5.204370882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1324) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.416240e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.428350e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.428350e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.802073 sec +INFO: No Floating Point Exceptions have been reported + 19,919,234,926 cycles # 2.929 GHz + 60,126,857,831 instructions # 3.02 insn per cycle + 6.806341598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.104456e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.158115e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.158115e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.707057 sec -INFO: No Floating Point Exceptions have been reported - 9,472,985,303 cycles # 3.498 GHz - 30,673,728,145 instructions # 3.24 insn per cycle - 2.708662649 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.628115e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.671006e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.671006e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.560395 sec +INFO: No Floating Point Exceptions have been reported + 10,470,027,689 cycles # 2.938 GHz + 30,685,175,745 instructions # 2.93 insn per cycle + 3.564357324 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.202483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.221608e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.384905 sec -INFO: No Floating Point Exceptions have been reported - 4,580,312,616 cycles # 3.304 GHz - 11,818,533,932 instructions # 2.58 insn per cycle - 1.386504518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4744) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.858005e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.013532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.013532e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.870006 sec +INFO: No Floating Point Exceptions have been reported + 5,129,037,452 cycles # 2.738 GHz + 11,838,972,708 instructions # 2.31 insn per cycle + 1.873874088 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4748) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.287316e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.309413e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.309413e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.295581 sec -INFO: No Floating Point Exceptions have been reported - 4,285,536,738 cycles # 3.304 GHz - 11,164,628,593 instructions # 2.61 insn per cycle - 1.297182267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4435) (512y: 231) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.652883e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.834044e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.834044e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.717936 sec +INFO: No Floating Point Exceptions have been reported + 4,726,163,144 cycles # 2.747 GHz + 11,165,051,323 instructions # 2.36 insn per cycle + 1.721718897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 245) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.882281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.931934e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.931934e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.893359 sec -INFO: No Floating Point Exceptions have been reported - 2,951,961,781 cycles # 3.299 GHz - 6,173,673,234 instructions # 2.09 insn per cycle - 0.894959757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1520) (512y: 119) (512z: 3677) +EvtsPerSec[Rmb+ME] (23) = ( 7.029308e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.126499e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.126499e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.353551 sec +INFO: No Floating Point Exceptions have been reported + 4,165,348,623 cycles # 1.768 GHz + 6,220,012,480 instructions # 1.49 insn per cycle + 2.357450464 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1513) (512y: 140) (512z: 3679) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index e86400db6..0344b19ae 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:44:42 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:14:17 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.690436e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002102e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.037933e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.455498 sec +INFO: No Floating Point Exceptions have been reported + 1,928,242,310 cycles # 2.870 GHz + 2,746,045,826 instructions # 1.42 insn per cycle + 0.728814382 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.680469e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.378510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424836e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.509889 sec +INFO: No Floating Point Exceptions have been reported + 2,121,031,452 cycles # 2.862 GHz + 3,036,959,694 instructions # 1.43 insn per cycle + 0.800065199 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.293248e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.310036e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310036e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 4.997970 sec -INFO: No Floating Point Exceptions have been reported - 17,489,434,617 cycles # 3.498 GHz - 59,452,136,671 instructions # 3.40 insn per cycle - 4.999548338 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.504687e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.517708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.517708e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.560247 sec +INFO: No Floating Point Exceptions have been reported + 19,257,464,373 cycles # 2.934 GHz + 59,612,594,917 instructions # 3.10 insn per cycle + 6.564375492 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.079975e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098214e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098214e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.538660 sec -INFO: No Floating Point Exceptions have been reported - 5,385,436,458 cycles # 3.497 GHz - 17,016,424,433 instructions # 3.16 insn per cycle - 1.540193702 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.084277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.218488e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.218488e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.043801 sec +INFO: No Floating Point Exceptions have been reported + 6,009,096,977 cycles # 2.936 GHz + 17,060,655,087 instructions # 2.84 insn per cycle + 2.047534449 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.353966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.440765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.440765e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.717145 sec -INFO: No Floating Point Exceptions have been reported - 2,375,230,153 cycles # 3.306 GHz - 6,156,220,680 instructions # 2.59 insn per cycle - 0.718701049 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.741509e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801920e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.958794 sec +INFO: No Floating Point Exceptions have been reported + 2,632,796,186 cycles # 2.737 GHz + 6,187,347,650 instructions # 2.35 insn per cycle + 0.962496439 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.572662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.670221e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.670221e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.658474 sec -INFO: No Floating Point Exceptions have been reported - 2,181,371,307 cycles # 3.306 GHz - 5,785,028,963 instructions # 2.65 insn per cycle - 0.660022688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4989) (512y: 2) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.912269e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.986419e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.986419e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.875039 sec +INFO: No Floating Point Exceptions have been reported + 2,407,469,182 cycles # 2.742 GHz + 5,790,784,602 instructions # 2.41 insn per cycle + 0.878768885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.842273e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.068596e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.068596e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.448095 sec -INFO: No Floating Point Exceptions have been reported - 1,485,276,741 cycles # 3.305 GHz - 3,351,984,330 instructions # 2.26 insn per cycle - 0.449619110 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2248) (512y: 7) (512z: 3788) +EvtsPerSec[Rmb+ME] (23) = ( 1.443174e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.485583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485583e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.155478 sec +INFO: No Floating Point Exceptions have been reported + 2,073,615,836 cycles # 1.790 GHz + 3,391,178,624 instructions # 1.64 insn per cycle + 1.159306518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index d5383a71e..93fdf05be 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_10:01:36 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:47:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.480682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545762e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545762e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.469335 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,971,009,268 cycles # 2.861 GHz + 2,878,621,667 instructions # 1.46 insn per cycle + 0.747088963 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.503732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.296845e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296845e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 +TOTAL : 0.653804 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,546,264,462 cycles # 2.875 GHz + 3,884,000,523 instructions # 1.53 insn per cycle + 0.944357505 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.292310e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.309156e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.309156e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 5.001343 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.503119e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.516211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.516211e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.568401 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 17,499,985,711 cycles # 3.498 GHz - 59,456,396,239 instructions # 3.40 insn per cycle - 5.002963975 seconds time elapsed + 19,288,292,034 cycles # 2.936 GHz + 59,615,397,281 instructions # 3.09 insn per cycle + 6.572330246 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.078088e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.096255e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.544376 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.074244e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.211619e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.211619e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.051717 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,402,665,777 cycles # 3.498 GHz - 17,064,790,447 instructions # 3.16 insn per cycle - 1.545978941 seconds time elapsed + 6,032,564,255 cycles # 2.936 GHz + 17,108,905,426 instructions # 2.84 insn per cycle + 2.055577630 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.347762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.434649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.434649e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.721763 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.735664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.796597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.796597e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.965943 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,391,070,507 cycles # 3.307 GHz - 6,193,389,053 instructions # 2.59 insn per cycle - 0.723349142 seconds time elapsed + 2,652,269,080 cycles # 2.742 GHz + 6,224,274,753 instructions # 2.35 insn per cycle + 0.969710233 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.567590e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.664446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.664446e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.662527 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.901208e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.974602e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.974602e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.883566 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,195,465,662 cycles # 3.307 GHz - 5,822,291,849 instructions # 2.65 insn per cycle - 0.664160104 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4989) (512y: 2) (512z: 0) + 2,424,412,243 cycles # 2.734 GHz + 5,827,930,388 instructions # 2.40 insn per cycle + 0.887425140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.832041e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.054575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.054575e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.452217 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.438672e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.481527e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.163548 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,499,538,668 cycles # 3.306 GHz - 3,393,589,764 instructions # 2.26 insn per cycle - 0.453843398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2248) (512y: 7) (512z: 3788) + 2,093,109,210 cycles # 1.794 GHz + 3,432,132,802 instructions # 1.64 insn per cycle + 1.167531871 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index ec0001e97..04fc107fb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:44:52 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:14:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.693098e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040838e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074373e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.456865 sec +INFO: No Floating Point Exceptions have been reported + 1,922,600,682 cycles # 2.857 GHz + 2,735,771,538 instructions # 1.42 insn per cycle + 0.729676644 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.681790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366127e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406124e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.509422 sec +INFO: No Floating Point Exceptions have been reported + 2,117,565,229 cycles # 2.869 GHz + 3,056,275,302 instructions # 1.44 insn per cycle + 0.796290434 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.275457e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292273e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292273e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 5.025015 sec -INFO: No Floating Point Exceptions have been reported - 17,582,826,278 cycles # 3.498 GHz - 59,189,127,533 instructions # 3.37 insn per cycle - 5.026501970 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.491972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504565e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504565e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.593721 sec +INFO: No Floating Point Exceptions have been reported + 19,401,378,848 cycles # 2.941 GHz + 59,351,233,195 instructions # 3.06 insn per cycle + 6.597810534 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.145658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166241e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.451582 sec -INFO: No Floating Point Exceptions have been reported - 5,082,102,690 cycles # 3.498 GHz - 16,804,361,272 instructions # 3.31 insn per cycle - 1.453134742 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.427450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.574205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.574205e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.961524 sec +INFO: No Floating Point Exceptions have been reported + 5,763,417,063 cycles # 2.934 GHz + 16,848,552,420 instructions # 2.92 insn per cycle + 1.965663621 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.082659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.150500e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.150500e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.807721 sec -INFO: No Floating Point Exceptions have been reported - 2,674,049,754 cycles # 3.305 GHz - 6,816,825,477 instructions # 2.55 insn per cycle - 0.809264923 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.513418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.559668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.559668e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.100420 sec +INFO: No Floating Point Exceptions have been reported + 3,014,454,268 cycles # 2.733 GHz + 6,847,622,992 instructions # 2.27 insn per cycle + 1.104094178 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.235768e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.309118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.309118e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.754421 sec -INFO: No Floating Point Exceptions have been reported - 2,497,964,841 cycles # 3.305 GHz - 6,408,222,671 instructions # 2.57 insn per cycle - 0.755960612 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5535) (512y: 8) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.641246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.695747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.695747e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.015943 sec +INFO: No Floating Point Exceptions have been reported + 2,793,517,683 cycles # 2.742 GHz + 6,436,907,448 instructions # 2.30 insn per cycle + 1.019630864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.417809e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.598994e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.500988 sec -INFO: No Floating Point Exceptions have been reported - 1,651,536,842 cycles # 3.288 GHz - 3,718,450,916 instructions # 2.25 insn per cycle - 0.502595310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2477) (512y: 9) (512z: 4083) +EvtsPerSec[Rmb+ME] (23) = ( 1.322600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358678e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.258576 sec +INFO: No Floating Point Exceptions have been reported + 2,248,626,373 cycles # 1.783 GHz + 3,754,168,834 instructions # 1.67 insn per cycle + 1.262333902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2466) (512y: 29) (512z: 4084) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index bdd15f711..2641b6a6f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:45:02 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:14:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.553984e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.813684e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.925982e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473727 sec +INFO: No Floating Point Exceptions have been reported + 1,964,125,217 cycles # 2.842 GHz + 2,850,802,933 instructions # 1.45 insn per cycle + 0.747533169 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.039046e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.224514e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235497e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.623745 sec +INFO: No Floating Point Exceptions have been reported + 2,491,113,981 cycles # 2.884 GHz + 3,741,355,868 instructions # 1.50 insn per cycle + 0.924633822 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.119777e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.133912e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.133912e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.276168 sec -INFO: No Floating Point Exceptions have been reported - 18,454,886,881 cycles # 3.497 GHz - 61,037,764,900 instructions # 3.31 insn per cycle - 5.277779580 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1232) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.390069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.401784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.401784e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.876043 sec +INFO: No Floating Point Exceptions have been reported + 20,176,006,103 cycles # 2.934 GHz + 60,944,588,650 instructions # 3.02 insn per cycle + 6.880217907 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.129711e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.183532e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.183532e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.696176 sec -INFO: No Floating Point Exceptions have been reported - 9,432,676,110 cycles # 3.497 GHz - 30,730,516,656 instructions # 3.26 insn per cycle - 2.697739460 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.624106e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.667122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.667122e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.563355 sec +INFO: No Floating Point Exceptions have been reported + 10,467,283,500 cycles # 2.935 GHz + 30,820,693,493 instructions # 2.94 insn per cycle + 3.567171047 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.269496e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.290954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.290954e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.313325 sec -INFO: No Floating Point Exceptions have been reported - 4,344,173,486 cycles # 3.304 GHz - 11,304,069,067 instructions # 2.60 insn per cycle - 1.314963292 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.172379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.336577e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.336577e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.806966 sec +INFO: No Floating Point Exceptions have been reported + 4,954,879,411 cycles # 2.737 GHz + 11,359,422,816 instructions # 2.29 insn per cycle + 1.810872816 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.376481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.401784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.401784e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.213294 sec -INFO: No Floating Point Exceptions have been reported - 4,013,940,050 cycles # 3.305 GHz - 10,608,756,715 instructions # 2.64 insn per cycle - 1.214851480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4584) (512y: 49) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.036379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.057513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057513e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.601666 sec +INFO: No Floating Point Exceptions have been reported + 4,380,983,099 cycles # 2.729 GHz + 10,610,165,712 instructions # 2.42 insn per cycle + 1.605990710 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 83) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.813383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.857903e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857903e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.926499 sec -INFO: No Floating Point Exceptions have been reported - 3,064,996,743 cycles # 3.303 GHz - 6,097,907,097 instructions # 1.99 insn per cycle - 0.928110126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 85) (512z: 3650) +EvtsPerSec[Rmb+ME] (23) = ( 6.900446e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.995461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.995461e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.397174 sec +INFO: No Floating Point Exceptions have been reported + 4,245,323,919 cycles # 1.769 GHz + 6,166,210,089 instructions # 1.45 insn per cycle + 2.401100901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2140) (512y: 117) (512z: 3653) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 4bffeb54b..0766319c3 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_09:45:15 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +DATE: 2024-09-15_11:15:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.556212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.906743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.026143e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.472306 sec +INFO: No Floating Point Exceptions have been reported + 1,982,520,983 cycles # 2.874 GHz + 2,863,074,866 instructions # 1.44 insn per cycle + 0.745845869 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.042838e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242155e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.611235 sec +INFO: No Floating Point Exceptions have been reported + 2,455,829,243 cycles # 2.879 GHz + 3,741,729,771 instructions # 1.52 insn per cycle + 0.912428146 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.107002e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.121091e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.121091e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.297762 sec -INFO: No Floating Point Exceptions have been reported - 18,526,198,916 cycles # 3.496 GHz - 61,241,358,829 instructions # 3.31 insn per cycle - 5.299387508 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.386940e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.398394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398394e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.884140 sec +INFO: No Floating Point Exceptions have been reported + 20,272,568,697 cycles # 2.944 GHz + 61,168,730,148 instructions # 3.02 insn per cycle + 6.888274413 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.185549e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.240488e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.240488e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.671797 sec -INFO: No Floating Point Exceptions have been reported - 9,341,966,829 cycles # 3.495 GHz - 30,411,366,752 instructions # 3.26 insn per cycle - 2.673416045 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.669440e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.713215e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.713215e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.529000 sec +INFO: No Floating Point Exceptions have been reported + 10,335,535,502 cycles # 2.926 GHz + 30,533,410,675 instructions # 2.95 insn per cycle + 3.532867905 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.203500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.223007e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.383803 sec -INFO: No Floating Point Exceptions have been reported - 4,576,792,877 cycles # 3.304 GHz - 11,797,737,496 instructions # 2.58 insn per cycle - 1.385400236 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4882) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.803371e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.957146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.957146e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.880842 sec +INFO: No Floating Point Exceptions have been reported + 5,141,108,977 cycles # 2.729 GHz + 11,871,626,607 instructions # 2.31 insn per cycle + 1.885060685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.310306e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.333310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.333310e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.273487 sec -INFO: No Floating Point Exceptions have been reported - 4,212,858,476 cycles # 3.305 GHz - 11,109,227,402 instructions # 2.64 insn per cycle - 1.275107436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4539) (512y: 223) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.734351e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.920231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.920231e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.703697 sec +INFO: No Floating Point Exceptions have been reported + 4,677,605,202 cycles # 2.740 GHz + 11,166,557,237 instructions # 2.39 insn per cycle + 1.707597039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 238) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.819719e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865874e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.923241 sec -INFO: No Floating Point Exceptions have been reported - 3,052,013,926 cycles # 3.301 GHz - 6,339,586,789 instructions # 2.08 insn per cycle - 0.924905864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2043) (512y: 142) (512z: 3729) +EvtsPerSec[Rmb+ME] (23) = ( 6.863155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.956338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.956338e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.409053 sec +INFO: No Floating Point Exceptions have been reported + 4,255,960,621 cycles # 1.764 GHz + 6,404,237,522 instructions # 1.50 insn per cycle + 2.413297760 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 163) (512z: 3731) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 21d6c7dce..5f3726dce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:45:28 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:15:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.308012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.334511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336215e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.533591 sec +INFO: No Floating Point Exceptions have been reported + 2,205,805,938 cycles # 2.869 GHz + 3,444,884,387 instructions # 1.56 insn per cycle + 0.825307966 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.131073e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162405e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.048991 sec +INFO: No Floating Point Exceptions have been reported + 9,688,028,273 cycles # 2.924 GHz + 22,036,541,373 instructions # 2.27 insn per cycle + 3.369850770 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.465053e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466253e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466253e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.659075 sec -INFO: No Floating Point Exceptions have been reported - 23,299,884,551 cycles # 3.498 GHz - 78,642,385,482 instructions # 3.38 insn per cycle - 6.660602471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4797) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.879946e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880868e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880868e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.730075 sec +INFO: No Floating Point Exceptions have been reported + 25,643,153,835 cycles # 2.937 GHz + 78,954,437,611 instructions # 3.08 insn per cycle + 8.734432118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.769046e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.773489e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.773489e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.444836 sec -INFO: No Floating Point Exceptions have been reported - 11,831,605,720 cycles # 3.433 GHz - 39,496,739,081 instructions # 3.34 insn per cycle - 3.446501016 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.520374e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.523613e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523613e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.665332 sec +INFO: No Floating Point Exceptions have been reported + 13,099,128,105 cycles # 2.806 GHz + 39,559,591,481 instructions # 3.02 insn per cycle + 4.669271517 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.139317e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.141681e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.141681e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.445283 sec -INFO: No Floating Point Exceptions have been reported - 4,774,019,115 cycles # 3.300 GHz - 13,790,655,410 instructions # 2.89 insn per cycle - 1.446939987 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.059011e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.075081e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075081e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.042413 sec +INFO: No Floating Point Exceptions have been reported + 5,610,747,752 cycles # 2.743 GHz + 13,824,504,616 instructions # 2.46 insn per cycle + 2.046398223 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.259556e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262487e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262487e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.307858 sec -INFO: No Floating Point Exceptions have been reported - 4,320,303,980 cycles # 3.300 GHz - 12,485,707,896 instructions # 2.89 insn per cycle - 1.309459083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10522) (512y: 54) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.162703e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.184308e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.184308e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.796726 sec +INFO: No Floating Point Exceptions have been reported + 4,922,237,700 cycles # 2.735 GHz + 12,506,994,545 instructions # 2.54 insn per cycle + 1.800589813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.023817e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.031195e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.031195e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.816038 sec -INFO: No Floating Point Exceptions have been reported - 2,695,610,967 cycles # 3.298 GHz - 6,362,986,982 instructions # 2.36 insn per cycle - 0.817602729 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1979) (512y: 70) (512z: 9383) +EvtsPerSec[Rmb+ME] (23) = ( 6.982168e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.994583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.994583e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.355452 sec +INFO: No Floating Point Exceptions have been reported + 4,140,123,386 cycles # 1.756 GHz + 6,390,153,387 instructions # 1.54 insn per cycle + 2.359734916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 297847c7a..1d93db579 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:01:54 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:48:14 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.969430e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.268357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.268357e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523555 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,166,841,175 cycles # 2.867 GHz + 3,453,451,458 instructions # 1.59 insn per cycle + 0.814918597 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.613032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091578e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.317499 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,422,723,136 cycles # 2.898 GHz + 15,879,167,379 instructions # 1.52 insn per cycle + 3.658545225 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.464746e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.465946e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.465946e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.662758 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.878765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.879684e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.879684e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.741583 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 23,308,473,808 cycles # 3.498 GHz - 78,648,193,378 instructions # 3.37 insn per cycle - 6.664584876 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4797) (avx2: 0) (512y: 0) (512z: 0) + 25,666,397,830 cycles # 2.935 GHz + 78,965,262,045 instructions # 3.08 insn per cycle + 8.745862119 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.752036e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.756386e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.756386e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.459774 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.560977e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.564368e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.564368e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.617447 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 11,847,973,146 cycles # 3.423 GHz - 39,509,949,648 instructions # 3.33 insn per cycle - 3.461623879 seconds time elapsed + 13,088,026,122 cycles # 2.833 GHz + 39,572,731,788 instructions # 3.02 insn per cycle + 4.621932955 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.139708e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.142087e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.142087e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.447619 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.016507e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.032941e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.032941e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.058059 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,782,552,418 cycles # 3.300 GHz - 13,801,648,841 instructions # 2.89 insn per cycle - 1.449490065 seconds time elapsed + 5,631,279,447 cycles # 2.732 GHz + 13,836,775,240 instructions # 2.46 insn per cycle + 2.062638485 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.259264e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262201e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262201e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.310952 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.172752e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.195878e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.195878e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.800276 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,331,363,318 cycles # 3.300 GHz - 12,497,184,763 instructions # 2.89 insn per cycle - 1.312808274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10522) (512y: 54) (512z: 0) + 4,940,734,767 cycles # 2.740 GHz + 12,518,660,568 instructions # 2.53 insn per cycle + 1.804734715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.027874e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.035505e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.035505e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.817538 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.912888e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.925297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925297e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.384873 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,702,894,611 cycles # 3.300 GHz - 6,374,639,178 instructions # 2.36 insn per cycle - 0.819421724 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1979) (512y: 70) (512z: 9383) + 4,161,817,801 cycles # 1.743 GHz + 6,405,054,448 instructions # 1.54 insn per cycle + 2.389410885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index d015a553c..fc2e4b7aa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:06:16 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:59:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.322702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.346002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347615e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.517480 sec +INFO: No Floating Point Exceptions have been reported + 2,154,192,085 cycles # 2.875 GHz + 3,384,532,263 instructions # 1.57 insn per cycle + 0.808566781 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.137866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168584e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 3.158061 sec +INFO: No Floating Point Exceptions have been reported + 9,917,250,541 cycles # 2.905 GHz + 22,199,780,027 instructions # 2.24 insn per cycle + 3.469925481 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.465671e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466884e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466884e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.876744e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877706e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.657336 sec +TOTAL : 8.747062 sec INFO: No Floating Point Exceptions have been reported - 23,293,161,430 cycles # 3.498 GHz - 78,642,692,912 instructions # 3.38 insn per cycle - 6.658860761 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4797) (avx2: 0) (512y: 0) (512z: 0) + 25,648,612,426 cycles # 2.931 GHz + 78,952,780,288 instructions # 3.08 insn per cycle + 8.750949596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.756541e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.760949e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.760949e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.516490e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519795e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519795e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.453749 sec +TOTAL : 4.672214 sec INFO: No Floating Point Exceptions have been reported - 11,838,734,353 cycles # 3.427 GHz - 39,496,408,028 instructions # 3.34 insn per cycle - 3.455286382 seconds time elapsed + 13,064,181,413 cycles # 2.795 GHz + 39,557,975,845 instructions # 3.03 insn per cycle + 4.676162125 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.137233e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.139588e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.139588e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.035027e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.051957e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.051957e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.447832 sec +TOTAL : 2.050050 sec INFO: No Floating Point Exceptions have been reported - 4,782,007,929 cycles # 3.300 GHz - 13,790,489,272 instructions # 2.88 insn per cycle - 1.449389026 seconds time elapsed + 5,620,854,320 cycles # 2.738 GHz + 13,824,518,317 instructions # 2.46 insn per cycle + 2.054209584 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.258232e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.261078e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.261078e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.147098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.168017e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.168017e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.309034 sec +TOTAL : 1.801905 sec INFO: No Floating Point Exceptions have been reported - 4,324,118,060 cycles # 3.300 GHz - 12,485,861,077 instructions # 2.89 insn per cycle - 1.310602447 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10522) (512y: 54) (512z: 0) + 4,929,552,945 cycles # 2.731 GHz + 12,503,971,607 instructions # 2.54 insn per cycle + 1.805808510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.029599e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.037110e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.037110e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.940588e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.953237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.953237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.813862 sec +TOTAL : 2.372033 sec INFO: No Floating Point Exceptions have been reported - 2,688,511,126 cycles # 3.298 GHz - 6,362,944,374 instructions # 2.37 insn per cycle - 0.815409532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1979) (512y: 70) (512z: 9383) + 4,149,036,914 cycles # 1.747 GHz + 6,390,952,192 instructions # 1.54 insn per cycle + 2.375889237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index d0233b939..989861023 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:52:39 +DATE: 2024-09-15_11:56:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.318416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344516e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.313175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.336829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338592e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.514493 sec +TOTAL : 0.516520 sec INFO: No Floating Point Exceptions have been reported - 2,165,328,295 cycles # 2.912 GHz - 3,440,104,971 instructions # 1.59 insn per cycle - 0.805805306 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 2,145,509,423 cycles # 2.867 GHz + 3,354,996,461 instructions # 1.56 insn per cycle + 0.808722776 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.134192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167551e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.143109e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171681e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.172874e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.076661 sec +TOTAL : 3.078670 sec INFO: No Floating Point Exceptions have been reported - 9,913,932,835 cycles # 2.974 GHz - 22,782,867,940 instructions # 2.30 insn per cycle - 3.391164141 seconds time elapsed + 9,691,265,584 cycles # 2.906 GHz + 21,333,506,403 instructions # 2.20 insn per cycle + 3.390410721 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.920273e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.921234e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921234e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.881675e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.882585e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882585e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.546594 sec +TOTAL : 8.722848 sec INFO: No Floating Point Exceptions have been reported - 25,646,311,324 cycles # 3.000 GHz - 78,960,017,472 instructions # 3.08 insn per cycle - 8.550676592 seconds time elapsed + 25,651,425,779 cycles # 2.940 GHz + 78,952,412,711 instructions # 3.08 insn per cycle + 8.726726338 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.594154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597374e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597374e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.517566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.520849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.568989 sec +TOTAL : 4.669337 sec INFO: No Floating Point Exceptions have been reported - 13,102,878,898 cycles # 2.866 GHz - 39,561,274,098 instructions # 3.02 insn per cycle - 4.573103208 seconds time elapsed + 13,090,805,200 cycles # 2.802 GHz + 39,558,366,602 instructions # 3.02 insn per cycle + 4.673224740 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.221553e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.238357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.238357e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.039895e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.055822e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.055822e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.000959 sec +TOTAL : 2.046582 sec INFO: No Floating Point Exceptions have been reported - 5,606,842,172 cycles # 2.797 GHz - 13,823,630,038 instructions # 2.47 insn per cycle - 2.005081475 seconds time elapsed + 5,609,737,694 cycles # 2.737 GHz + 13,824,245,582 instructions # 2.46 insn per cycle + 2.050512234 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.323867e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.345458e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.345458e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.131967e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.152226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.152226e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.765274 sec +TOTAL : 1.803136 sec INFO: No Floating Point Exceptions have been reported - 4,923,007,337 cycles # 2.783 GHz - 12,505,051,193 instructions # 2.54 insn per cycle - 1.769470001 seconds time elapsed + 4,925,220,417 cycles # 2.727 GHz + 12,506,075,504 instructions # 2.54 insn per cycle + 1.807026447 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.173442e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.186199e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.186199e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.955213e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.967580e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.967580e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.292842 sec +TOTAL : 2.365271 sec INFO: No Floating Point Exceptions have been reported - 4,144,135,456 cycles # 1.805 GHz - 6,391,162,760 instructions # 1.54 insn per cycle - 2.297102234 seconds time elapsed + 4,141,642,997 cycles # 1.749 GHz + 6,391,703,659 instructions # 1.54 insn per cycle + 2.369209251 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index a47c65287..794fb1a80 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,46 +1,120 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:04:48 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:53:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.043181e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334802e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523228 sec +INFO: No Floating Point Exceptions have been reported + 2,169,607,107 cycles # 2.877 GHz + 3,459,237,306 instructions # 1.59 insn per cycle + 0.815202021 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.724697e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.162122e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163349e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.220306 sec +INFO: No Floating Point Exceptions have been reported + 10,133,189,238 cycles # 2.914 GHz + 23,148,993,968 instructions # 2.28 insn per cycle + 3.534065636 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.464503e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.465714e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.465714e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.660592 sec -INFO: No Floating Point Exceptions have been reported - 23,306,006,795 cycles # 3.498 GHz - 78,642,067,829 instructions # 3.37 insn per cycle - 6.662110674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4797) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.879192e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880098e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880098e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.734016 sec +INFO: No Floating Point Exceptions have been reported + 25,661,507,280 cycles # 2.937 GHz + 78,953,590,713 instructions # 3.08 insn per cycle + 8.737772518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.761142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.765537e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.765537e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.450474 sec -INFO: No Floating Point Exceptions have been reported - 11,835,868,804 cycles # 3.429 GHz - 39,496,299,465 instructions # 3.34 insn per cycle - 3.451992916 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.527947e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.531121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.531121e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.656259 sec +INFO: No Floating Point Exceptions have been reported + 13,056,695,540 cycles # 2.803 GHz + 39,560,471,761 instructions # 3.03 insn per cycle + 4.660116201 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.136083e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.138411e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.138411e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.449111 sec -INFO: No Floating Point Exceptions have been reported - 4,786,209,252 cycles # 3.300 GHz - 13,790,510,134 instructions # 2.88 insn per cycle - 1.450672865 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.991321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.007389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.007389e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.059547 sec +INFO: No Floating Point Exceptions have been reported + 5,612,692,411 cycles # 2.721 GHz + 13,825,461,651 instructions # 2.46 insn per cycle + 2.063599543 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.258644e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.261589e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.261589e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.308910 sec -INFO: No Floating Point Exceptions have been reported - 4,324,078,858 cycles # 3.300 GHz - 12,486,022,461 instructions # 2.89 insn per cycle - 1.310607706 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10522) (512y: 54) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.168393e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.189788e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.189788e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.795888 sec +INFO: No Floating Point Exceptions have been reported + 4,923,088,043 cycles # 2.737 GHz + 12,506,721,234 instructions # 2.54 insn per cycle + 1.799844991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.026430e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034088e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034088e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.815274 sec -INFO: No Floating Point Exceptions have been reported - 2,691,640,541 cycles # 3.296 GHz - 6,362,935,343 instructions # 2.36 insn per cycle - 0.816870795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1979) (512y: 70) (512z: 9383) +EvtsPerSec[Rmb+ME] (23) = ( 6.949785e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.962226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.962226e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.367179 sec +INFO: No Floating Point Exceptions have been reported + 4,144,948,568 cycles # 1.750 GHz + 6,391,796,529 instructions # 1.54 insn per cycle + 2.371216811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 3c8e91186..80da0089a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:45:44 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:16:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.297695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328135e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536243 sec +INFO: No Floating Point Exceptions have been reported + 2,198,395,650 cycles # 2.855 GHz + 3,395,768,128 instructions # 1.54 insn per cycle + 0.828484590 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.139738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.170223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171507e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.043962 sec +INFO: No Floating Point Exceptions have been reported + 9,646,114,898 cycles # 2.920 GHz + 22,170,499,370 instructions # 2.30 insn per cycle + 3.361578134 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.468255e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.469457e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.469457e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.650394 sec -INFO: No Floating Point Exceptions have been reported - 23,265,834,425 cycles # 3.498 GHz - 78,347,653,067 instructions # 3.37 insn per cycle - 6.651960491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4182) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.884927e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885834e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885834e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.707889 sec +INFO: No Floating Point Exceptions have been reported + 25,619,332,595 cycles # 2.941 GHz + 78,702,929,908 instructions # 3.07 insn per cycle + 8.712420077 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.696007e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.700269e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.700269e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.498010 sec -INFO: No Floating Point Exceptions have been reported - 11,880,569,986 cycles # 3.395 GHz - 39,416,349,299 instructions # 3.32 insn per cycle - 3.499573834 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.574307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.577560e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577560e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.595188 sec +INFO: No Floating Point Exceptions have been reported + 13,048,399,086 cycles # 2.838 GHz + 39,450,691,251 instructions # 3.02 insn per cycle + 4.599210719 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.129765e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.132081e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.132081e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.457285 sec -INFO: No Floating Point Exceptions have been reported - 4,813,323,364 cycles # 3.300 GHz - 13,891,393,777 instructions # 2.89 insn per cycle - 1.458851975 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.930739e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.946422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946422e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.074898 sec +INFO: No Floating Point Exceptions have been reported + 5,675,338,380 cycles # 2.732 GHz + 13,910,840,784 instructions # 2.45 insn per cycle + 2.079006346 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.250186e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253072e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.253072e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.317541 sec -INFO: No Floating Point Exceptions have been reported - 4,352,407,850 cycles # 3.300 GHz - 12,589,781,459 instructions # 2.89 insn per cycle - 1.319169667 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10455) (512y: 226) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.062486e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.083766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083766e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.816561 sec +INFO: No Floating Point Exceptions have been reported + 4,996,440,015 cycles # 2.746 GHz + 12,603,390,155 instructions # 2.52 insn per cycle + 1.820566072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.978134e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.985413e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.985413e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.834752 sec -INFO: No Floating Point Exceptions have been reported - 2,758,782,558 cycles # 3.300 GHz - 6,482,278,503 instructions # 2.35 insn per cycle - 0.836512880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1762) (512y: 184) (512z: 9381) +EvtsPerSec[Rmb+ME] (23) = ( 6.965367e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.977715e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.977715e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.361480 sec +INFO: No Floating Point Exceptions have been reported + 4,159,091,159 cycles # 1.759 GHz + 6,499,576,244 instructions # 1.56 insn per cycle + 2.365402468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1751) (512y: 194) (512z: 9382) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 98fd9dcca..a149b91e1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:56:05 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:36:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.106076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.133080e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.544695 sec +INFO: No Floating Point Exceptions have been reported + 2,219,428,600 cycles # 2.858 GHz + 3,493,527,234 instructions # 1.57 insn per cycle + 0.834499500 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.753348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779655e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.312449 sec +INFO: No Floating Point Exceptions have been reported + 10,421,899,087 cycles # 2.913 GHz + 24,058,421,553 instructions # 2.31 insn per cycle + 3.636091022 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.048075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.048588e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.048588e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 32.494476 sec -INFO: No Floating Point Exceptions have been reported - 113,653,044,619 cycles # 3.498 GHz - 143,704,552,718 instructions # 1.26 insn per cycle - 32.496203155 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21084) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.268976e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.269450e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.269450e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.423779 sec +INFO: No Floating Point Exceptions have been reported + 112,730,268,623 cycles # 2.934 GHz + 144,772,135,406 instructions # 1.28 insn per cycle + 38.427951659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.953356e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.956370e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.956370e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.154353 sec -INFO: No Floating Point Exceptions have been reported - 14,540,328,731 cycles # 3.499 GHz - 37,642,996,337 instructions # 2.59 insn per cycle - 4.155904157 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.077363e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.079742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.079742e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.335511 sec +INFO: No Floating Point Exceptions have been reported + 14,752,370,812 cycles # 2.763 GHz + 37,645,694,563 instructions # 2.55 insn per cycle + 5.339828429 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.047895e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.063171e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.063171e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.818407 sec -INFO: No Floating Point Exceptions have been reported - 6,005,265,391 cycles # 3.300 GHz - 13,056,790,638 instructions # 2.17 insn per cycle - 1.820066485 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.332306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.345602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.345602e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.243210 sec +INFO: No Floating Point Exceptions have been reported + 6,130,370,628 cycles # 2.729 GHz + 13,060,931,234 instructions # 2.13 insn per cycle + 2.247428561 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46973) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.092529e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.094710e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094710e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.506865 sec -INFO: No Floating Point Exceptions have been reported - 4,977,192,625 cycles # 3.300 GHz - 11,448,900,146 instructions # 2.30 insn per cycle - 1.508524432 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.812729e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.832105e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.832105e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.867280 sec +INFO: No Floating Point Exceptions have been reported + 5,063,580,201 cycles # 2.707 GHz + 11,453,397,200 instructions # 2.26 insn per cycle + 1.871531437 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40498) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.995153e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002506e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002506e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.827931 sec -INFO: No Floating Point Exceptions have been reported - 2,736,296,917 cycles # 3.299 GHz - 5,918,147,091 instructions # 2.16 insn per cycle - 0.829549967 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.266726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280024e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.263638 sec +INFO: No Floating Point Exceptions have been reported + 3,957,788,966 cycles # 1.746 GHz + 5,926,468,977 instructions # 1.50 insn per cycle + 2.267826067 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39349) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index b34d539e3..c0add05aa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:56:52 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:38:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.096121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.123086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.125058e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.541972 sec +INFO: No Floating Point Exceptions have been reported + 2,219,963,696 cycles # 2.870 GHz + 3,470,909,979 instructions # 1.56 insn per cycle + 0.830712751 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.756387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783295e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.298693 sec +INFO: No Floating Point Exceptions have been reported + 10,399,316,447 cycles # 2.922 GHz + 23,584,057,660 instructions # 2.27 insn per cycle + 3.614068267 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.984137e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.984631e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.984631e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 32.910187 sec -INFO: No Floating Point Exceptions have been reported - 115,155,606,195 cycles # 3.499 GHz - 143,402,302,108 instructions # 1.25 insn per cycle - 32.911764368 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20963) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.224460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.224899e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.224899e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.827526 sec +INFO: No Floating Point Exceptions have been reported + 113,783,414,735 cycles # 2.930 GHz + 144,278,309,276 instructions # 1.27 insn per cycle + 38.831628591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.826417e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.829262e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.829262e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.291936 sec -INFO: No Floating Point Exceptions have been reported - 15,016,274,847 cycles # 3.498 GHz - 38,385,253,005 instructions # 2.56 insn per cycle - 4.293716194 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.989108e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.991357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.492626 sec +INFO: No Floating Point Exceptions have been reported + 15,275,599,565 cycles # 2.780 GHz + 38,389,599,156 instructions # 2.51 insn per cycle + 5.496788286 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.176150e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.191499e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.191499e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.792998 sec -INFO: No Floating Point Exceptions have been reported - 5,921,591,055 cycles # 3.300 GHz - 12,930,869,971 instructions # 2.18 insn per cycle - 1.794707914 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.497881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.512338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512338e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.193501 sec +INFO: No Floating Point Exceptions have been reported + 6,019,122,923 cycles # 2.740 GHz + 12,933,620,431 instructions # 2.15 insn per cycle + 2.197765722 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46099) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.090485e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092676e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092676e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.509778 sec -INFO: No Floating Point Exceptions have been reported - 4,986,930,940 cycles # 3.300 GHz - 11,445,003,426 instructions # 2.29 insn per cycle - 1.511414211 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.839318e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.859960e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.859960e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.861821 sec +INFO: No Floating Point Exceptions have been reported + 5,093,783,286 cycles # 2.731 GHz + 11,449,481,812 instructions # 2.25 insn per cycle + 1.866150033 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40142) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.012034e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019539e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019539e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.821008 sec -INFO: No Floating Point Exceptions have been reported - 2,712,507,668 cycles # 3.298 GHz - 5,880,187,742 instructions # 2.17 insn per cycle - 0.822686293 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.279822e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.293417e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.293417e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.259516 sec +INFO: No Floating Point Exceptions have been reported + 3,958,337,222 cycles # 1.750 GHz + 5,889,113,860 instructions # 1.49 insn per cycle + 2.263750575 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38927) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 919a21da1..f53bdfcb0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:46:01 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:16:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.467249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.509285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513718e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.494218 sec +INFO: No Floating Point Exceptions have been reported + 2,034,103,432 cycles # 2.860 GHz + 3,045,186,386 instructions # 1.50 insn per cycle + 0.768364063 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.128844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.193248e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.796967 sec +INFO: No Floating Point Exceptions have been reported + 5,948,917,067 cycles # 2.924 GHz + 12,254,957,631 instructions # 2.06 insn per cycle + 2.089755272 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.512655e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.513875e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.513875e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.532482 sec -INFO: No Floating Point Exceptions have been reported - 22,860,482,449 cycles # 3.499 GHz - 78,985,585,208 instructions # 3.46 insn per cycle - 6.533948384 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3561) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.939424e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940380e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.461205 sec +INFO: No Floating Point Exceptions have been reported + 24,939,277,475 cycles # 2.947 GHz + 79,109,068,255 instructions # 3.17 insn per cycle + 8.465315543 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.762719e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.781386e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.781386e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.684772 sec -INFO: No Floating Point Exceptions have been reported - 5,759,833,339 cycles # 3.416 GHz - 20,238,655,308 instructions # 3.51 insn per cycle - 1.686293363 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.989306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.001573e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.001573e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.351266 sec +INFO: No Floating Point Exceptions have been reported + 6,525,064,847 cycles # 2.771 GHz + 20,269,487,959 instructions # 3.11 insn per cycle + 2.355049106 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.237421e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246938e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246938e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.737975 sec -INFO: No Floating Point Exceptions have been reported - 2,439,486,220 cycles # 3.300 GHz - 7,048,203,209 instructions # 2.89 insn per cycle - 0.739507351 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.582613e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.589051e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589051e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.041194 sec +INFO: No Floating Point Exceptions have been reported + 2,848,829,047 cycles # 2.729 GHz + 7,065,493,216 instructions # 2.48 insn per cycle + 1.044894531 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.474918e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486713e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486713e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.667580 sec -INFO: No Floating Point Exceptions have been reported - 2,207,250,583 cycles # 3.300 GHz - 6,392,330,973 instructions # 2.90 insn per cycle - 0.669133820 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11102) (512y: 9) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.794003e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.802231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.802231e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.919231 sec +INFO: No Floating Point Exceptions have been reported + 2,522,001,135 cycles # 2.735 GHz + 6,403,495,458 instructions # 2.54 insn per cycle + 0.923373159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.085970e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.118812e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.118812e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.406404 sec -INFO: No Floating Point Exceptions have been reported - 1,344,198,516 cycles # 3.297 GHz - 3,288,394,043 instructions # 2.45 insn per cycle - 0.407897071 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2614) (512y: 14) (512z: 9604) +EvtsPerSec[Rmb+ME] (23) = ( 1.403418e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.408437e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.408437e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.173408 sec +INFO: No Floating Point Exceptions have been reported + 2,065,585,282 cycles # 1.756 GHz + 3,303,212,083 instructions # 1.60 insn per cycle + 1.177101647 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 83bf4e8c3..99ccf0b7c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:02:11 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:48:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.945945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.468849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.468849e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.482042 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,997,062,353 cycles # 2.864 GHz + 3,031,546,242 instructions # 1.52 insn per cycle + 0.755384112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.954589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.017029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.017029e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 2.150287 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,911,046,669 cycles # 2.894 GHz + 9,720,301,924 instructions # 1.41 insn per cycle + 2.447262326 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.512044e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.513265e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.513265e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.535650 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.933142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934088e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934088e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.491300 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 22,862,717,969 cycles # 3.497 GHz - 78,993,207,841 instructions # 3.46 insn per cycle - 6.537275629 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3561) (avx2: 0) (512y: 0) (512z: 0) + 24,914,436,852 cycles # 2.933 GHz + 79,112,976,787 instructions # 3.18 insn per cycle + 8.495346137 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.737318e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.756480e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.756480e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.691320 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.985457e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.998623e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.998623e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.355396 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,777,092,719 cycles # 3.413 GHz - 20,248,332,384 instructions # 3.50 insn per cycle - 1.693109247 seconds time elapsed + 6,536,388,186 cycles # 2.771 GHz + 20,278,657,318 instructions # 3.10 insn per cycle + 2.359340287 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.236428e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246126e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246126e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.740378 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.588469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.595113e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.595113e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.040170 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,448,353,610 cycles # 3.300 GHz - 7,058,951,025 instructions # 2.88 insn per cycle - 0.742207753 seconds time elapsed + 2,854,653,705 cycles # 2.736 GHz + 7,075,192,119 instructions # 2.48 insn per cycle + 1.044116961 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.471483e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.483310e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.483310e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.670431 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.764583e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.772721e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.772721e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.937436 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,217,354,725 cycles # 3.300 GHz - 6,402,966,975 instructions # 2.89 insn per cycle - 0.672207322 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11102) (512y: 9) (512z: 0) + 2,529,004,808 cycles # 2.688 GHz + 6,413,196,189 instructions # 2.54 insn per cycle + 0.941494819 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.082078e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.115476e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.115476e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.408693 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.393518e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.398724e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.184860 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,352,310,626 cycles # 3.297 GHz - 3,299,059,875 instructions # 2.44 insn per cycle - 0.410356180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2614) (512y: 14) (512z: 9604) + 2,077,752,907 cycles # 1.749 GHz + 3,313,647,639 instructions # 1.59 insn per cycle + 1.188846310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index dca4c5b67..19f64c3e7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:06:32 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:59:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.517186e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.553165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.556664e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 +TOTAL : 0.477606 sec +INFO: No Floating Point Exceptions have been reported + 1,989,569,741 cycles # 2.873 GHz + 3,005,042,417 instructions # 1.51 insn per cycle + 0.749585148 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.132207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192869e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.892599 sec +INFO: No Floating Point Exceptions have been reported + 6,180,255,032 cycles # 2.913 GHz + 13,158,154,431 instructions # 2.13 insn per cycle + 2.179693271 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.511767e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.512989e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.512989e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.919400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920348e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920348e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.534633 sec +TOTAL : 8.550896 sec INFO: No Floating Point Exceptions have been reported - 22,867,029,327 cycles # 3.499 GHz - 78,985,833,115 instructions # 3.45 insn per cycle - 6.536139059 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3561) (avx2: 0) (512y: 0) (512z: 0) + 24,917,761,266 cycles # 2.921 GHz + 79,107,928,249 instructions # 3.17 insn per cycle + 8.554412617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.745714e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.764626e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.764626e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.947395e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.959971e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.959971e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.687701 sec +TOTAL : 2.367170 sec INFO: No Floating Point Exceptions have been reported - 5,761,015,668 cycles # 3.411 GHz - 20,238,735,092 instructions # 3.51 insn per cycle - 1.689187889 seconds time elapsed + 6,537,639,637 cycles # 2.759 GHz + 20,270,199,231 instructions # 3.10 insn per cycle + 2.370801699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.232457e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.242111e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.242111e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.589719e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.596352e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.596352e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.739716 sec +TOTAL : 1.038242 sec INFO: No Floating Point Exceptions have been reported - 2,445,369,349 cycles # 3.300 GHz - 7,048,476,388 instructions # 2.88 insn per cycle - 0.741256560 seconds time elapsed + 2,854,773,942 cycles # 2.742 GHz + 7,065,309,093 instructions # 2.47 insn per cycle + 1.041774606 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.474727e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486653e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486653e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.793297e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801693e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.667614 sec +TOTAL : 0.921472 sec INFO: No Floating Point Exceptions have been reported - 2,207,341,643 cycles # 3.300 GHz - 6,392,315,226 instructions # 2.90 insn per cycle - 0.669166078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11102) (512y: 9) (512z: 0) + 2,523,779,273 cycles # 2.730 GHz + 6,401,399,707 instructions # 2.54 insn per cycle + 0.925110369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.038275e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.070569e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.070569e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.398357e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403401e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403401e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.411204 sec +TOTAL : 1.179617 sec INFO: No Floating Point Exceptions have been reported - 1,359,373,229 cycles # 3.295 GHz - 3,288,786,353 instructions # 2.42 insn per cycle - 0.412808829 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2614) (512y: 14) (512z: 9604) + 2,071,965,297 cycles # 1.751 GHz + 3,301,502,867 instructions # 1.59 insn per cycle + 1.184374263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 5594b3833..71166778f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:53:13 +DATE: 2024-09-15_11:56:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.515702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.552924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.556552e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.530906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.566446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.569964e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.474495 sec +TOTAL : 0.476038 sec INFO: No Floating Point Exceptions have been reported - 2,011,973,393 cycles # 2.920 GHz - 3,012,287,196 instructions # 1.50 insn per cycle - 0.746226316 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 1,985,362,661 cycles # 2.871 GHz + 2,993,731,363 instructions # 1.51 insn per cycle + 0.748126319 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.157102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.219957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.222675e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.141583e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.198514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.201079e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.819017 sec +TOTAL : 1.826652 sec INFO: No Floating Point Exceptions have been reported - 6,066,507,433 cycles # 2.960 GHz - 12,322,393,310 instructions # 2.03 insn per cycle - 2.106524554 seconds time elapsed + 5,952,227,239 cycles # 2.897 GHz + 12,995,171,990 instructions # 2.18 insn per cycle + 2.112711437 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.960123e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.961113e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961113e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933291e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.372365 sec +TOTAL : 8.487946 sec INFO: No Floating Point Exceptions have been reported - 24,913,204,556 cycles # 2.975 GHz - 79,109,275,563 instructions # 3.18 insn per cycle - 8.376463077 seconds time elapsed + 24,924,562,928 cycles # 2.936 GHz + 79,109,269,886 instructions # 3.17 insn per cycle + 8.491506444 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.100093e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.113139e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.113139e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.966222e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.978443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.978443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.314710 sec +TOTAL : 2.359225 sec INFO: No Floating Point Exceptions have been reported - 6,539,620,343 cycles # 2.821 GHz - 20,270,856,622 instructions # 3.10 insn per cycle - 2.318690633 seconds time elapsed + 6,526,176,570 cycles # 2.763 GHz + 20,269,541,830 instructions # 3.11 insn per cycle + 2.362842375 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.634403e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.641056e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.641056e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.543496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.549592e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549592e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.008306 sec +TOTAL : 1.067538 sec INFO: No Floating Point Exceptions have been reported - 2,848,061,854 cycles # 2.816 GHz - 7,066,418,028 instructions # 2.48 insn per cycle - 1.012267305 seconds time elapsed + 2,860,867,267 cycles # 2.672 GHz + 7,065,461,760 instructions # 2.47 insn per cycle + 1.071393478 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.832427e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.840853e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.840853e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800483e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.808998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.808998e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.899982 sec +TOTAL : 0.915759 sec INFO: No Floating Point Exceptions have been reported - 2,519,286,089 cycles # 2.789 GHz - 6,403,779,811 instructions # 2.54 insn per cycle - 0.904006492 seconds time elapsed + 2,516,887,683 cycles # 2.739 GHz + 6,403,177,488 instructions # 2.54 insn per cycle + 0.919482910 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.443265e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.448590e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.448590e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.400235e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405247e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.141808 sec +TOTAL : 1.176437 sec INFO: No Floating Point Exceptions have been reported - 2,068,467,128 cycles # 1.806 GHz - 3,304,001,462 instructions # 1.60 insn per cycle - 1.145999074 seconds time elapsed + 2,065,991,158 cycles # 1.752 GHz + 3,303,729,120 instructions # 1.60 insn per cycle + 1.180108788 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 2dc0c996c..dd6ac1052 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,46 +1,120 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_10:05:05 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:54:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.065242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.543001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.546426e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.481262 sec +INFO: No Floating Point Exceptions have been reported + 1,996,897,335 cycles # 2.872 GHz + 2,970,634,149 instructions # 1.49 insn per cycle + 0.754034663 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.141287e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.200996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.203446e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.902964 sec +INFO: No Floating Point Exceptions have been reported + 6,190,673,302 cycles # 2.901 GHz + 13,306,269,630 instructions # 2.15 insn per cycle + 2.189368892 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.513640e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.514864e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.514864e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.529792 sec -INFO: No Floating Point Exceptions have been reported - 22,849,373,868 cycles # 3.499 GHz - 78,985,408,162 instructions # 3.46 insn per cycle - 6.531265597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3561) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.933487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934407e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934407e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.487231 sec +INFO: No Floating Point Exceptions have been reported + 24,900,546,223 cycles # 2.933 GHz + 79,107,234,987 instructions # 3.18 insn per cycle + 8.490956598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.752139e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.771077e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.771077e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.686672 sec -INFO: No Floating Point Exceptions have been reported - 5,761,349,777 cycles # 3.413 GHz - 20,238,467,626 instructions # 3.51 insn per cycle - 1.688178249 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.983704e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.996217e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.996217e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.353085 sec +INFO: No Floating Point Exceptions have been reported + 6,541,995,614 cycles # 2.777 GHz + 20,269,407,860 instructions # 3.10 insn per cycle + 2.356873297 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.233553e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.243099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.243099e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.739249 sec -INFO: No Floating Point Exceptions have been reported - 2,443,650,863 cycles # 3.300 GHz - 7,048,299,452 instructions # 2.88 insn per cycle - 0.740763767 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.585450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591953e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.039372 sec +INFO: No Floating Point Exceptions have been reported + 2,850,375,088 cycles # 2.735 GHz + 7,065,899,998 instructions # 2.48 insn per cycle + 1.043028953 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.472731e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.484316e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.484316e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.668008 sec -INFO: No Floating Point Exceptions have been reported - 2,208,359,276 cycles # 3.300 GHz - 6,392,392,175 instructions # 2.89 insn per cycle - 0.669463341 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11102) (512y: 9) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.792341e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800787e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.920003 sec +INFO: No Floating Point Exceptions have been reported + 2,517,551,147 cycles # 2.728 GHz + 6,403,207,803 instructions # 2.54 insn per cycle + 0.923687532 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.084680e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.117372e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.117372e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.406439 sec -INFO: No Floating Point Exceptions have been reported - 1,344,653,168 cycles # 3.298 GHz - 3,288,234,799 instructions # 2.45 insn per cycle - 0.407951695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2614) (512y: 14) (512z: 9604) +EvtsPerSec[Rmb+ME] (23) = ( 1.402198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.407285e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.407285e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.174725 sec +INFO: No Floating Point Exceptions have been reported + 2,068,104,176 cycles # 1.756 GHz + 3,303,725,822 instructions # 1.60 insn per cycle + 1.178407380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 8f3d395ab..0807d31ee 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:46:14 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:17:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.482391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.527725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.532031e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493760 sec +INFO: No Floating Point Exceptions have been reported + 2,047,228,800 cycles # 2.877 GHz + 3,039,242,832 instructions # 1.48 insn per cycle + 0.768472979 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.102852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.164567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.167207e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.802499 sec +INFO: No Floating Point Exceptions have been reported + 5,908,213,594 cycles # 2.909 GHz + 12,456,477,911 instructions # 2.11 insn per cycle + 2.093284072 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.515093e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516321e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.516321e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.526025 sec -INFO: No Floating Point Exceptions have been reported - 22,833,447,394 cycles # 3.498 GHz - 78,622,176,997 instructions # 3.44 insn per cycle - 6.527542853 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3098) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.936218e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937180e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937180e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.475402 sec +INFO: No Floating Point Exceptions have been reported + 24,949,332,764 cycles # 2.943 GHz + 78,839,555,653 instructions # 3.16 insn per cycle + 8.479529977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863368736420E-004 -Relative difference = 5.082977799514712e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866250177339E-004 +Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.778368e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.797633e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.797633e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.682111 sec -INFO: No Floating Point Exceptions have been reported - 5,702,239,859 cycles # 3.387 GHz - 20,198,052,558 instructions # 3.54 insn per cycle - 1.683644751 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.122699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.135567e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.135567e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.306947 sec +INFO: No Floating Point Exceptions have been reported + 6,466,639,499 cycles # 2.800 GHz + 20,230,851,658 instructions # 3.13 insn per cycle + 2.310638118 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861448331612E-004 Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.127729e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.136586e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.136586e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.775730 sec -INFO: No Floating Point Exceptions have been reported - 2,563,969,579 cycles # 3.300 GHz - 7,189,168,651 instructions # 2.80 insn per cycle - 0.777233198 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.507818e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.513887e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.513887e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.092466 sec +INFO: No Floating Point Exceptions have been reported + 2,980,915,950 cycles # 2.722 GHz + 7,206,628,057 instructions # 2.42 insn per cycle + 1.096222389 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.414813e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.426060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.426060e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.683994 sec -INFO: No Floating Point Exceptions have been reported - 2,261,449,111 cycles # 3.300 GHz - 6,528,854,948 instructions # 2.89 insn per cycle - 0.685544430 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11475) (512y: 12) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.724603e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732183e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732183e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.955874 sec +INFO: No Floating Point Exceptions have been reported + 2,613,667,112 cycles # 2.726 GHz + 6,544,516,026 instructions # 2.50 insn per cycle + 0.959652526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 26) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.860635e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.889842e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.889842e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.429627 sec -INFO: No Floating Point Exceptions have been reported - 1,420,694,502 cycles # 3.297 GHz - 3,447,208,375 instructions # 2.43 insn per cycle - 0.431155320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3047) (512y: 17) (512z: 9676) +EvtsPerSec[Rmb+ME] (23) = ( 1.352025e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.356715e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.356715e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.218129 sec +INFO: No Floating Point Exceptions have been reported + 2,137,040,914 cycles # 1.750 GHz + 3,460,849,319 instructions # 1.62 insn per cycle + 1.221974093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032316561E-004 Relative difference = 3.066631594207157e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index a80f54652..507a64eed 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:57:40 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:39:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.567838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.605874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609566e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.496412 sec +INFO: No Floating Point Exceptions have been reported + 2,051,587,227 cycles # 2.873 GHz + 3,025,794,403 instructions # 1.47 insn per cycle + 0.774558823 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.651284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.721094e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.724249e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.731680 sec +INFO: No Floating Point Exceptions have been reported + 5,770,677,421 cycles # 2.916 GHz + 12,010,283,008 instructions # 2.08 insn per cycle + 2.035197700 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 6.170616e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.171350e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.171350e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.583153 sec -INFO: No Floating Point Exceptions have been reported - 93,017,346,685 cycles # 3.499 GHz - 135,023,854,992 instructions # 1.45 insn per cycle - 26.584764454 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15773) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.459828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.460600e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.460600e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.042280 sec +INFO: No Floating Point Exceptions have been reported + 86,122,252,676 cycles # 2.867 GHz + 135,657,307,138 instructions # 1.58 insn per cycle + 30.046456599 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349723273619E-004 -Relative difference = 4.17540435263525e-09 +Avg ME (F77/C++) = 6.6275349717465765E-004 +Relative difference = 4.26303654465793e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.669008e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.683943e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.683943e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.896842 sec -INFO: No Floating Point Exceptions have been reported - 6,519,022,274 cycles # 3.434 GHz - 19,353,339,936 instructions # 2.97 insn per cycle - 1.898382175 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.672428e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686393e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.462988 sec +INFO: No Floating Point Exceptions have been reported + 6,758,193,786 cycles # 2.742 GHz + 19,357,772,182 instructions # 2.86 insn per cycle + 2.467248153 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862748188362E-004 Relative difference = 4.14665283800746e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.744578e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.750377e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.750377e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.944987 sec -INFO: No Floating Point Exceptions have been reported - 3,122,659,079 cycles # 3.300 GHz - 6,789,191,696 instructions # 2.17 insn per cycle - 0.946523171 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.362305e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367046e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367046e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.208685 sec +INFO: No Floating Point Exceptions have been reported + 3,166,621,827 cycles # 2.612 GHz + 6,792,444,940 instructions # 2.15 insn per cycle + 1.212802697 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49012) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.115633e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.124241e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.124241e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.780134 sec -INFO: No Floating Point Exceptions have been reported - 2,578,805,187 cycles # 3.300 GHz - 5,965,783,279 instructions # 2.31 insn per cycle - 0.781702457 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.652877e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.659885e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.659885e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.997146 sec +INFO: No Floating Point Exceptions have been reported + 2,625,468,482 cycles # 2.624 GHz + 5,970,509,824 instructions # 2.27 insn per cycle + 1.001249505 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42601) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.307029e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328463e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328463e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 0.500996 sec -INFO: No Floating Point Exceptions have been reported - 1,657,050,717 cycles # 3.299 GHz - 3,487,640,447 instructions # 2.10 insn per cycle - 0.502578510 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.322992e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.327409e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327409e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.244528 sec +INFO: No Floating Point Exceptions have been reported + 2,076,691,772 cycles # 1.664 GHz + 3,494,505,327 instructions # 1.68 insn per cycle + 1.248709350 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5207) (512y: 3) (512z:44836) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750237027223E-004 Relative difference = 3.5765412974815996e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 3b9af81f3..c027e7477 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:58:16 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:40:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.598287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.601913e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.492421 sec +INFO: No Floating Point Exceptions have been reported + 2,010,223,902 cycles # 2.826 GHz + 3,031,193,233 instructions # 1.51 insn per cycle + 0.770287796 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.689601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.749985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.752940e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.732807 sec +INFO: No Floating Point Exceptions have been reported + 5,640,346,322 cycles # 2.872 GHz + 11,210,275,869 instructions # 1.99 insn per cycle + 2.022037581 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 6.203317e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.204061e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.204061e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204930e-01 +- 3.252404e-01 ) GeV^-4 -TOTAL : 26.443224 sec -INFO: No Floating Point Exceptions have been reported - 92,519,847,982 cycles # 3.499 GHz - 134,744,771,667 instructions # 1.46 insn per cycle - 26.444811458 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15373) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.446094e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.446837e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.446837e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.118401 sec +INFO: No Floating Point Exceptions have been reported + 86,113,084,692 cycles # 2.859 GHz + 135,363,065,912 instructions # 1.57 insn per cycle + 30.122446956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349622942764E-004 -Relative difference = 5.689253087276038e-09 +Avg ME (F77/C++) = 6.6275349662128086E-004 +Relative difference = 5.098002770919431e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.606517e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.621032e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.621032e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.910551 sec -INFO: No Floating Point Exceptions have been reported - 6,637,809,135 cycles # 3.472 GHz - 19,403,865,942 instructions # 2.92 insn per cycle - 1.912114625 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.516652e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.527742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.527742e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.521798 sec +INFO: No Floating Point Exceptions have been reported + 6,856,870,344 cycles # 2.715 GHz + 19,407,796,379 instructions # 2.83 insn per cycle + 2.529187527 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862799683282E-004 Relative difference = 4.2243518621014775e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.788132e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.794321e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.794321e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.922283 sec -INFO: No Floating Point Exceptions have been reported - 3,047,690,654 cycles # 3.300 GHz - 6,712,636,595 instructions # 2.20 insn per cycle - 0.923793261 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.378784e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383778e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383778e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.194647 sec +INFO: No Floating Point Exceptions have been reported + 3,106,911,149 cycles # 2.593 GHz + 6,716,375,817 instructions # 2.16 insn per cycle + 1.199018593 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47699) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731623419345E-004 Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.124464e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.133213e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.133213e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.776857 sec -INFO: No Floating Point Exceptions have been reported - 2,567,788,851 cycles # 3.300 GHz - 5,964,970,238 instructions # 2.32 insn per cycle - 0.778380309 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.633831e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642301e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642301e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.009236 sec +INFO: No Floating Point Exceptions have been reported + 2,628,290,758 cycles # 2.601 GHz + 5,969,462,739 instructions # 2.27 insn per cycle + 1.017917591 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41882) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731623419345E-004 Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.323499e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.345164e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.345164e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 0.498438 sec -INFO: No Floating Point Exceptions have been reported - 1,647,448,512 cycles # 3.297 GHz - 3,481,202,486 instructions # 2.11 insn per cycle - 0.499935143 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.325974e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330533e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330533e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.242092 sec +INFO: No Floating Point Exceptions have been reported + 2,077,381,824 cycles # 1.674 GHz + 3,490,865,426 instructions # 1.68 insn per cycle + 1.248861709 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44487) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750247886592E-004 Relative difference = 3.740400032174438e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index e4a054af7..f1d40dff2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:46:26 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:17:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.307071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339154e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.532508 sec +INFO: No Floating Point Exceptions have been reported + 2,203,526,312 cycles # 2.869 GHz + 3,467,986,959 instructions # 1.57 insn per cycle + 0.824379177 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.133497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.164330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165560e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.045403 sec +INFO: No Floating Point Exceptions have been reported + 9,651,769,916 cycles # 2.918 GHz + 21,560,396,285 instructions # 2.23 insn per cycle + 3.363407224 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.444450e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.445637e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.445637e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.715248 sec -INFO: No Floating Point Exceptions have been reported - 23,498,836,888 cycles # 3.499 GHz - 79,171,623,626 instructions # 3.37 insn per cycle - 6.716804251 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.856718e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.857596e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857596e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.839571 sec +INFO: No Floating Point Exceptions have been reported + 25,916,613,183 cycles # 2.931 GHz + 79,423,792,934 instructions # 3.06 insn per cycle + 8.843857471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.601473e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.605588e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.605588e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.569996 sec -INFO: No Floating Point Exceptions have been reported - 11,785,251,522 cycles # 3.300 GHz - 38,761,314,630 instructions # 3.29 insn per cycle - 3.571582202 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.495399e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.498545e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498545e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.699126 sec +INFO: No Floating Point Exceptions have been reported + 12,847,395,150 cycles # 2.733 GHz + 38,826,102,030 instructions # 3.02 insn per cycle + 4.703180057 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.143663e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.146030e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.146030e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.439685 sec -INFO: No Floating Point Exceptions have been reported - 4,755,365,686 cycles # 3.300 GHz - 13,583,733,392 instructions # 2.86 insn per cycle - 1.441292966 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.037182e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.053225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.053225e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.047520 sec +INFO: No Floating Point Exceptions have been reported + 5,598,661,180 cycles # 2.730 GHz + 13,618,631,873 instructions # 2.43 insn per cycle + 2.051512013 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.261108e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.263943e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.263943e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.306175 sec -INFO: No Floating Point Exceptions have been reported - 4,314,752,098 cycles # 3.300 GHz - 12,275,546,438 instructions # 2.85 insn per cycle - 1.307774762 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10414) (512y: 45) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.221665e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.243251e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.243251e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.785642 sec +INFO: No Floating Point Exceptions have been reported + 4,865,374,839 cycles # 2.720 GHz + 12,297,660,832 instructions # 2.53 insn per cycle + 1.789585857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.018676e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026206e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026206e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.818387 sec -INFO: No Floating Point Exceptions have been reported - 2,702,253,621 cycles # 3.296 GHz - 6,343,839,690 instructions # 2.35 insn per cycle - 0.820037467 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1984) (512y: 61) (512z: 9357) +EvtsPerSec[Rmb+ME] (23) = ( 6.872514e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.884400e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.884400e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.393483 sec +INFO: No Floating Point Exceptions have been reported + 4,171,721,525 cycles # 1.741 GHz + 6,391,185,056 instructions # 1.53 insn per cycle + 2.397568985 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1980) (512y: 93) (512z: 9360) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 527ee7e45..ae3635632 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_09:46:43 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +DATE: 2024-09-15_11:18:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.320961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.350573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352533e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.532217 sec +INFO: No Floating Point Exceptions have been reported + 2,207,479,579 cycles # 2.873 GHz + 3,464,148,832 instructions # 1.57 insn per cycle + 0.824297603 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.148990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.180422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181658e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.035739 sec +INFO: No Floating Point Exceptions have been reported + 9,612,803,881 cycles # 2.915 GHz + 20,074,302,744 instructions # 2.09 insn per cycle + 3.353532451 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.449348e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.450537e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.450537e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.701778 sec -INFO: No Floating Point Exceptions have been reported - 23,434,282,774 cycles # 3.496 GHz - 79,093,647,040 instructions # 3.38 insn per cycle - 6.703384022 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4405) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.831511e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.832364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.832364e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.961804 sec +INFO: No Floating Point Exceptions have been reported + 26,010,493,082 cycles # 2.902 GHz + 79,449,384,960 instructions # 3.05 insn per cycle + 8.965752302 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.625119e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.629278e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.629278e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.551577 sec -INFO: No Floating Point Exceptions have been reported - 11,724,585,334 cycles # 3.300 GHz - 38,692,545,578 instructions # 3.30 insn per cycle - 3.553215708 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.477024e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.480127e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480127e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.723753 sec +INFO: No Floating Point Exceptions have been reported + 12,826,084,303 cycles # 2.714 GHz + 38,778,289,694 instructions # 3.02 insn per cycle + 4.727826379 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.112524e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.114765e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.114765e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.479825 sec -INFO: No Floating Point Exceptions have been reported - 4,887,835,354 cycles # 3.300 GHz - 13,686,260,573 instructions # 2.80 insn per cycle - 1.481432698 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.051767e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067821e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.043694 sec +INFO: No Floating Point Exceptions have been reported + 5,591,778,218 cycles # 2.733 GHz + 13,733,552,430 instructions # 2.46 insn per cycle + 2.047665232 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.251706e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.254569e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.254569e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.315853 sec -INFO: No Floating Point Exceptions have been reported - 4,346,676,063 cycles # 3.300 GHz - 12,379,946,329 instructions # 2.85 insn per cycle - 1.317434066 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10354) (512y: 225) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.123898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.144357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.144357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.804403 sec +INFO: No Floating Point Exceptions have been reported + 4,951,573,094 cycles # 2.739 GHz + 12,422,632,916 instructions # 2.51 insn per cycle + 1.808331695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.010143e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.017615e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.017615e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.821679 sec -INFO: No Floating Point Exceptions have been reported - 2,714,943,480 cycles # 3.299 GHz - 6,444,390,104 instructions # 2.37 insn per cycle - 0.823289944 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1814) (512y: 181) (512z: 9368) +EvtsPerSec[Rmb+ME] (23) = ( 6.884699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.896731e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.896731e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.389141 sec +INFO: No Floating Point Exceptions have been reported + 4,181,828,175 cycles # 1.750 GHz + 6,496,177,989 instructions # 1.55 insn per cycle + 2.393377398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 191) (512z: 9369) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index ba6072372..407fbbe6c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:47:25 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:20:21 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.059284e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059813e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.451621 sec +INFO: No Floating Point Exceptions have been reported + 8,080,887,114 cycles # 2.907 GHz + 16,734,437,330 instructions # 2.07 insn per cycle + 2.836211679 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.254596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.256737e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256950e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.020216 sec +INFO: No Floating Point Exceptions have been reported + 12,719,492,672 cycles # 2.923 GHz + 29,448,097,640 instructions # 2.32 insn per cycle + 4.407436029 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.965506e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.965758e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.965758e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.299329 sec -INFO: No Floating Point Exceptions have been reported - 18,545,449,831 cycles # 3.499 GHz - 53,609,315,061 instructions # 2.89 insn per cycle - 5.300821963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32499) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.610103e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.610301e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.610301e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.935255 sec +INFO: No Floating Point Exceptions have been reported + 18,974,774,871 cycles # 2.735 GHz + 53,899,721,094 instructions # 2.84 insn per cycle + 6.939338261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.886496e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.886583e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.886583e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.799849 sec -INFO: No Floating Point Exceptions have been reported - 9,799,044,340 cycles # 3.498 GHz - 27,127,500,270 instructions # 2.77 insn per cycle - 2.801431333 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.579226e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.579318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.579318e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.345304 sec +INFO: No Floating Point Exceptions have been reported + 9,800,813,517 cycles # 2.927 GHz + 27,149,189,789 instructions # 2.77 insn per cycle + 3.349514409 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.222815e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.223248e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.223248e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.250803 sec -INFO: No Floating Point Exceptions have been reported - 4,131,816,531 cycles # 3.300 GHz - 9,576,703,211 instructions # 2.32 insn per cycle - 1.252319892 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.366336e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366803e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366803e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.570292 sec +INFO: No Floating Point Exceptions have been reported + 4,287,053,926 cycles # 2.724 GHz + 9,590,127,631 instructions # 2.24 insn per cycle + 1.574599019 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.854175e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.854734e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.854734e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.088082 sec -INFO: No Floating Point Exceptions have been reported - 3,594,913,818 cycles # 3.300 GHz - 8,501,934,373 instructions # 2.36 insn per cycle - 1.089622378 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80692) (512y: 55) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.904765e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.905290e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905290e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.353979 sec +INFO: No Floating Point Exceptions have been reported + 3,709,436,689 cycles # 2.733 GHz + 8,514,247,183 instructions # 2.30 insn per cycle + 1.357880276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.366191e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.367851e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.367851e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.632459 sec -INFO: No Floating Point Exceptions have been reported - 2,083,082,510 cycles # 3.287 GHz - 4,269,037,403 instructions # 2.05 insn per cycle - 0.634094589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2857) (512y: 71) (512z:79111) +EvtsPerSec[Rmb+ME] (23) = ( 3.407683e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408196e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.552623 sec +INFO: No Floating Point Exceptions have been reported + 2,699,560,921 cycles # 1.736 GHz + 4,280,862,154 instructions # 1.59 insn per cycle + 1.556608026 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 46aad9671..e03215103 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,30 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_10:02:23 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:49:15 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.054597e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.057500e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.057500e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.436820 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,044,156,582 cycles # 2.913 GHz + 18,167,469,518 instructions # 2.26 insn per cycle + 2.819990438 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.188185e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.221546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.221546e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.024442 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 12,704,137,155 cycles # 2.918 GHz + 27,467,799,669 instructions # 2.16 insn per cycle + 4.411963692 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.959478e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.959732e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.959732e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.302822 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.352982e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.353176e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.353176e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.182913 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 18,557,415,270 cycles # 3.499 GHz - 53,611,384,150 instructions # 2.89 insn per cycle - 5.304465399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32499) (avx2: 0) (512y: 0) (512z: 0) + 19,533,134,643 cycles # 2.719 GHz + 53,904,822,620 instructions # 2.76 insn per cycle + 7.186820393 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -50,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.884894e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.884981e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.884981e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.802479 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.583220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.583307e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.583307e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.337581 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,804,112,308 cycles # 3.497 GHz - 27,129,641,085 instructions # 2.77 insn per cycle - 2.804176142 seconds time elapsed + 9,779,129,351 cycles # 2.927 GHz + 27,151,664,900 instructions # 2.78 insn per cycle + 3.341583664 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -84,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.230358e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.230780e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.230780e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.248930 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.365450e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.365854e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365854e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.570834 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,126,132,975 cycles # 3.300 GHz - 9,578,684,382 instructions # 2.32 insn per cycle - 1.250614553 seconds time elapsed + 4,275,074,655 cycles # 2.716 GHz + 9,592,294,661 instructions # 2.24 insn per cycle + 1.574792391 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -118,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.855831e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.856394e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.856394e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.088159 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.876513e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.877048e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.877048e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.364090 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,595,559,768 cycles # 3.300 GHz - 8,503,853,040 instructions # 2.37 insn per cycle - 1.089833582 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80692) (512y: 55) (512z: 0) + 3,720,013,902 cycles # 2.721 GHz + 8,517,094,572 instructions # 2.29 insn per cycle + 1.368386654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -152,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.555595e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.557326e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.557326e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.618707 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.420108e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420617e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420617e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.546895 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,043,689,035 cycles # 3.296 GHz - 4,271,034,338 instructions # 2.09 insn per cycle - 0.620361527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2857) (512y: 71) (512z:79111) + 2,698,104,238 cycles # 1.741 GHz + 4,283,566,876 instructions # 1.59 insn per cycle + 1.551097954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -186,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 65b332ec8..71b1803a4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:48:15 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:21:49 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.055075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055529e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055650e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.454190 sec +INFO: No Floating Point Exceptions have been reported + 8,101,270,896 cycles # 2.912 GHz + 18,320,414,768 instructions # 2.26 insn per cycle + 2.837550341 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.224205e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.226444e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.226692e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.029157 sec +INFO: No Floating Point Exceptions have been reported + 12,695,828,795 cycles # 2.910 GHz + 28,709,503,011 instructions # 2.26 insn per cycle + 4.420420636 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.971368e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.971621e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.971621e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.296883 sec -INFO: No Floating Point Exceptions have been reported - 18,531,801,459 cycles # 3.498 GHz - 53,628,860,461 instructions # 2.89 insn per cycle - 5.298434989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32139) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.201824e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.202080e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.202080e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.438167 sec +INFO: No Floating Point Exceptions have been reported + 18,843,132,149 cycles # 2.926 GHz + 53,928,570,497 instructions # 2.86 insn per cycle + 6.442267111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.884705e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.884792e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.884792e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.802607 sec -INFO: No Floating Point Exceptions have been reported - 9,807,669,299 cycles # 3.498 GHz - 27,114,388,159 instructions # 2.76 insn per cycle - 2.804148812 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.562611e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.562704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562704e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.380180 sec +INFO: No Floating Point Exceptions have been reported + 9,918,861,148 cycles # 2.932 GHz + 27,128,280,341 instructions # 2.74 insn per cycle + 3.383996000 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.265793e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.266229e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.266229e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.238087 sec -INFO: No Floating Point Exceptions have been reported - 4,089,819,684 cycles # 3.300 GHz - 9,574,852,634 instructions # 2.34 insn per cycle - 1.239613303 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.368711e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.369114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.369114e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.568940 sec +INFO: No Floating Point Exceptions have been reported + 4,289,720,535 cycles # 2.728 GHz + 9,584,928,513 instructions # 2.23 insn per cycle + 1.573132113 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.867053e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.867603e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.867603e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.085229 sec -INFO: No Floating Point Exceptions have been reported - 3,585,300,735 cycles # 3.300 GHz - 8,498,044,499 instructions # 2.37 insn per cycle - 1.086741064 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80664) (512y: 225) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.874256e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.874798e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.874798e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.364932 sec +INFO: No Floating Point Exceptions have been reported + 3,728,944,037 cycles # 2.726 GHz + 8,507,330,131 instructions # 2.28 insn per cycle + 1.368786926 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.524845e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.526526e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.526526e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.620642 sec -INFO: No Floating Point Exceptions have been reported - 2,049,573,753 cycles # 3.295 GHz - 4,270,949,619 instructions # 2.08 insn per cycle - 0.622178806 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2701) (512y: 175) (512z:79097) +EvtsPerSec[Rmb+ME] (23) = ( 3.414224e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414743e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414743e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.549013 sec +INFO: No Floating Point Exceptions have been reported + 2,698,122,905 cycles # 1.738 GHz + 4,280,648,246 instructions # 1.59 insn per cycle + 1.553090413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 185) (512z:79098) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a82780767..26694465d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:49:05 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:23:17 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.208704e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.209632e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.209859e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.761935 sec +INFO: No Floating Point Exceptions have been reported + 5,908,004,381 cycles # 2.901 GHz + 11,686,361,328 instructions # 1.98 insn per cycle + 2.093948305 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.102338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.102897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103014e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.075560 sec +INFO: No Floating Point Exceptions have been reported + 6,795,219,354 cycles # 2.902 GHz + 14,967,758,240 instructions # 2.20 insn per cycle + 2.398428041 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.023815e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.023843e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.023843e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.158316 sec -INFO: No Floating Point Exceptions have been reported - 18,049,131,108 cycles # 3.498 GHz - 53,709,451,387 instructions # 2.98 insn per cycle - 5.159871335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20347) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.563117e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.563375e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.563375e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.168224 sec +INFO: No Floating Point Exceptions have been reported + 18,106,019,929 cycles # 2.934 GHz + 53,907,716,361 instructions # 2.98 insn per cycle + 6.172403776 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087560668E-003 -Relative difference = 2.1197897336597208e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.194323e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.194780e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.194780e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.260020 sec -INFO: No Floating Point Exceptions have been reported - 4,411,342,415 cycles # 3.497 GHz - 13,793,255,466 instructions # 3.13 insn per cycle - 1.261536752 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.366569e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366962e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366962e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.570259 sec +INFO: No Floating Point Exceptions have been reported + 4,597,646,888 cycles # 2.923 GHz + 13,807,163,752 instructions # 3.00 insn per cycle + 1.574045592 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896367235E-003 Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.464841e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.466734e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.466734e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.624304 sec -INFO: No Floating Point Exceptions have been reported - 2,064,565,769 cycles # 3.300 GHz - 4,827,347,108 instructions # 2.34 insn per cycle - 0.625923663 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.801272e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.802916e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.802916e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.778379 sec +INFO: No Floating Point Exceptions have been reported + 2,130,043,758 cycles # 2.726 GHz + 4,836,599,174 instructions # 2.27 insn per cycle + 0.782206721 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.631311e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.633600e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.633600e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.548741 sec -INFO: No Floating Point Exceptions have been reported - 1,815,064,332 cycles # 3.300 GHz - 4,282,229,596 instructions # 2.36 insn per cycle - 0.550296601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81266) (512y: 10) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.682520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.684604e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.684604e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688940 sec +INFO: No Floating Point Exceptions have been reported + 1,884,507,725 cycles # 2.723 GHz + 4,290,819,235 instructions # 2.28 insn per cycle + 0.692749981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.704206e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.704947e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.704947e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.310947 sec -INFO: No Floating Point Exceptions have been reported - 1,028,259,669 cycles # 3.293 GHz - 2,154,144,001 instructions # 2.09 insn per cycle - 0.312447594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3492) (512y: 15) (512z:79329) +EvtsPerSec[Rmb+ME] (23) = ( 6.875530e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.877565e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.877565e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.770700 sec +INFO: No Floating Point Exceptions have been reported + 1,352,613,897 cycles # 1.747 GHz + 2,162,405,721 instructions # 1.60 insn per cycle + 0.774947088 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982676284E-003 Relative difference = 2.004124217057488e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 404175faa..8e4037314 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,30 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_10:03:13 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:50:44 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.261572e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.268191e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.268191e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 +TOTAL : 1.738260 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,820,188,860 cycles # 2.913 GHz + 12,502,480,728 instructions # 2.15 insn per cycle + 2.056507800 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.148842e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.160493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160493e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 +TOTAL : 2.045649 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,753,066,902 cycles # 2.917 GHz + 14,813,097,918 instructions # 2.19 insn per cycle + 2.374262766 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.022761e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.022790e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.022790e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.163587 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.502637e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.502889e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.502889e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.211468 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 18,054,092,101 cycles # 3.495 GHz - 53,711,654,498 instructions # 2.98 insn per cycle - 5.165281201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20347) (avx2: 0) (512y: 0) (512z: 0) + 18,143,644,137 cycles # 2.921 GHz + 53,909,939,321 instructions # 2.97 insn per cycle + 6.215559174 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -50,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087560668E-003 -Relative difference = 2.1197897336597208e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.195567e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.196030e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.196030e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.259672 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.339632e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340031e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.583158 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,411,510,830 cycles # 3.498 GHz - 13,795,267,035 instructions # 3.13 insn per cycle - 1.261326794 seconds time elapsed + 4,644,642,721 cycles # 2.928 GHz + 13,808,855,992 instructions # 2.97 insn per cycle + 1.587116749 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -84,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896367235E-003 Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.453887e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.455760e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.455760e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.625229 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.786207e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.787843e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.787843e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.780530 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,067,908,391 cycles # 3.300 GHz - 4,829,484,912 instructions # 2.34 insn per cycle - 0.626910053 seconds time elapsed + 2,130,238,055 cycles # 2.718 GHz + 4,838,587,482 instructions # 2.27 insn per cycle + 0.784611119 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -118,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.652126e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.654627e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.654627e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.547760 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.698223e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.700507e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.700507e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688171 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,812,238,828 cycles # 3.300 GHz - 4,284,304,700 instructions # 2.36 insn per cycle - 0.549403589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81266) (512y: 10) (512z: 0) + 1,885,276,472 cycles # 2.726 GHz + 4,293,094,440 instructions # 2.28 insn per cycle + 0.692122848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -152,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.701714e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.702457e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.702457e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.311575 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.810636e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.812720e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.812720e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.777981 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,030,177,094 cycles # 3.291 GHz - 2,156,246,337 instructions # 2.09 insn per cycle - 0.313257304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3492) (512y: 15) (512z:79329) + 1,355,130,660 cycles # 1.735 GHz + 2,164,600,762 instructions # 1.60 insn per cycle + 0.782043846 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -186,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982676284E-003 Relative difference = 2.004124217057488e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 5c0a88233..113bcaacf 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:49:41 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:24:20 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.202287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.203031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.203251e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.759878 sec +INFO: No Floating Point Exceptions have been reported + 5,921,315,429 cycles # 2.907 GHz + 12,451,469,321 instructions # 2.10 insn per cycle + 2.095417433 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.113173e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.113784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113870e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.088774 sec +INFO: No Floating Point Exceptions have been reported + 6,829,574,271 cycles # 2.905 GHz + 14,898,722,914 instructions # 2.18 insn per cycle + 2.410171422 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260107E-003 +Relative difference = 0.0021940095370041636 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.024638e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.024666e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.024666e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.153918 sec -INFO: No Floating Point Exceptions have been reported - 18,034,374,619 cycles # 3.498 GHz - 53,706,043,596 instructions # 2.98 insn per cycle - 5.155449679 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20316) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.526309e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.526569e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.526569e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.192756 sec +INFO: No Floating Point Exceptions have been reported + 18,135,421,902 cycles # 2.927 GHz + 53,892,650,631 instructions # 2.97 insn per cycle + 6.196840431 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087562004E-003 -Relative difference = 2.119791090018744e-08 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.205671e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.206134e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.206134e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.256322 sec -INFO: No Floating Point Exceptions have been reported - 4,396,878,544 cycles # 3.496 GHz - 13,786,467,106 instructions # 3.14 insn per cycle - 1.257895061 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.396709e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397124e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397124e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.556067 sec +INFO: No Floating Point Exceptions have been reported + 4,573,398,855 cycles # 2.934 GHz + 13,800,378,388 instructions # 3.02 insn per cycle + 1.559827589 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896065809E-003 Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.477547e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.479403e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.479403e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.623170 sec -INFO: No Floating Point Exceptions have been reported - 2,060,605,900 cycles # 3.300 GHz - 4,831,453,881 instructions # 2.34 insn per cycle - 0.624668920 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.651495e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653049e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653049e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.795401 sec +INFO: No Floating Point Exceptions have been reported + 2,148,860,867 cycles # 2.691 GHz + 4,840,602,339 instructions # 2.25 insn per cycle + 0.799229981 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.618900e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.621238e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.621238e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.549294 sec -INFO: No Floating Point Exceptions have been reported - 1,816,923,437 cycles # 3.300 GHz - 4,285,533,202 instructions # 2.36 insn per cycle - 0.550797533 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81751) (512y: 10) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.688407e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.690576e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.690576e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688541 sec +INFO: No Floating Point Exceptions have been reported + 1,890,706,185 cycles # 2.733 GHz + 4,294,394,779 instructions # 2.27 insn per cycle + 0.692328039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 24) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.692217e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692927e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.692927e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.313041 sec -INFO: No Floating Point Exceptions have been reported - 1,035,209,806 cycles # 3.293 GHz - 2,161,056,689 instructions # 2.09 insn per cycle - 0.314588980 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4102) (512y: 24) (512z:79550) +EvtsPerSec[Rmb+ME] (23) = ( 6.826093e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.828148e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.828148e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.775828 sec +INFO: No Floating Point Exceptions have been reported + 1,357,390,482 cycles # 1.742 GHz + 2,169,212,126 instructions # 1.60 insn per cycle + 0.779795742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982957326E-003 Relative difference = 2.0044082998332894e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 7a9897fa3..2e59aa225 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:50:18 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:25:23 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.663841e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.664390e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.664590e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.203630 sec +INFO: No Floating Point Exceptions have been reported + 7,260,397,959 cycles # 2.866 GHz + 15,031,707,879 instructions # 2.07 insn per cycle + 2.589013700 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.107763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108067e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108098e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.442022 sec +INFO: No Floating Point Exceptions have been reported + 10,932,120,354 cycles # 2.895 GHz + 24,906,946,249 instructions # 2.28 insn per cycle + 3.831975982 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.842302e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.842549e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.842549e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.366189 sec -INFO: No Floating Point Exceptions have been reported - 18,773,359,413 cycles # 3.498 GHz - 53,840,665,740 instructions # 2.87 insn per cycle - 5.367740662 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32145) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.516129e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.516327e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.516327e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.025685 sec +INFO: No Floating Point Exceptions have been reported + 19,256,305,943 cycles # 2.740 GHz + 54,130,622,749 instructions # 2.81 insn per cycle + 7.029878997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.976154e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976249e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976249e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.673252 sec -INFO: No Floating Point Exceptions have been reported - 9,357,534,951 cycles # 3.499 GHz - 26,169,402,077 instructions # 2.80 insn per cycle - 2.674850919 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.524890e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.524973e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.524973e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.464849 sec +INFO: No Floating Point Exceptions have been reported + 9,453,784,509 cycles # 2.726 GHz + 26,186,103,091 instructions # 2.77 insn per cycle + 3.468732831 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.396430e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.396888e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.396888e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.201527 sec -INFO: No Floating Point Exceptions have been reported - 3,969,364,613 cycles # 3.300 GHz - 9,235,071,562 instructions # 2.33 insn per cycle - 1.203080161 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.508306e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508754e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508754e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.507182 sec +INFO: No Floating Point Exceptions have been reported + 4,099,795,192 cycles # 2.715 GHz + 9,249,955,249 instructions # 2.26 insn per cycle + 1.510975685 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.066780e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.067377e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.067377e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.042692 sec -INFO: No Floating Point Exceptions have been reported - 3,445,010,184 cycles # 3.300 GHz - 8,170,130,625 instructions # 2.37 insn per cycle - 1.044235947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80098) (512y: 45) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.116819e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.117442e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117442e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.284411 sec +INFO: No Floating Point Exceptions have been reported + 3,509,716,252 cycles # 2.725 GHz + 8,182,475,258 instructions # 2.33 insn per cycle + 1.288638878 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.793034e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.794842e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.794842e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.601773 sec -INFO: No Floating Point Exceptions have been reported - 1,987,927,417 cycles # 3.296 GHz - 4,162,385,007 instructions # 2.09 insn per cycle - 0.603318376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2616) (512y: 61) (512z:78907) +EvtsPerSec[Rmb+ME] (23) = ( 3.462021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.462537e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462537e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.527279 sec +INFO: No Floating Point Exceptions have been reported + 2,661,319,941 cycles # 1.739 GHz + 4,172,569,565 instructions # 1.57 insn per cycle + 1.531717386 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 93) (512z:78910) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index ca216db73..f2e4a2151 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,28 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-15_09:51:07 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:26:49 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.668216e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.668742e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.668891e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.204926 sec +INFO: No Floating Point Exceptions have been reported + 7,354,907,186 cycles # 2.903 GHz + 15,835,326,846 instructions # 2.15 insn per cycle + 2.589353613 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.111109e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111447e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.435805 sec +INFO: No Floating Point Exceptions have been reported + 11,002,728,447 cycles # 2.923 GHz + 25,822,053,923 instructions # 2.35 insn per cycle + 3.822280777 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.835028e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.835268e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.835268e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.369738 sec -INFO: No Floating Point Exceptions have been reported - 18,793,366,072 cycles # 3.499 GHz - 53,857,690,319 instructions # 2.87 insn per cycle - 5.371282668 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32332) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.824002e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.824211e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.824211e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.748238 sec +INFO: No Floating Point Exceptions have been reported + 19,286,477,225 cycles # 2.857 GHz + 54,157,907,603 instructions # 2.81 insn per cycle + 6.752432065 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -48,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.997027e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.997123e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.997123e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.645133 sec -INFO: No Floating Point Exceptions have been reported - 9,258,312,982 cycles # 3.498 GHz - 26,074,347,724 instructions # 2.82 insn per cycle - 2.646713771 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.548001e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.548086e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548086e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.412561 sec +INFO: No Floating Point Exceptions have been reported + 9,302,368,855 cycles # 2.723 GHz + 26,085,336,117 instructions # 2.80 insn per cycle + 3.416771061 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:95938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.425112e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.425568e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.425568e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.193605 sec -INFO: No Floating Point Exceptions have been reported - 3,943,104,197 cycles # 3.300 GHz - 9,201,854,109 instructions # 2.33 insn per cycle - 1.195195408 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.533570e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.534051e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534051e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.496264 sec +INFO: No Floating Point Exceptions have been reported + 4,086,923,304 cycles # 2.726 GHz + 9,212,952,806 instructions # 2.25 insn per cycle + 1.500090267 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -112,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.123835e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.124474e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.124474e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.030920 sec -INFO: No Floating Point Exceptions have been reported - 3,406,397,639 cycles # 3.300 GHz - 8,156,836,494 instructions # 2.39 insn per cycle - 1.032505730 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79482) (512y: 215) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.068352e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.068931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.068931e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.299365 sec +INFO: No Floating Point Exceptions have been reported + 3,513,960,907 cycles # 2.698 GHz + 8,167,668,326 instructions # 2.32 insn per cycle + 1.303235401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 229) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -144,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.858788e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.860628e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.860628e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.597408 sec -INFO: No Floating Point Exceptions have been reported - 1,972,920,750 cycles # 3.295 GHz - 4,158,567,065 instructions # 2.11 insn per cycle - 0.599004941 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1887) (512y: 165) (512z:78883) +EvtsPerSec[Rmb+ME] (23) = ( 3.521239e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.521794e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521794e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.500932 sec +INFO: No Floating Point Exceptions have been reported + 2,617,549,535 cycles # 1.740 GHz + 4,166,941,618 instructions # 1.59 insn per cycle + 1.504880250 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 175) (512z:78884) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -176,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index cc2d155fb..73af5e5b3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:00 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:19:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.740481e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.765338e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886154e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459950 sec +INFO: No Floating Point Exceptions have been reported + 1,934,018,539 cycles # 2.861 GHz + 2,739,518,446 instructions # 1.42 insn per cycle + 0.734446139 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.975676e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.474629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.695966e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.539678 sec +INFO: No Floating Point Exceptions have been reported + 2,257,806,163 cycles # 2.877 GHz + 3,239,125,642 instructions # 1.43 insn per cycle + 0.841027050 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.412610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442060e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442060e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.184020 sec -INFO: No Floating Point Exceptions have been reported - 4,141,597,664 cycles # 3.494 GHz - 13,187,383,524 instructions # 3.18 insn per cycle - 1.185605038 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 708) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.056428e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078475e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.570596 sec +INFO: No Floating Point Exceptions have been reported + 4,620,202,435 cycles # 2.935 GHz + 13,190,173,768 instructions # 2.85 insn per cycle + 1.574765138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.508927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.601671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.601671e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.676104 sec -INFO: No Floating Point Exceptions have been reported - 2,366,559,763 cycles # 3.493 GHz - 7,529,479,031 instructions # 3.18 insn per cycle - 0.677695252 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.870844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.942105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942105e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.895190 sec +INFO: No Floating Point Exceptions have been reported + 2,640,894,010 cycles # 2.940 GHz + 7,556,112,587 instructions # 2.86 insn per cycle + 0.899078617 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.398069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665769e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665769e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.395053 sec -INFO: No Floating Point Exceptions have been reported - 1,313,745,324 cycles # 3.314 GHz - 3,136,809,709 instructions # 2.39 insn per cycle - 0.396624282 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.155420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359383e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.539606 sec +INFO: No Floating Point Exceptions have been reported + 1,490,717,557 cycles # 2.746 GHz + 3,161,146,919 instructions # 2.12 insn per cycle + 0.543466540 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.635453e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.935351e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.935351e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.376302 sec -INFO: No Floating Point Exceptions have been reported - 1,251,915,440 cycles # 3.315 GHz - 3,004,111,985 instructions # 2.40 insn per cycle - 0.377898055 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2803) (512y: 84) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.514709e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.763624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763624e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.485581 sec +INFO: No Floating Point Exceptions have been reported + 1,345,992,067 cycles # 2.752 GHz + 3,013,895,719 instructions # 2.24 insn per cycle + 0.489750963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.354533e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.753626e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.753626e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.329077 sec -INFO: No Floating Point Exceptions have been reported - 1,094,608,654 cycles # 3.313 GHz - 1,925,926,390 instructions # 1.76 insn per cycle - 0.330688693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 84) (512z: 2216) +EvtsPerSec[Rmb+ME] (23) = ( 2.329309e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438411e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.725847 sec +INFO: No Floating Point Exceptions have been reported + 1,326,647,346 cycles # 1.820 GHz + 1,963,906,161 instructions # 1.48 insn per cycle + 0.729744934 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index a92f42f90..87049bf6b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_10:01:46 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:47:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.302816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642797e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642797e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.484911 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,993,081,726 cycles # 2.857 GHz + 2,967,605,428 instructions # 1.49 insn per cycle + 0.755486323 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.256871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.326938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.326938e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.759153 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,900,800,476 cycles # 2.878 GHz + 4,476,324,954 instructions # 1.54 insn per cycle + 1.066391830 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.412679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442154e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442154e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.187839 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.051618e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.074674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074674e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.585397 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,159,950,372 cycles # 3.497 GHz - 13,194,964,317 instructions # 3.17 insn per cycle - 1.189688990 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 708) (avx2: 0) (512y: 0) (512z: 0) + 4,659,791,475 cycles # 2.933 GHz + 13,199,729,048 instructions # 2.83 insn per cycle + 1.589552076 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.498144e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.591267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.591267e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.683826 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.863646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935223e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.905908 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,393,118,373 cycles # 3.491 GHz - 7,579,840,266 instructions # 3.17 insn per cycle - 0.685782666 seconds time elapsed + 2,672,075,267 cycles # 2.939 GHz + 7,605,973,490 instructions # 2.85 insn per cycle + 0.909977972 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.374243e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.639923e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.639923e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.401891 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.113398e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.317707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317707e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.554094 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,337,891,355 cycles # 3.316 GHz - 3,188,313,635 instructions # 2.38 insn per cycle - 0.403731438 seconds time elapsed + 1,524,354,103 cycles # 2.734 GHz + 3,211,905,393 instructions # 2.11 insn per cycle + 0.558166519 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.605549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.901656e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.901656e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.383558 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.488860e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737446e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.497012 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,277,780,120 cycles # 3.317 GHz - 3,055,377,542 instructions # 2.39 insn per cycle - 0.385470542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2803) (512y: 84) (512z: 0) + 1,381,887,844 cycles # 2.761 GHz + 3,066,710,334 instructions # 2.22 insn per cycle + 0.501143809 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.298231e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.709931e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.709931e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.337401 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.170464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268423e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.785429 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,121,172,776 cycles # 3.309 GHz - 1,965,912,257 instructions # 1.75 insn per cycle - 0.339244886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 84) (512z: 2216) + 1,369,203,746 cycles # 1.799 GHz + 2,005,266,999 instructions # 1.46 insn per cycle + 0.789533436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 0df1c98bc..f184fc3b5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:05 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:19:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.732857e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.764454e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.875095e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459022 sec +INFO: No Floating Point Exceptions have been reported + 1,935,515,541 cycles # 2.868 GHz + 2,740,568,582 instructions # 1.42 insn per cycle + 0.732282850 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.938986e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.388894e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.605968e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.543627 sec +INFO: No Floating Point Exceptions have been reported + 2,240,911,931 cycles # 2.849 GHz + 3,134,508,527 instructions # 1.40 insn per cycle + 0.843804985 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.413030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442551e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442551e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.183382 sec -INFO: No Floating Point Exceptions have been reported - 4,141,618,298 cycles # 3.496 GHz - 13,165,611,086 instructions # 3.18 insn per cycle - 1.184920222 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.033389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.055879e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055879e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.606028 sec +INFO: No Floating Point Exceptions have been reported + 4,632,143,331 cycles # 2.878 GHz + 13,180,119,009 instructions # 2.85 insn per cycle + 1.610268805 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.510389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.604239e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.604239e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.675309 sec -INFO: No Floating Point Exceptions have been reported - 2,364,418,355 cycles # 3.495 GHz - 7,524,710,592 instructions # 3.18 insn per cycle - 0.676879591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3094) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.829571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.900649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.900649e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.915734 sec +INFO: No Floating Point Exceptions have been reported + 2,643,771,941 cycles # 2.877 GHz + 7,554,150,292 instructions # 2.86 insn per cycle + 0.919868185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.447390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.719828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.719828e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.390629 sec -INFO: No Floating Point Exceptions have been reported - 1,299,093,525 cycles # 3.314 GHz - 3,134,611,268 instructions # 2.41 insn per cycle - 0.392178924 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2977) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.046256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.248866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.248866e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.558915 sec +INFO: No Floating Point Exceptions have been reported + 1,500,616,577 cycles # 2.669 GHz + 3,161,167,766 instructions # 2.11 insn per cycle + 0.563154837 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.629923e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.928873e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.928873e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.376571 sec -INFO: No Floating Point Exceptions have been reported - 1,252,881,147 cycles # 3.315 GHz - 2,998,471,867 instructions # 2.39 insn per cycle - 0.378164555 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 84) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.429175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.674618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674618e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.498371 sec +INFO: No Floating Point Exceptions have been reported + 1,352,614,614 cycles # 2.696 GHz + 3,013,058,203 instructions # 2.23 insn per cycle + 0.502370936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.349755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.750513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.750513e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.329058 sec -INFO: No Floating Point Exceptions have been reported - 1,094,136,021 cycles # 3.311 GHz - 1,923,177,976 instructions # 1.76 insn per cycle - 0.330636062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1349) (512y: 84) (512z: 2216) +EvtsPerSec[Rmb+ME] (23) = ( 2.263352e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.370712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.370712e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.746831 sec +INFO: No Floating Point Exceptions have been reported + 1,330,812,654 cycles # 1.774 GHz + 1,962,138,478 instructions # 1.47 insn per cycle + 0.751010006 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 81afd2156..9c9085f21 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:09 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:19:29 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.616183e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.859144e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.009356e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.455344 sec +INFO: No Floating Point Exceptions have been reported + 1,903,769,704 cycles # 2.832 GHz + 2,695,127,426 instructions # 1.42 insn per cycle + 0.728806465 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.292895e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.269503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.615665e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.493521 sec +INFO: No Floating Point Exceptions have been reported + 2,090,898,444 cycles # 2.835 GHz + 2,942,471,441 instructions # 1.41 insn per cycle + 0.794240657 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.502816e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538515e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538515e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 1.112756 sec -INFO: No Floating Point Exceptions have been reported - 3,896,728,098 cycles # 3.498 GHz - 12,921,261,801 instructions # 3.32 insn per cycle - 1.114308908 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.085317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110333e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.528688 sec +INFO: No Floating Point Exceptions have been reported + 4,411,922,721 cycles # 2.879 GHz + 12,951,312,387 instructions # 2.94 insn per cycle + 1.532844163 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.843942e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.087104e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.087104e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 0.447668 sec -INFO: No Floating Point Exceptions have been reported - 1,568,762,591 cycles # 3.494 GHz - 4,529,699,082 instructions # 2.89 insn per cycle - 0.449147710 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.813599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.988360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988360e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.600071 sec +INFO: No Floating Point Exceptions have been reported + 1,729,759,970 cycles # 2.867 GHz + 4,541,750,353 instructions # 2.63 insn per cycle + 0.604044137 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.659852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.669609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.669609e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.235019 sec -INFO: No Floating Point Exceptions have been reported - 784,093,928 cycles # 3.318 GHz - 1,903,310,963 instructions # 2.43 insn per cycle - 0.236562056 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.481903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.160699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.160699e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.317183 sec +INFO: No Floating Point Exceptions have been reported + 858,921,512 cycles # 2.679 GHz + 1,917,766,555 instructions # 2.23 insn per cycle + 0.321171597 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.114059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.173135e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.173135e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.223509 sec -INFO: No Floating Point Exceptions have been reported - 746,004,700 cycles # 3.318 GHz - 1,823,701,260 instructions # 2.44 insn per cycle - 0.225023752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3457) (512y: 2) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.857503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.629025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.629025e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.298118 sec +INFO: No Floating Point Exceptions have been reported + 804,518,989 cycles # 2.670 GHz + 1,834,610,739 instructions # 2.28 insn per cycle + 0.301964643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.002574e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169810e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.169810e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.184943 sec -INFO: No Floating Point Exceptions have been reported - 617,830,643 cycles # 3.317 GHz - 1,283,756,309 instructions # 2.08 insn per cycle - 0.186472562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 2) (512z: 2434) +EvtsPerSec[Rmb+ME] (23) = ( 4.365317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.786659e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.786659e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.395762 sec +INFO: No Floating Point Exceptions have been reported + 728,663,796 cycles # 1.826 GHz + 1,308,267,192 instructions # 1.80 insn per cycle + 0.399787635 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 9c06b9625..f23dffbec 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,48 +1,133 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_10:01:50 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:48:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.986387e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435739e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435739e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 +TOTAL : 0.462861 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,938,742,838 cycles # 2.865 GHz + 2,865,087,008 instructions # 1.48 insn per cycle + 0.732940290 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.036027e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.082450e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.082450e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 +TOTAL : 0.632974 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,495,936,905 cycles # 2.880 GHz + 3,785,157,902 instructions # 1.52 insn per cycle + 0.923834641 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.502172e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537510e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 1.115381 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.104272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129547e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.505088 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,905,854,092 cycles # 3.497 GHz - 12,926,712,935 instructions # 3.31 insn per cycle - 1.117074300 seconds time elapsed + 4,422,322,267 cycles # 2.932 GHz + 12,955,751,055 instructions # 2.93 insn per cycle + 1.509164533 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -50,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.794944e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.033627e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.033627e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 0.456708 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.849156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.028095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.028095e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.596878 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,596,476,246 cycles # 3.484 GHz - 4,578,502,777 instructions # 2.87 insn per cycle - 0.458391760 seconds time elapsed + 1,746,639,368 cycles # 2.911 GHz + 4,590,056,426 instructions # 2.63 insn per cycle + 0.600772729 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -84,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.630059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.622648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.622648e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.238702 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.482456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.156915e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.156915e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.321160 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 797,453,823 cycles # 3.320 GHz - 1,940,651,385 instructions # 2.43 insn per cycle - 0.240437922 seconds time elapsed + 875,522,703 cycles # 2.698 GHz + 1,954,476,479 instructions # 2.23 insn per cycle + 0.325091323 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -118,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.065521e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.109139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.109139e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.227609 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.960780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751467e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.296758 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 759,780,834 cycles # 3.318 GHz - 1,860,665,516 instructions # 2.45 insn per cycle - 0.229250498 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3457) (512y: 2) (512z: 0) + 821,090,022 cycles # 2.738 GHz + 1,871,468,752 instructions # 2.28 insn per cycle + 0.300472695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -152,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.883059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.153120e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.153120e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.190289 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.492830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.932362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.932362e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.388889 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 636,183,234 cycles # 3.320 GHz - 1,325,537,600 instructions # 2.08 insn per cycle - 0.191886831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 2) (512z: 2434) + 746,594,357 cycles # 1.904 GHz + 1,349,630,324 instructions # 1.81 insn per cycle + 0.392744433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -186,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 492bd227f..e2521e45b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:13 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:19:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.639867e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.865475e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.015127e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.454979 sec +INFO: No Floating Point Exceptions have been reported + 1,901,448,560 cycles # 2.824 GHz + 2,678,183,164 instructions # 1.41 insn per cycle + 0.730072764 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.246644e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.993104e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.325490e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.496812 sec +INFO: No Floating Point Exceptions have been reported + 2,083,867,357 cycles # 2.832 GHz + 2,934,470,565 instructions # 1.41 insn per cycle + 0.792648211 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.496062e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.531217e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.531217e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 1.117842 sec -INFO: No Floating Point Exceptions have been reported - 3,911,097,982 cycles # 3.495 GHz - 12,897,087,598 instructions # 3.30 insn per cycle - 1.119395161 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.078136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103342e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.538413 sec +INFO: No Floating Point Exceptions have been reported + 4,411,092,348 cycles # 2.861 GHz + 12,926,836,759 instructions # 2.93 insn per cycle + 1.542610115 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.836873e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.078269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.078269e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 0.448511 sec -INFO: No Floating Point Exceptions have been reported - 1,569,700,907 cycles # 3.490 GHz - 4,524,493,147 instructions # 2.88 insn per cycle - 0.449999638 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.816554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.994652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.994652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.599654 sec +INFO: No Floating Point Exceptions have been reported + 1,728,903,265 cycles # 2.870 GHz + 4,536,279,042 instructions # 2.62 insn per cycle + 0.603646034 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.731745e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.753990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.753990e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.232891 sec -INFO: No Floating Point Exceptions have been reported - 777,112,460 cycles # 3.318 GHz - 1,900,049,395 instructions # 2.45 insn per cycle - 0.234417475 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.299874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.938357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.938357e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.327791 sec +INFO: No Floating Point Exceptions have been reported + 861,849,665 cycles # 2.602 GHz + 1,914,633,101 instructions # 2.22 insn per cycle + 0.331876637 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3550) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.105044e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.157608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.157608e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947127e+02 +- 1.186879e+02 ) GeV^-2 -TOTAL : 0.223687 sec -INFO: No Floating Point Exceptions have been reported - 746,547,267 cycles # 3.318 GHz - 1,819,388,878 instructions # 2.44 insn per cycle - 0.225208643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3421) (512y: 2) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.927651e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.715679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.715679e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.294357 sec +INFO: No Floating Point Exceptions have been reported + 802,533,600 cycles # 2.696 GHz + 1,830,391,280 instructions # 2.28 insn per cycle + 0.298329557 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3366) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.986207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166804e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166804e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.185658 sec -INFO: No Floating Point Exceptions have been reported - 619,532,239 cycles # 3.314 GHz - 1,281,456,829 instructions # 2.07 insn per cycle - 0.187211263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1929) (512y: 2) (512z: 2437) +EvtsPerSec[Rmb+ME] (23) = ( 4.433633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866083e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.389558 sec +INFO: No Floating Point Exceptions have been reported + 729,078,705 cycles # 1.856 GHz + 1,305,984,013 instructions # 1.79 insn per cycle + 0.393475655 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1926) (512y: 26) (512z: 2437) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 32ac36f4c..75ffaff93 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:16 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:19:53 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.751782e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.854275e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972003e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.462457 sec +INFO: No Floating Point Exceptions have been reported + 1,917,992,386 cycles # 2.823 GHz + 2,716,857,811 instructions # 1.42 insn per cycle + 0.737791701 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.933049e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.480174e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.706062e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.542910 sec +INFO: No Floating Point Exceptions have been reported + 2,260,071,894 cycles # 2.877 GHz + 3,201,078,521 instructions # 1.42 insn per cycle + 0.842582370 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.396929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.425842e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.425842e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.196875 sec -INFO: No Floating Point Exceptions have been reported - 4,179,491,801 cycles # 3.488 GHz - 13,174,628,892 instructions # 3.15 insn per cycle - 1.198566357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 682) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.028418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050644e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.613813 sec +INFO: No Floating Point Exceptions have been reported + 4,647,383,877 cycles # 2.879 GHz + 13,178,063,049 instructions # 2.84 insn per cycle + 1.618051260 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.511657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.605674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.605674e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.675225 sec -INFO: No Floating Point Exceptions have been reported - 2,362,754,006 cycles # 3.492 GHz - 7,448,839,984 instructions # 3.15 insn per cycle - 0.676834119 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.864504e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934483e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.898369 sec +INFO: No Floating Point Exceptions have been reported + 2,648,200,185 cycles # 2.937 GHz + 7,475,755,342 instructions # 2.82 insn per cycle + 0.902206814 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.486392e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.763857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.763857e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.387527 sec -INFO: No Floating Point Exceptions have been reported - 1,288,949,457 cycles # 3.314 GHz - 3,101,589,348 instructions # 2.41 insn per cycle - 0.389131772 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.200611e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408501e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.532171 sec +INFO: No Floating Point Exceptions have been reported + 1,476,374,652 cycles # 2.757 GHz + 3,128,702,616 instructions # 2.12 insn per cycle + 0.536024340 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3131) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.722684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.036440e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.036440e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.369843 sec -INFO: No Floating Point Exceptions have been reported - 1,230,232,580 cycles # 3.314 GHz - 2,968,768,298 instructions # 2.41 insn per cycle - 0.371471220 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2944) (512y: 90) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.587903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854303e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.476990 sec +INFO: No Floating Point Exceptions have been reported + 1,322,669,287 cycles # 2.754 GHz + 2,982,885,294 instructions # 2.26 insn per cycle + 0.480825528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2893) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.107855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.469090e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.469090e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.343714 sec -INFO: No Floating Point Exceptions have been reported - 1,142,936,380 cycles # 3.312 GHz - 1,948,968,623 instructions # 1.71 insn per cycle - 0.345358894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1672) (512y: 86) (512z: 2250) +EvtsPerSec[Rmb+ME] (23) = ( 2.251912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353383e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.750102 sec +INFO: No Floating Point Exceptions have been reported + 1,363,693,421 cycles # 1.811 GHz + 1,991,339,845 instructions # 1.46 insn per cycle + 0.753947194 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2252) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index e143965d2..40582e53f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_09:47:21 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + +DATE: 2024-09-15_11:20:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.750483e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.807638e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.927006e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.460124 sec +INFO: No Floating Point Exceptions have been reported + 1,938,834,678 cycles # 2.862 GHz + 2,712,058,421 instructions # 1.40 insn per cycle + 0.735469517 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.925117e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.366077e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.584033e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.544645 sec +INFO: No Floating Point Exceptions have been reported + 2,249,855,314 cycles # 2.863 GHz + 3,222,814,057 instructions # 1.43 insn per cycle + 0.844200129 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.402303e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.431502e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.431502e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.192213 sec -INFO: No Floating Point Exceptions have been reported - 4,172,246,102 cycles # 3.496 GHz - 13,152,525,968 instructions # 3.15 insn per cycle - 1.193807916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 667) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.054095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076295e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.574055 sec +INFO: No Floating Point Exceptions have been reported + 4,641,890,435 cycles # 2.943 GHz + 13,165,898,661 instructions # 2.84 insn per cycle + 1.578249512 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.522082e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.616399e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.616399e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.672372 sec -INFO: No Floating Point Exceptions have been reported - 2,355,004,641 cycles # 3.496 GHz - 7,442,765,641 instructions # 3.16 insn per cycle - 0.673994211 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.867370e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.936884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.896912 sec +INFO: No Floating Point Exceptions have been reported + 2,636,737,245 cycles # 2.930 GHz + 7,477,755,477 instructions # 2.84 insn per cycle + 0.900719288 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.523190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.805789e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.805789e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.384444 sec -INFO: No Floating Point Exceptions have been reported - 1,278,706,337 cycles # 3.315 GHz - 3,098,421,994 instructions # 2.42 insn per cycle - 0.385996044 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.202775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410191e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.531412 sec +INFO: No Floating Point Exceptions have been reported + 1,468,072,782 cycles # 2.747 GHz + 3,129,202,339 instructions # 2.13 insn per cycle + 0.535248151 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3109) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.716920e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.032441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.032441e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.370114 sec -INFO: No Floating Point Exceptions have been reported - 1,231,488,568 cycles # 3.316 GHz - 2,965,372,659 instructions # 2.41 insn per cycle - 0.371665259 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2920) (512y: 90) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.576512e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.841608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841608e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.477966 sec +INFO: No Floating Point Exceptions have been reported + 1,324,577,804 cycles # 2.753 GHz + 2,983,698,636 instructions # 2.25 insn per cycle + 0.481692847 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2869) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.095023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.456546e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.456546e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.344329 sec -INFO: No Floating Point Exceptions have been reported - 1,144,047,009 cycles # 3.309 GHz - 1,946,721,930 instructions # 1.70 insn per cycle - 0.345934801 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1648) (512y: 86) (512z: 2250) +EvtsPerSec[Rmb+ME] (23) = ( 2.229034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.331255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331255e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.757523 sec +INFO: No Floating Point Exceptions have been reported + 1,366,953,688 cycles # 1.797 GHz + 1,991,556,146 instructions # 1.46 insn per cycle + 0.761326972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2252) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index d6b31eb52..a10430f20 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:16:12 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:19:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.222962e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.849418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.427313e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531972 sec +INFO: No Floating Point Exceptions have been reported + 2,207,295,929 cycles # 2.875 GHz + 3,148,652,719 instructions # 1.43 insn per cycle + 0.824191400 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.111653e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.154588e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.154588e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.094853 sec -INFO: No Floating Point Exceptions have been reported - 17,832,518,783 cycles # 3.498 GHz - 51,965,846,286 instructions # 2.91 insn per cycle - 5.097726180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.605446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642912e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.682964 sec +INFO: No Floating Point Exceptions have been reported + 19,598,347,374 cycles # 2.930 GHz + 52,065,080,941 instructions # 2.66 insn per cycle + 6.694844262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.935610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.096869e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.096869e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.791066 sec -INFO: No Floating Point Exceptions have been reported - 9,773,273,837 cycles # 3.498 GHz - 30,705,982,059 instructions # 3.14 insn per cycle - 2.793926203 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.916629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051442e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.747547 sec +INFO: No Floating Point Exceptions have been reported + 11,065,354,139 cycles # 2.943 GHz + 30,912,254,749 instructions # 2.79 insn per cycle + 3.760361851 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.586931e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.035276e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.035276e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.713567 sec -INFO: No Floating Point Exceptions have been reported - 5,705,560,483 cycles # 3.325 GHz - 13,589,058,983 instructions # 2.38 insn per cycle - 1.716402156 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.668387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.008748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.008748e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.399916 sec +INFO: No Floating Point Exceptions have been reported + 6,627,221,489 cycles # 2.749 GHz + 13,792,796,598 instructions # 2.08 insn per cycle + 2.412653295 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.884518e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.373702e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.373702e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.646590 sec -INFO: No Floating Point Exceptions have been reported - 5,484,754,117 cycles # 3.326 GHz - 13,026,648,973 instructions # 2.38 insn per cycle - 1.649443235 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 126) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.129922e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.540308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.540308e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.198225 sec +INFO: No Floating Point Exceptions have been reported + 6,104,431,058 cycles # 2.762 GHz + 13,134,794,290 instructions # 2.15 insn per cycle + 2.210920696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.769241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.391150e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.391150e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.474801 sec -INFO: No Floating Point Exceptions have been reported - 4,912,813,690 cycles # 3.325 GHz - 8,457,047,658 instructions # 1.72 insn per cycle - 1.477693257 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 106) (512z: 1941) +EvtsPerSec[Rmb+ME] (23) = ( 3.449384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.629220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629220e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.193674 sec +INFO: No Floating Point Exceptions have been reported + 5,993,463,965 cycles # 1.870 GHz + 8,712,960,993 instructions # 1.45 insn per cycle + 3.206184057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1943) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 596022db2..01ceafd1d 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:16:27 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:20:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.181438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.797209e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.367517e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531302 sec +INFO: No Floating Point Exceptions have been reported + 2,216,417,295 cycles # 2.883 GHz + 3,137,968,070 instructions # 1.42 insn per cycle + 0.825226040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.211571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258158e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.870560 sec -INFO: No Floating Point Exceptions have been reported - 17,042,614,805 cycles # 3.497 GHz - 50,113,161,158 instructions # 2.94 insn per cycle - 4.873618672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.706608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.748828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.748828e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.299466 sec +INFO: No Floating Point Exceptions have been reported + 18,540,883,021 cycles # 2.938 GHz + 50,178,474,604 instructions # 2.71 insn per cycle + 6.311951743 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.079078e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.254384e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.254384e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.697659 sec -INFO: No Floating Point Exceptions have been reported - 9,435,548,251 cycles # 3.496 GHz - 29,084,263,622 instructions # 3.08 insn per cycle - 2.701074351 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.062664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.211274e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211274e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.576786 sec +INFO: No Floating Point Exceptions have been reported + 10,549,321,378 cycles # 2.940 GHz + 29,289,408,214 instructions # 2.78 insn per cycle + 3.589213709 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.004936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.376120e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.376120e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.868556 sec -INFO: No Floating Point Exceptions have been reported - 6,217,718,096 cycles # 3.323 GHz - 15,072,689,392 instructions # 2.42 insn per cycle - 1.871415403 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.340015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.632096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.632096e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.568858 sec +INFO: No Floating Point Exceptions have been reported + 7,118,801,409 cycles # 2.759 GHz + 15,276,261,936 instructions # 2.15 insn per cycle + 2.581007821 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3021) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.209175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.605003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.605003e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.812508 sec -INFO: No Floating Point Exceptions have been reported - 6,025,791,566 cycles # 3.323 GHz - 14,511,126,819 instructions # 2.41 insn per cycle - 1.815384334 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.507726e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.822175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.822175e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.479110 sec +INFO: No Floating Point Exceptions have been reported + 6,890,334,799 cycles # 2.767 GHz + 14,747,969,860 instructions # 2.14 insn per cycle + 2.491499387 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2617) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.830793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.463329e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.463329e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.462775 sec -INFO: No Floating Point Exceptions have been reported - 4,872,975,219 cycles # 3.325 GHz - 10,266,083,719 instructions # 2.11 insn per cycle - 1.465698862 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.315391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482899e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.316209 sec +INFO: No Floating Point Exceptions have been reported + 6,207,380,257 cycles # 1.865 GHz + 10,464,609,822 instructions # 1.69 insn per cycle + 3.328585456 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2130) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index d00530c6e..2ef1c54aa 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:16:41 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:20:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.552559e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.511007e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603394e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.491259 sec +INFO: No Floating Point Exceptions have been reported + 2,068,896,846 cycles # 2.881 GHz + 2,979,901,367 instructions # 1.44 insn per cycle + 0.776426531 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.312489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.370363e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.370363e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.650386 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,276,965,668 cycles # 3.498 GHz - 51,120,191,833 instructions # 3.14 insn per cycle - 4.653033188 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.683914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.725672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.725672e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.333148 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,682,476,989 cycles # 2.947 GHz + 51,267,470,348 instructions # 2.74 insn per cycle + 6.341547157 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -57,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.362140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.689605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.689605e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 -TOTAL : 2.069281 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,240,542,598 cycles # 3.495 GHz - 19,295,743,850 instructions # 2.66 insn per cycle - 2.071894843 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.015012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.280109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.280109e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.718050 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 8,007,664,837 cycles # 2.940 GHz + 19,370,996,217 instructions # 2.42 insn per cycle + 2.726376718 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -82,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -91,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.077132e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202254e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202254e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.082430 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,618,404,442 cycles # 3.335 GHz - 8,800,594,848 instructions # 2.43 insn per cycle - 1.085107619 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.789023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799247e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.452394 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,027,415,196 cycles # 2.759 GHz + 8,886,566,152 instructions # 2.21 insn per cycle + 1.460503609 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -116,31 +172,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.117192e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251647e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251647e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.049032 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,506,556,640 cycles # 3.335 GHz - 8,443,755,145 instructions # 2.41 insn per cycle - 1.051650033 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3598) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.322323e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.475598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.475598e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.366660 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,807,036,059 cycles # 2.770 GHz + 8,489,981,547 instructions # 2.23 insn per cycle + 1.374788749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3543) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -148,31 +204,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.271392e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.451704e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.451704e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 0.936253 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,132,383,797 cycles # 3.336 GHz - 6,165,283,472 instructions # 1.97 insn per cycle - 0.939160391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2326) (512y: 0) (512z: 2290) +EvtsPerSec[Rmb+ME] (23) = ( 5.974329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.534282e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.534282e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.862736 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,570,392,990 cycles # 1.910 GHz + 6,298,404,091 instructions # 1.76 insn per cycle + 1.870756064 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2323) (512y: 24) (512z: 2290) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -180,8 +236,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 18ad1d5d1..479ebdb20 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:16:52 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:21:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.776065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.594605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.702542e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.491621 sec +INFO: No Floating Point Exceptions have been reported + 2,069,264,305 cycles # 2.877 GHz + 2,928,235,838 instructions # 1.42 insn per cycle + 0.775773692 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.394287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.456040e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.456040e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.495559 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,737,867,195 cycles # 3.499 GHz - 49,523,350,375 instructions # 3.15 insn per cycle - 4.498175441 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.731608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.775696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.775696e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.161667 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,113,353,892 cycles # 2.937 GHz + 49,656,566,510 instructions # 2.74 insn per cycle + 6.170127822 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -57,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.124992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.558263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.558263e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 -TOTAL : 1.825579 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,392,542,908 cycles # 3.497 GHz - 18,465,725,820 instructions # 2.89 insn per cycle - 1.828210223 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.528214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.868162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.868162e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.421156 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,159,836,216 cycles # 2.949 GHz + 18,538,672,579 instructions # 2.59 insn per cycle + 2.429136947 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -82,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -91,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.597043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.192418e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.192418e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.488739 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,958,332,472 cycles # 3.325 GHz - 10,817,415,240 instructions # 2.18 insn per cycle - 1.491446797 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.353305e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.808520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.808520e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.063917 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,718,285,058 cycles # 2.760 GHz + 10,903,070,951 instructions # 1.91 insn per cycle + 2.072527320 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -116,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -125,24 +181,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.479798e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.053954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.053954e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.512486 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,037,046,031 cycles # 3.325 GHz - 10,481,525,782 instructions # 2.08 insn per cycle - 1.515144505 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.452070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.924355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.924355e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.029021 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,634,694,028 cycles # 2.767 GHz + 10,598,235,094 instructions # 1.88 insn per cycle + 2.037144953 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4135) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -150,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -159,24 +215,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.035994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.924665e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.924665e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.271344 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,234,372,424 cycles # 3.324 GHz - 8,599,786,449 instructions # 2.03 insn per cycle - 1.274041512 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.351507e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.637189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.637189e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.516038 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,700,188,022 cycles # 1.863 GHz + 8,712,811,590 instructions # 1.85 insn per cycle + 2.524039667 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -184,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 25e13a4e8..7f1052231 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:17:04 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:21:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.233444e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.828530e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.380985e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531166 sec +INFO: No Floating Point Exceptions have been reported + 2,204,089,924 cycles # 2.871 GHz + 3,157,003,197 instructions # 1.43 insn per cycle + 0.824717149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134711012809239 +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.995529e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033456e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.383544 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,839,345,125 cycles # 3.498 GHz - 51,966,732,943 instructions # 2.76 insn per cycle - 5.386387255 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.513438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546541e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 7.083035 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 20,836,157,287 cycles # 2.937 GHz + 52,059,859,689 instructions # 2.50 insn per cycle + 7.095325403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -57,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.703328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.845943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.845943e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.957994 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 10,355,737,244 cycles # 3.498 GHz - 30,511,573,766 instructions # 2.95 insn per cycle - 2.960782520 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.708543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825114e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825114e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 4.025851 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,676,241,949 cycles # 2.892 GHz + 30,719,909,890 instructions # 2.63 insn per cycle + 4.038601753 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2971) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -82,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -91,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.399095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.824105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.824105e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.760122 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,856,764,716 cycles # 3.323 GHz - 13,503,170,096 instructions # 2.31 insn per cycle - 1.762959014 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.506319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.824086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.824086e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.479724 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,856,020,570 cycles # 2.752 GHz + 13,733,686,621 instructions # 2.00 insn per cycle + 2.492002268 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -116,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -125,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.570991e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.015612e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.015612e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.719966 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,727,291,657 cycles # 3.325 GHz - 12,965,859,963 instructions # 2.26 insn per cycle - 1.722855146 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2902) (512y: 130) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.929662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.305854e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.305854e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.278940 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,324,440,516 cycles # 2.761 GHz + 13,099,663,654 instructions # 2.07 insn per cycle + 2.291244442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -150,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -159,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.919830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.405669e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.405669e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.639009 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,447,871,803 cycles # 3.319 GHz - 8,529,950,287 instructions # 1.57 insn per cycle - 1.641850608 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 108) (512z: 2011) +EvtsPerSec[Rmb+ME] (23) = ( 3.121063e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.267193e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.267193e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.513123 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,549,229,378 cycles # 1.858 GHz + 8,826,958,587 instructions # 1.35 insn per cycle + 3.525479379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2013) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -184,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index d3a6ddfae..b5ff528c4 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_10:17:19 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +DATE: 2024-09-15_12:21:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.263927e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.696044e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.276288e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.532325 sec +INFO: No Floating Point Exceptions have been reported + 2,205,625,214 cycles # 2.868 GHz + 3,174,502,414 instructions # 1.44 insn per cycle + 0.825987629 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134711012809239 +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.085069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.126681e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.126681e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.159751 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,054,093,495 cycles # 3.497 GHz - 50,013,616,572 instructions # 2.77 insn per cycle - 5.162778757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.606903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.644535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644535e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.677680 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 19,666,494,079 cycles # 2.940 GHz + 50,081,060,677 instructions # 2.55 insn per cycle + 6.689882991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -48,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -57,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.892030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.051043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.051043e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.822229 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,875,354,864 cycles # 3.496 GHz - 29,082,286,712 instructions # 2.94 insn per cycle - 2.825249661 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2826) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.871340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.003903e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.003903e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.840726 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,259,304,877 cycles # 2.923 GHz + 29,230,934,183 instructions # 2.60 insn per cycle + 3.852980170 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2807) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -82,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -91,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.241444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.526958e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.526958e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.123495 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,058,903,941 cycles # 3.320 GHz - 15,065,566,193 instructions # 2.13 insn per cycle - 2.126591771 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.726077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.943109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.943109e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.970419 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 8,229,432,856 cycles # 2.759 GHz + 15,297,097,015 instructions # 1.86 insn per cycle + 2.983820409 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3202) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -116,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -125,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.410970e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.710991e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.710991e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.061955 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,856,926,635 cycles # 3.321 GHz - 14,339,013,630 instructions # 2.09 insn per cycle - 2.064983245 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.908170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147639e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.839910 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,866,112,540 cycles # 2.759 GHz + 14,608,431,526 instructions # 1.86 insn per cycle + 2.852893659 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 304) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -150,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -159,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.045178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.558583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.558583e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.612026 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,356,509,237 cycles # 3.320 GHz - 9,776,232,875 instructions # 1.83 insn per cycle - 1.614994589 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.030076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.169205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.169205e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.616163 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,699,969,895 cycles # 1.847 GHz + 10,018,865,936 instructions # 1.50 insn per cycle + 3.629335211 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2217) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -184,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index b4368bd58..7707f676a 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:15:57 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:18:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.767516e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.784818e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.787795e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470421 sec +INFO: No Floating Point Exceptions have been reported + 1,978,272,924 cycles # 2.864 GHz + 2,912,164,766 instructions # 1.47 insn per cycle + 0.749211691 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.005244e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.117313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.126114e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483026 sec +INFO: No Floating Point Exceptions have been reported + 2,024,260,948 cycles # 2.878 GHz + 3,029,497,927 instructions # 1.50 insn per cycle + 0.762830166 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.444421e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.448614e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.448614e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.119796 sec -INFO: No Floating Point Exceptions have been reported - 420,886,415 cycles # 3.480 GHz - 1,385,953,992 instructions # 3.29 insn per cycle - 0.121147591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3909) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.405701e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.409074e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409074e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.157165 sec +INFO: No Floating Point Exceptions have been reported + 467,074,127 cycles # 2.919 GHz + 1,389,682,298 instructions # 2.98 insn per cycle + 0.160520641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.717042e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.732316e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.732316e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061404 sec -INFO: No Floating Point Exceptions have been reported - 212,966,152 cycles # 3.405 GHz - 686,398,914 instructions # 3.22 insn per cycle - 0.062812309 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.459230e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.470849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.470849e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.083614 sec +INFO: No Floating Point Exceptions have been reported + 239,038,405 cycles # 2.765 GHz + 692,921,675 instructions # 2.90 insn per cycle + 0.087016440 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.027903e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036105e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036105e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026816 sec -INFO: No Floating Point Exceptions have been reported - 92,252,143 cycles # 3.299 GHz - 252,276,633 instructions # 2.73 insn per cycle - 0.028140147 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.419984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.425694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.425694e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.039288 sec +INFO: No Floating Point Exceptions have been reported + 113,366,397 cycles # 2.696 GHz + 257,996,166 instructions # 2.28 insn per cycle + 0.042698199 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8503) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.219256e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.228906e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.228906e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.024561 sec -INFO: No Floating Point Exceptions have been reported - 84,783,562 cycles # 3.299 GHz - 234,825,300 instructions # 2.77 insn per cycle - 0.025940587 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8191) (512y: 130) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.624961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.632288e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632288e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034517 sec +INFO: No Floating Point Exceptions have been reported + 101,263,068 cycles # 2.711 GHz + 239,969,377 instructions # 2.37 insn per cycle + 0.037861089 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8140) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.290752e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.312349e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.312349e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.016845 sec -INFO: No Floating Point Exceptions have been reported - 57,304,566 cycles # 3.181 GHz - 128,817,374 instructions # 2.25 insn per cycle - 0.018196623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 104) (512z: 7088) +EvtsPerSec[Rmb+ME] (23) = ( 1.199166e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204857e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204857e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046301 sec +INFO: No Floating Point Exceptions have been reported + 89,031,390 cycles # 1.806 GHz + 134,346,666 instructions # 1.51 insn per cycle + 0.049794003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7090) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 8c1c9f482..ca3a407fd 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:16:00 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:18:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.802842e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.821481e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.824642e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470683 sec +INFO: No Floating Point Exceptions have been reported + 1,990,204,132 cycles # 2.870 GHz + 2,908,985,105 instructions # 1.46 insn per cycle + 0.750697991 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.083966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.205394e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213656e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.487582 sec +INFO: No Floating Point Exceptions have been reported + 2,025,056,560 cycles # 2.852 GHz + 2,989,421,142 instructions # 1.48 insn per cycle + 0.769117174 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.456011e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.460423e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.460423e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.118937 sec -INFO: No Floating Point Exceptions have been reported - 417,792,541 cycles # 3.479 GHz - 1,379,641,359 instructions # 3.30 insn per cycle - 0.120266260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3798) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.394566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397741e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.157033 sec +INFO: No Floating Point Exceptions have been reported + 465,720,728 cycles # 2.911 GHz + 1,385,003,144 instructions # 2.97 insn per cycle + 0.160593741 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.764533e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.780277e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.780277e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.060533 sec -INFO: No Floating Point Exceptions have been reported - 209,880,820 cycles # 3.405 GHz - 681,731,886 instructions # 3.25 insn per cycle - 0.061875110 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.474186e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.485931e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.485931e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.082867 sec +INFO: No Floating Point Exceptions have been reported + 237,575,401 cycles # 2.770 GHz + 689,116,420 instructions # 2.90 insn per cycle + 0.086305788 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.008345e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.016097e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.016097e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026538 sec -INFO: No Floating Point Exceptions have been reported - 91,272,438 cycles # 3.299 GHz - 247,460,752 instructions # 2.71 insn per cycle - 0.027848654 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.436754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442531e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038039 sec +INFO: No Floating Point Exceptions have been reported + 110,520,646 cycles # 2.700 GHz + 253,448,082 instructions # 2.29 insn per cycle + 0.041474271 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8458) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.213852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.223373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.223373e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.024102 sec -INFO: No Floating Point Exceptions have been reported - 83,233,060 cycles # 3.299 GHz - 230,071,653 instructions # 2.76 insn per cycle - 0.025413764 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8149) (512y: 130) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.611056e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.618327e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618327e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034052 sec +INFO: No Floating Point Exceptions have been reported + 98,863,837 cycles # 2.687 GHz + 235,605,174 instructions # 2.38 insn per cycle + 0.037353270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8098) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.297706e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.319278e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.319278e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.016294 sec -INFO: No Floating Point Exceptions have been reported - 55,413,110 cycles # 3.179 GHz - 124,082,984 instructions # 2.24 insn per cycle - 0.017608904 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 104) (512z: 7092) +EvtsPerSec[Rmb+ME] (23) = ( 1.176506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181658e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046442 sec +INFO: No Floating Point Exceptions have been reported + 86,647,290 cycles # 1.764 GHz + 129,720,267 instructions # 1.50 insn per cycle + 0.049837932 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7094) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 13046c60a..0df257cc6 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:16:03 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:19:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.204232e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.214249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.216451e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.474860 sec +INFO: No Floating Point Exceptions have been reported + 1,958,003,333 cycles # 2.836 GHz + 2,859,472,548 instructions # 1.46 insn per cycle + 0.747968607 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.933159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.016706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.024424e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 +TOTAL : 0.477251 sec +INFO: No Floating Point Exceptions have been reported + 1,992,584,596 cycles # 2.867 GHz + 2,884,692,368 instructions # 1.45 insn per cycle + 0.751767434 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272869669930272E-006 +Relative difference = 4.548524165778887e-06 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.493404e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.497770e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.497770e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.118472 sec -INFO: No Floating Point Exceptions have been reported - 416,153,723 cycles # 3.479 GHz - 1,373,194,564 instructions # 3.30 insn per cycle - 0.119838875 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3040) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.431583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434922e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434922e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.155945 sec +INFO: No Floating Point Exceptions have been reported + 462,855,819 cycles # 2.915 GHz + 1,381,844,785 instructions # 2.99 insn per cycle + 0.159290331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.714272e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.720588e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.720588e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.031565 sec -INFO: No Floating Point Exceptions have been reported - 111,690,136 cycles # 3.415 GHz - 366,492,877 instructions # 3.28 insn per cycle - 0.032882152 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.210882e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215211e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.045512 sec +INFO: No Floating Point Exceptions have been reported + 131,360,157 cycles # 2.718 GHz + 372,013,509 instructions # 2.83 insn per cycle + 0.048801319 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.868303e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.899940e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.899940e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.014400 sec -INFO: No Floating Point Exceptions have been reported - 51,254,242 cycles # 3.298 GHz - 137,758,048 instructions # 2.69 insn per cycle - 0.015737791 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.769306e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.791871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791871e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021120 sec +INFO: No Floating Point Exceptions have been reported + 64,157,831 cycles # 2.680 GHz + 142,829,765 instructions # 2.23 insn per cycle + 0.024479209 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.077129e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.112499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.112499e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.013698 sec -INFO: No Floating Point Exceptions have been reported - 49,032,194 cycles # 3.297 GHz - 127,942,099 instructions # 2.61 insn per cycle - 0.015061334 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9018) (512y: 8) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.078002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.105354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.105354e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.019156 sec +INFO: No Floating Point Exceptions have been reported + 59,143,033 cycles # 2.685 GHz + 132,774,537 instructions # 2.24 insn per cycle + 0.022562262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8963) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.318843e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.408986e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.408986e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.009146 sec -INFO: No Floating Point Exceptions have been reported - 32,185,576 cycles # 3.131 GHz - 74,586,891 instructions # 2.32 insn per cycle - 0.010449947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2837) (512y: 8) (512z: 7442) +EvtsPerSec[Rmb+ME] (23) = ( 2.363857e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386270e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386270e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.024531 sec +INFO: No Floating Point Exceptions have been reported + 51,349,038 cycles # 1.867 GHz + 79,557,658 instructions # 1.55 insn per cycle + 0.028087213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2834) (512y: 32) (512z: 7442) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 7dfca0c84..1f9290161 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:16:05 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:19:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.234747e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.244150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246143e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.473072 sec +INFO: No Floating Point Exceptions have been reported + 1,983,051,703 cycles # 2.868 GHz + 2,920,120,611 instructions # 1.47 insn per cycle + 0.748230768 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.099236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.199199e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.474597 sec +INFO: No Floating Point Exceptions have been reported + 1,988,967,454 cycles # 2.876 GHz + 2,944,964,203 instructions # 1.48 insn per cycle + 0.748107743 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272866419447706E-006 +Relative difference = 4.508529302013153e-06 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.475067e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.479498e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.479498e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.118415 sec -INFO: No Floating Point Exceptions have been reported - 415,034,055 cycles # 3.471 GHz - 1,368,251,607 instructions # 3.30 insn per cycle - 0.119813579 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.448809e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452114e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.154406 sec +INFO: No Floating Point Exceptions have been reported + 460,841,033 cycles # 2.931 GHz + 1,376,637,796 instructions # 2.99 insn per cycle + 0.157690889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.711055e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.717444e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.717444e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.031100 sec -INFO: No Floating Point Exceptions have been reported - 109,822,005 cycles # 3.407 GHz - 361,653,646 instructions # 3.29 insn per cycle - 0.032413176 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.217964e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222354e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.044518 sec +INFO: No Floating Point Exceptions have been reported + 129,447,390 cycles # 2.729 GHz + 367,192,934 instructions # 2.84 insn per cycle + 0.047990838 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.887373e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.920634e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.920634e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.013809 sec -INFO: No Floating Point Exceptions have been reported - 49,261,008 cycles # 3.297 GHz - 132,936,286 instructions # 2.70 insn per cycle - 0.015128154 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.769546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.792490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.792490e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.020349 sec +INFO: No Floating Point Exceptions have been reported + 62,145,033 cycles # 2.684 GHz + 138,048,264 instructions # 2.22 insn per cycle + 0.023682982 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9205) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.286972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.326938e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.326938e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.012541 sec -INFO: No Floating Point Exceptions have been reported - 45,154,884 cycles # 3.297 GHz - 123,161,944 instructions # 2.73 insn per cycle - 0.013880398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8974) (512y: 8) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.058079e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.086570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086570e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.018477 sec +INFO: No Floating Point Exceptions have been reported + 56,677,502 cycles # 2.660 GHz + 127,963,925 instructions # 2.26 insn per cycle + 0.021825959 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8919) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.275200e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.362894e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.362894e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.008670 sec -INFO: No Floating Point Exceptions have been reported - 30,678,765 cycles # 3.122 GHz - 69,803,910 instructions # 2.28 insn per cycle - 0.010014093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2792) (512y: 8) (512z: 7444) +EvtsPerSec[Rmb+ME] (23) = ( 2.337142e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.358958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358958e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.023953 sec +INFO: No Floating Point Exceptions have been reported + 48,824,483 cycles # 1.820 GHz + 74,785,723 instructions # 1.53 insn per cycle + 0.027430916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2789) (512y: 32) (512z: 7444) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 88c198c1a..c9ae97348 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:16:07 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:19:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.749294e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.767595e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.770609e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467392 sec +INFO: No Floating Point Exceptions have been reported + 1,983,595,553 cycles # 2.874 GHz + 2,922,486,219 instructions # 1.47 insn per cycle + 0.746529670 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.927630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.040034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.047831e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485867 sec +INFO: No Floating Point Exceptions have been reported + 2,031,462,606 cycles # 2.875 GHz + 3,037,983,552 instructions # 1.50 insn per cycle + 0.765937206 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.428332e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.432465e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.432465e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.120211 sec -INFO: No Floating Point Exceptions have been reported - 422,360,891 cycles # 3.480 GHz - 1,394,565,931 instructions # 3.30 insn per cycle - 0.121598088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3896) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.382949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386200e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.158173 sec +INFO: No Floating Point Exceptions have been reported + 471,387,733 cycles # 2.929 GHz + 1,398,281,899 instructions # 2.97 insn per cycle + 0.161473463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.917732e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.933577e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.933577e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.060055 sec -INFO: No Floating Point Exceptions have been reported - 210,080,708 cycles # 3.433 GHz - 682,364,178 instructions # 3.25 insn per cycle - 0.061434237 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.673807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686050e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686050e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080997 sec +INFO: No Floating Point Exceptions have been reported + 235,160,008 cycles # 2.808 GHz + 688,033,850 instructions # 2.93 insn per cycle + 0.084339129 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9328) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.027555e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036103e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036103e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026883 sec -INFO: No Floating Point Exceptions have been reported - 92,407,630 cycles # 3.299 GHz - 247,832,707 instructions # 2.68 insn per cycle - 0.028189869 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.415459e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422136e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422136e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.039362 sec +INFO: No Floating Point Exceptions have been reported + 112,339,380 cycles # 2.665 GHz + 253,052,093 instructions # 2.25 insn per cycle + 0.042695307 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.225908e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.235767e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.235767e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.024494 sec -INFO: No Floating Point Exceptions have been reported - 84,582,389 cycles # 3.298 GHz - 228,938,148 instructions # 2.71 insn per cycle - 0.025842335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7552) (512y: 126) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.648852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.656658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656658e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034079 sec +INFO: No Floating Point Exceptions have been reported + 100,217,114 cycles # 2.715 GHz + 233,607,212 instructions # 2.33 insn per cycle + 0.037476380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.253598e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.274772e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.274772e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.017044 sec -INFO: No Floating Point Exceptions have been reported - 57,563,203 cycles # 3.160 GHz - 127,860,719 instructions # 2.22 insn per cycle - 0.018401076 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 100) (512z: 6354) +EvtsPerSec[Rmb+ME] (23) = ( 1.192314e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.197366e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046499 sec +INFO: No Floating Point Exceptions have been reported + 89,493,670 cycles # 1.812 GHz + 133,128,515 instructions # 1.49 insn per cycle + 0.049962595 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6356) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index da2853bd2..1d81f994c 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,46 +1,117 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_10:16:10 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +DATE: 2024-09-15_12:19:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.765961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.783708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789350e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.474700 sec +INFO: No Floating Point Exceptions have been reported + 1,974,719,372 cycles # 2.833 GHz + 2,899,642,626 instructions # 1.47 insn per cycle + 0.754986373 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.058757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.171730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.179415e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.484755 sec +INFO: No Floating Point Exceptions have been reported + 2,032,400,386 cycles # 2.878 GHz + 3,034,442,470 instructions # 1.49 insn per cycle + 0.765490241 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.423910e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.428242e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.428242e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.119800 sec -INFO: No Floating Point Exceptions have been reported - 420,923,349 cycles # 3.480 GHz - 1,388,131,684 instructions # 3.30 insn per cycle - 0.121174856 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3795) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.419840e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.423095e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423095e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.155760 sec +INFO: No Floating Point Exceptions have been reported + 467,249,665 cycles # 2.946 GHz + 1,393,566,061 instructions # 2.98 insn per cycle + 0.159156822 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -48,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.972239e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.988815e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.988815e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.059165 sec -INFO: No Floating Point Exceptions have been reported - 207,112,754 cycles # 3.435 GHz - 677,713,138 instructions # 3.27 insn per cycle - 0.060528059 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.647634e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.659890e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.659890e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080628 sec +INFO: No Floating Point Exceptions have been reported + 234,377,416 cycles # 2.808 GHz + 684,139,763 instructions # 2.92 insn per cycle + 0.083918243 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9361) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -80,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.059520e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.067745e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.067745e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.025893 sec -INFO: No Floating Point Exceptions have been reported - 89,170,673 cycles # 3.299 GHz - 243,037,983 instructions # 2.73 insn per cycle - 0.027255216 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.444361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.450464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.450464e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037847 sec +INFO: No Floating Point Exceptions have been reported + 110,057,998 cycles # 2.704 GHz + 248,602,467 instructions # 2.26 insn per cycle + 0.041225455 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -112,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.236711e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246698e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246698e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.023854 sec -INFO: No Floating Point Exceptions have been reported - 82,587,275 cycles # 3.298 GHz - 224,111,268 instructions # 2.71 insn per cycle - 0.025226021 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7503) (512y: 126) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.658647e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666343e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.033122 sec +INFO: No Floating Point Exceptions have been reported + 97,824,445 cycles # 2.731 GHz + 229,151,030 instructions # 2.34 insn per cycle + 0.036353420 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -144,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.248272e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.269741e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.269741e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.016537 sec -INFO: No Floating Point Exceptions have been reported - 55,612,509 cycles # 3.146 GHz - 123,069,837 instructions # 2.21 insn per cycle - 0.017867245 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2028) (512y: 100) (512z: 6354) +EvtsPerSec[Rmb+ME] (23) = ( 1.188861e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193842e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193842e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046038 sec +INFO: No Floating Point Exceptions have been reported + 87,629,988 cycles # 1.795 GHz + 128,556,729 instructions # 1.47 insn per cycle + 0.049600721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6356) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -176,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index def33d7a2..808bf6828 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:33 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:17:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.071674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333003e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756234e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.519163 sec +INFO: No Floating Point Exceptions have been reported + 2,179,162,165 cycles # 2.882 GHz + 3,070,881,799 instructions # 1.41 insn per cycle + 0.812256060 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 +Avg ME (F77/GPU) = 0.14771956172964262 +Relative difference = 2.590743366698123e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.239338e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403334e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.977633 sec -INFO: No Floating Point Exceptions have been reported - 3,426,208,543 cycles # 3.494 GHz - 9,703,135,034 instructions # 2.83 insn per cycle - 0.980872397 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.736392e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.961242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.961242e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.358463 sec +INFO: No Floating Point Exceptions have been reported + 3,905,907,731 cycles # 2.851 GHz + 9,863,781,254 instructions # 2.53 insn per cycle + 1.371009162 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.207782e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.835190e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.835190e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.603514 sec -INFO: No Floating Point Exceptions have been reported - 2,116,359,405 cycles # 3.491 GHz - 5,906,343,418 instructions # 2.79 insn per cycle - 0.606525847 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.459599e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.873378e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873378e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.877453 sec +INFO: No Floating Point Exceptions have been reported + 2,486,018,663 cycles # 2.796 GHz + 6,068,811,134 instructions # 2.44 insn per cycle + 0.890013058 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.312964e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.879135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.879135e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.440778 sec -INFO: No Floating Point Exceptions have been reported - 1,507,782,910 cycles # 3.400 GHz - 3,276,943,155 instructions # 2.17 insn per cycle - 0.443726291 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.202336e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.241982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241982e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.633614 sec +INFO: No Floating Point Exceptions have been reported + 1,818,277,006 cycles # 2.816 GHz + 3,450,832,845 instructions # 1.90 insn per cycle + 0.646259584 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.412807e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.088806e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.088806e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.433077 sec -INFO: No Floating Point Exceptions have been reported - 1,481,581,258 cycles # 3.400 GHz - 3,215,241,147 instructions # 2.17 insn per cycle - 0.436053276 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.283565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391222e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391222e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.617501 sec +INFO: No Floating Point Exceptions have been reported + 1,780,688,704 cycles # 2.829 GHz + 3,420,263,634 instructions # 1.92 insn per cycle + 0.630172459 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.271863e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.225393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.225393e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.371972 sec -INFO: No Floating Point Exceptions have been reported - 1,276,412,330 cycles # 3.407 GHz - 2,366,391,986 instructions # 1.85 insn per cycle - 0.374909218 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.121818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051040e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051040e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.653754 sec +INFO: No Floating Point Exceptions have been reported + 1,527,075,900 cycles # 2.294 GHz + 2,560,289,188 instructions # 1.68 insn per cycle + 0.666212420 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 285ad8b33..06cbb3e92 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:38 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:17:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.969739e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449933e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971179e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.523956 sec +INFO: No Floating Point Exceptions have been reported + 2,197,273,631 cycles # 2.867 GHz + 3,116,260,127 instructions # 1.42 insn per cycle + 0.822598423 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 +Avg ME (F77/GPU) = 0.14771956172964262 +Relative difference = 2.590743366698123e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.235675e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.396806e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.396806e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.978638 sec -INFO: No Floating Point Exceptions have been reported - 3,431,903,160 cycles # 3.497 GHz - 9,585,068,417 instructions # 2.79 insn per cycle - 0.981537814 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.011162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.031790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031790e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.321281 sec +INFO: No Floating Point Exceptions have been reported + 3,893,594,822 cycles # 2.920 GHz + 9,744,555,445 instructions # 2.50 insn per cycle + 1.334263922 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.183080e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.797565e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.797565e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.609452 sec -INFO: No Floating Point Exceptions have been reported - 2,138,146,178 cycles # 3.492 GHz - 5,851,586,431 instructions # 2.74 insn per cycle - 0.612571643 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.381914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838565e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.917902 sec +INFO: No Floating Point Exceptions have been reported + 2,659,815,270 cycles # 2.862 GHz + 6,026,660,919 instructions # 2.27 insn per cycle + 0.930359460 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.297670e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.859580e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.859580e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.442580 sec -INFO: No Floating Point Exceptions have been reported - 1,512,766,739 cycles # 3.398 GHz - 3,249,594,241 instructions # 2.15 insn per cycle - 0.445504934 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.192597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.249251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.249251e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.636612 sec +INFO: No Floating Point Exceptions have been reported + 1,830,188,885 cycles # 2.821 GHz + 3,421,758,036 instructions # 1.87 insn per cycle + 0.649438298 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.440790e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.146878e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.146878e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.429974 sec -INFO: No Floating Point Exceptions have been reported - 1,472,025,211 cycles # 3.403 GHz - 3,192,051,214 instructions # 2.17 insn per cycle - 0.432871908 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.271531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393290e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.619901 sec +INFO: No Floating Point Exceptions have been reported + 1,782,873,131 cycles # 2.820 GHz + 3,395,941,059 instructions # 1.90 insn per cycle + 0.632832806 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.270100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.150808e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.150808e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.371647 sec -INFO: No Floating Point Exceptions have been reported - 1,275,259,231 cycles # 3.405 GHz - 2,351,213,572 instructions # 1.84 insn per cycle - 0.374728190 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.134142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.070820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070820e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.650853 sec +INFO: No Floating Point Exceptions have been reported + 1,547,796,135 cycles # 2.334 GHz + 2,545,431,106 instructions # 1.64 insn per cycle + 0.664117617 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index f6664057d..6bbc9fb0d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:42 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:17:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.000684e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.992629e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.388281e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.488697 sec +INFO: No Floating Point Exceptions have been reported + 2,047,674,909 cycles # 2.864 GHz + 2,921,802,724 instructions # 1.43 insn per cycle + 0.773667864 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477195e-01 +Avg ME (F77/GPU) = 0.14771956735057756 +Relative difference = 4.559355911674916e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.309459e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.517250e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.517250e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.918074 sec -INFO: No Floating Point Exceptions have been reported - 3,219,436,139 cycles # 3.497 GHz - 9,589,445,730 instructions # 2.98 insn per cycle - 0.920785803 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.029870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042886e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.272870 sec +INFO: No Floating Point Exceptions have been reported + 3,752,313,957 cycles # 2.930 GHz + 9,659,106,684 instructions # 2.57 insn per cycle + 1.281538641 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.168121e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.797547e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.797547e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.446005 sec -INFO: No Floating Point Exceptions have been reported - 1,567,145,442 cycles # 3.494 GHz - 3,967,318,763 instructions # 2.53 insn per cycle - 0.448719398 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.197021e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333033e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.588107 sec +INFO: No Floating Point Exceptions have been reported + 1,715,029,446 cycles # 2.877 GHz + 4,025,277,973 instructions # 2.35 insn per cycle + 0.596790312 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.398705e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.597097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.597097e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.348766 sec -INFO: No Floating Point Exceptions have been reported - 1,196,981,389 cycles # 3.408 GHz - 2,476,155,949 instructions # 2.07 insn per cycle - 0.351452047 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.961392e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263984e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263984e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.463961 sec +INFO: No Floating Point Exceptions have been reported + 1,335,854,072 cycles # 2.831 GHz + 2,555,445,671 instructions # 1.91 insn per cycle + 0.472547002 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.516497e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.949939e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.949939e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.344298 sec -INFO: No Floating Point Exceptions have been reported - 1,183,378,251 cycles # 3.413 GHz - 2,416,642,373 instructions # 2.04 insn per cycle - 0.346996009 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.064902e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.593039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.593039e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.451960 sec +INFO: No Floating Point Exceptions have been reported + 1,308,601,918 cycles # 2.845 GHz + 2,529,434,362 instructions # 1.93 insn per cycle + 0.460618771 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1867) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.296619e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100097e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100097e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.311531 sec -INFO: No Floating Point Exceptions have been reported - 1,071,207,326 cycles # 3.411 GHz - 2,020,810,284 instructions # 1.89 insn per cycle - 0.314240853 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.884808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904943e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904943e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.475883 sec +INFO: No Floating Point Exceptions have been reported + 1,154,211,341 cycles # 2.384 GHz + 2,131,381,757 instructions # 1.85 insn per cycle + 0.484642507 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index c8ea227e1..5f533fb3c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:45 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:18:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.019765e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339790e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.485800 sec +INFO: No Floating Point Exceptions have been reported + 2,046,836,184 cycles # 2.868 GHz + 2,861,763,521 instructions # 1.40 insn per cycle + 0.770456449 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477195e-01 +Avg ME (F77/GPU) = 0.14771956525510177 +Relative difference = 4.4175008557828484e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.308667e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.516598e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.516598e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.917486 sec -INFO: No Floating Point Exceptions have been reported - 3,217,291,273 cycles # 3.497 GHz - 9,459,142,701 instructions # 2.94 insn per cycle - 0.920206683 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.176079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.058039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.251903 sec +INFO: No Floating Point Exceptions have been reported + 3,703,836,740 cycles # 2.940 GHz + 9,528,821,992 instructions # 2.57 insn per cycle + 1.260572218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.160141e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.788053e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.788053e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.447583 sec -INFO: No Floating Point Exceptions have been reported - 1,571,707,613 cycles # 3.491 GHz - 3,933,040,708 instructions # 2.50 insn per cycle - 0.450484892 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.192175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322296e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.587889 sec +INFO: No Floating Point Exceptions have been reported + 1,712,573,858 cycles # 2.874 GHz + 3,991,164,090 instructions # 2.33 insn per cycle + 0.596469979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.368931e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.575759e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.575759e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.350512 sec -INFO: No Floating Point Exceptions have been reported - 1,199,348,200 cycles # 3.398 GHz - 2,460,374,556 instructions # 2.05 insn per cycle - 0.353272501 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.984131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.291891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.291891e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.460621 sec +INFO: No Floating Point Exceptions have been reported + 1,332,768,943 cycles # 2.844 GHz + 2,539,760,549 instructions # 1.91 insn per cycle + 0.469223881 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1815) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.513547e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.932855e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.932855e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.344019 sec -INFO: No Floating Point Exceptions have been reported - 1,182,212,065 cycles # 3.412 GHz - 2,403,543,524 instructions # 2.03 insn per cycle - 0.346714362 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.068338e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608348e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.451660 sec +INFO: No Floating Point Exceptions have been reported + 1,303,705,490 cycles # 2.835 GHz + 2,516,660,988 instructions # 1.93 insn per cycle + 0.460426647 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1776) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.320964e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110043e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110043e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.309454 sec -INFO: No Floating Point Exceptions have been reported - 1,068,182,236 cycles # 3.422 GHz - 2,004,694,606 instructions # 1.88 insn per cycle - 0.312419782 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.904674e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952335e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.470523 sec +INFO: No Floating Point Exceptions have been reported + 1,148,816,748 cycles # 2.401 GHz + 2,115,600,264 instructions # 1.84 insn per cycle + 0.478989217 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 6e01b3f2b..45ada3a90 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:49 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:18:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.081665e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353918e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800720e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.522074 sec +INFO: No Floating Point Exceptions have been reported + 2,188,417,927 cycles # 2.886 GHz + 3,109,980,535 instructions # 1.42 insn per cycle + 0.814803667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 +Avg ME (F77/GPU) = 0.14771956187351573 +Relative difference = 2.5810037581511336e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.205997e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.359879e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.359879e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.001738 sec -INFO: No Floating Point Exceptions have been reported - 3,511,598,446 cycles # 3.497 GHz - 9,728,214,340 instructions # 2.77 insn per cycle - 1.004628780 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.910495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017920e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017920e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.334640 sec +INFO: No Floating Point Exceptions have been reported + 3,942,717,867 cycles # 2.929 GHz + 9,888,397,619 instructions # 2.51 insn per cycle + 1.346816311 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.216488e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.847973e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.847973e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.603318 sec -INFO: No Floating Point Exceptions have been reported - 2,118,570,678 cycles # 3.496 GHz - 5,885,625,123 instructions # 2.78 insn per cycle - 0.606257219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.550381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.026476e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026476e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.835395 sec +INFO: No Floating Point Exceptions have been reported + 2,474,407,927 cycles # 2.921 GHz + 6,051,781,084 instructions # 2.45 insn per cycle + 0.847852996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1410) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.465260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.213835e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.213835e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.426842 sec -INFO: No Floating Point Exceptions have been reported - 1,461,786,293 cycles # 3.404 GHz - 3,205,338,171 instructions # 2.19 insn per cycle - 0.429743140 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.251869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.352067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352067e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.625126 sec +INFO: No Floating Point Exceptions have been reported + 1,795,351,792 cycles # 2.819 GHz + 3,389,782,871 instructions # 1.89 insn per cycle + 0.637929251 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.549936e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.396224e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.396224e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.421673 sec -INFO: No Floating Point Exceptions have been reported - 1,445,030,348 cycles # 3.405 GHz - 3,128,814,798 instructions # 2.17 insn per cycle - 0.424563119 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.324637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489814e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489814e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.609151 sec +INFO: No Floating Point Exceptions have been reported + 1,759,711,411 cycles # 2.834 GHz + 3,345,109,138 instructions # 1.90 insn per cycle + 0.621588850 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.425601e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.638472e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.638472e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.363091 sec -INFO: No Floating Point Exceptions have been reported - 1,245,984,523 cycles # 3.407 GHz - 2,305,509,027 instructions # 1.85 insn per cycle - 0.365983048 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.155541e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119089e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.645680 sec +INFO: No Floating Point Exceptions have been reported + 1,520,276,942 cycles # 2.311 GHz + 2,512,095,426 instructions # 1.65 insn per cycle + 0.658351218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 6a6cefc03..635fef145 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_10:15:53 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +DATE: 2024-09-15_12:18:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.163952e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.460728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.975996e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.521235 sec +INFO: No Floating Point Exceptions have been reported + 2,178,235,183 cycles # 2.877 GHz + 3,088,126,574 instructions # 1.42 insn per cycle + 0.814490194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 +Avg ME (F77/GPU) = 0.14771956187351573 +Relative difference = 2.5810037581511336e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.225002e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.384289e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.384289e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.987736 sec -INFO: No Floating Point Exceptions have been reported - 3,460,736,345 cycles # 3.495 GHz - 9,619,000,237 instructions # 2.78 insn per cycle - 0.990645439 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.905010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017534e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.334358 sec +INFO: No Floating Point Exceptions have been reported + 3,930,631,045 cycles # 2.921 GHz + 9,778,615,750 instructions # 2.49 insn per cycle + 1.346795690 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.218899e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.855080e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.855080e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.602725 sec -INFO: No Floating Point Exceptions have been reported - 2,115,922,927 cycles # 3.495 GHz - 5,815,400,729 instructions # 2.75 insn per cycle - 0.605657692 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.520527e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978156e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.848228 sec +INFO: No Floating Point Exceptions have been reported + 2,460,773,168 cycles # 2.862 GHz + 5,993,984,003 instructions # 2.44 insn per cycle + 0.860657174 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.454131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.199755e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.199755e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.428498 sec -INFO: No Floating Point Exceptions have been reported - 1,465,409,132 cycles # 3.397 GHz - 3,163,298,818 instructions # 2.16 insn per cycle - 0.431563016 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.233001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325225e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.627322 sec +INFO: No Floating Point Exceptions have been reported + 1,810,072,132 cycles # 2.832 GHz + 3,352,499,816 instructions # 1.85 insn per cycle + 0.639697989 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.555702e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.414319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.414319e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.420391 sec -INFO: No Floating Point Exceptions have been reported - 1,440,022,287 cycles # 3.404 GHz - 3,094,465,013 instructions # 2.15 insn per cycle - 0.423339516 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.331445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508166e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.606762 sec +INFO: No Floating Point Exceptions have been reported + 1,747,202,335 cycles # 2.825 GHz + 3,316,993,487 instructions # 1.90 insn per cycle + 0.619170203 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.423443e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.634483e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.634483e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.363262 sec -INFO: No Floating Point Exceptions have been reported - 1,247,596,693 cycles # 3.408 GHz - 2,286,371,480 instructions # 1.83 insn per cycle - 0.366411865 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 712) (512y: 64) (512z: 1052) +EvtsPerSec[Rmb+ME] (23) = ( 2.144157e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.097547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097547e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.647875 sec +INFO: No Floating Point Exceptions have been reported + 1,527,422,709 cycles # 2.315 GHz + 2,496,191,682 instructions # 1.63 insn per cycle + 0.660479795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1054) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index ee61e9123..0f0996a4b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:14:20 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:15:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.705596e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.093007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.806904e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.547454 sec +INFO: No Floating Point Exceptions have been reported + 2,244,643,125 cycles # 2.856 GHz + 3,194,753,552 instructions # 1.42 insn per cycle + 0.844381889 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.393314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.448313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.448313e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.508852 sec -INFO: No Floating Point Exceptions have been reported - 15,780,850,537 cycles # 3.498 GHz - 46,014,375,021 instructions # 2.92 insn per cycle - 4.511668051 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 623) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.817695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865026e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865026e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.917395 sec +INFO: No Floating Point Exceptions have been reported + 17,420,747,172 cycles # 2.939 GHz + 46,039,408,535 instructions # 2.64 insn per cycle + 5.929443281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194407 Relative difference = 6.616637439061751e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.340157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.537925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.537925e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.542216 sec -INFO: No Floating Point Exceptions have been reported - 8,901,691,283 cycles # 3.498 GHz - 27,763,795,697 instructions # 3.12 insn per cycle - 2.545182871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.177458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.337417e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.449474 sec +INFO: No Floating Point Exceptions have been reported + 10,167,811,545 cycles # 2.940 GHz + 27,922,488,818 instructions # 2.75 insn per cycle + 3.461267593 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.065603e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.584756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.584756e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.605974 sec -INFO: No Floating Point Exceptions have been reported - 5,351,077,605 cycles # 3.326 GHz - 12,517,636,881 instructions # 2.34 insn per cycle - 1.608907126 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.001375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.394642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.394642e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.246102 sec +INFO: No Floating Point Exceptions have been reported + 6,225,214,133 cycles # 2.758 GHz + 12,703,481,596 instructions # 2.04 insn per cycle + 2.257992148 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.208117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.751863e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.751863e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.578323 sec -INFO: No Floating Point Exceptions have been reported - 5,260,374,252 cycles # 3.327 GHz - 12,054,678,780 instructions # 2.29 insn per cycle - 1.581300998 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2415) (512y: 124) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.452222e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.920256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.920256e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.070361 sec +INFO: No Floating Point Exceptions have been reported + 5,740,692,800 cycles # 2.758 GHz + 12,120,362,498 instructions # 2.11 insn per cycle + 2.082196362 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2363) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.884211e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.526121e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.526121e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.452653 sec -INFO: No Floating Point Exceptions have been reported - 4,842,197,135 cycles # 3.327 GHz - 8,210,718,102 instructions # 1.70 insn per cycle - 1.455527947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1461) (512y: 100) (512z: 1804) +EvtsPerSec[Rmb+ME] (23) = ( 3.496236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.681187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681187e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.148263 sec +INFO: No Floating Point Exceptions have been reported + 5,893,231,770 cycles # 1.865 GHz + 8,460,083,225 instructions # 1.44 insn per cycle + 3.160116132 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 0b957aca8..b863aa4b8 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:14:33 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:15:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.254248e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.331434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002046e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.532593 sec +INFO: No Floating Point Exceptions have been reported + 2,206,521,572 cycles # 2.874 GHz + 3,181,038,873 instructions # 1.44 insn per cycle + 0.824867346 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.439573e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.496721e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.496721e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.425043 sec -INFO: No Floating Point Exceptions have been reported - 15,487,510,898 cycles # 3.498 GHz - 44,984,365,577 instructions # 2.90 insn per cycle - 4.428008430 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.847951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.897550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897550e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.824198 sec +INFO: No Floating Point Exceptions have been reported + 17,074,611,956 cycles # 2.927 GHz + 45,037,522,622 instructions # 2.64 insn per cycle + 5.835488505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.526356e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.741279e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.741279e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.441641 sec -INFO: No Floating Point Exceptions have been reported - 8,551,659,131 cycles # 3.499 GHz - 26,606,326,211 instructions # 3.11 insn per cycle - 2.444586586 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.339846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.517622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517622e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.286951 sec +INFO: No Floating Point Exceptions have been reported + 9,688,702,526 cycles # 2.938 GHz + 26,805,473,197 instructions # 2.77 insn per cycle + 3.298888236 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.421573e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.848356e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.848356e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.753601 sec -INFO: No Floating Point Exceptions have been reported - 5,838,232,155 cycles # 3.324 GHz - 14,053,999,011 instructions # 2.41 insn per cycle - 1.756562397 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.556976e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882070e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.448521 sec +INFO: No Floating Point Exceptions have been reported + 6,771,268,311 cycles # 2.753 GHz + 14,227,806,494 instructions # 2.10 insn per cycle + 2.460277833 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2704) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.662082e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.118885e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.118885e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.696280 sec -INFO: No Floating Point Exceptions have been reported - 5,649,153,542 cycles # 3.325 GHz - 13,619,007,594 instructions # 2.41 insn per cycle - 1.699385729 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.776530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.130549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130549e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.341767 sec +INFO: No Floating Point Exceptions have been reported + 6,488,711,878 cycles # 2.758 GHz + 13,822,301,429 instructions # 2.13 insn per cycle + 2.353629315 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2355) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.843575e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.482984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.482984e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.458946 sec -INFO: No Floating Point Exceptions have been reported - 4,852,628,227 cycles # 3.323 GHz - 10,027,017,335 instructions # 2.07 insn per cycle - 1.461937024 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.365014e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.535721e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535721e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.263382 sec +INFO: No Floating Point Exceptions have been reported + 6,085,804,948 cycles # 1.859 GHz + 10,219,161,569 instructions # 1.68 insn per cycle + 3.275179492 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index b38525a55..cf83c07d4 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:14:46 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:16:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.223710e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732879e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860967e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.489964 sec +INFO: No Floating Point Exceptions have been reported + 2,041,886,975 cycles # 2.853 GHz + 2,932,689,889 instructions # 1.44 insn per cycle + 0.773926194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.674577e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.752131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.752131e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 4.035639 sec -INFO: No Floating Point Exceptions have been reported - 14,119,422,643 cycles # 3.497 GHz - 45,275,385,351 instructions # 3.21 insn per cycle - 4.038355934 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.929290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984521e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984521e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.538462 sec +INFO: No Floating Point Exceptions have been reported + 16,282,391,613 cycles # 2.936 GHz + 45,369,954,990 instructions # 2.79 insn per cycle + 5.546087919 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.062686e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.484598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.484598e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352516e-03 ) GeV^0 -TOTAL : 1.842941 sec -INFO: No Floating Point Exceptions have been reported - 6,454,176,138 cycles # 3.497 GHz - 17,762,074,536 instructions # 2.75 insn per cycle - 1.845696380 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.517678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.857555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.857555e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.425578 sec +INFO: No Floating Point Exceptions have been reported + 7,146,342,805 cycles # 2.938 GHz + 17,820,817,556 instructions # 2.49 insn per cycle + 2.433499088 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.139418e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.279878e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279878e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 1.029197 sec -INFO: No Floating Point Exceptions have been reported - 3,442,104,814 cycles # 3.337 GHz - 8,238,873,342 instructions # 2.39 insn per cycle - 1.031932368 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.300139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447005e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.366410 sec +INFO: No Floating Point Exceptions have been reported + 3,812,530,133 cycles # 2.776 GHz + 8,314,531,864 instructions # 2.18 insn per cycle + 1.374237525 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3369) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.172249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319465e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.319465e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 1.004591 sec -INFO: No Floating Point Exceptions have been reported - 3,360,921,594 cycles # 3.337 GHz - 7,931,540,926 instructions # 2.36 insn per cycle - 1.007270029 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3268) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.645853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.897799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.897799e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.316285 sec +INFO: No Floating Point Exceptions have been reported + 3,675,425,998 cycles # 2.778 GHz + 7,974,219,247 instructions # 2.17 insn per cycle + 1.323972787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3213) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.377803e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.595065e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.595065e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 0.871377 sec -INFO: No Floating Point Exceptions have been reported - 2,919,581,056 cycles # 3.341 GHz - 6,022,898,723 instructions # 2.06 insn per cycle - 0.874100289 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2259) (512y: 0) (512z: 2156) +EvtsPerSec[Rmb+ME] (23) = ( 6.513578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.178216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178216e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.714470 sec +INFO: No Floating Point Exceptions have been reported + 3,315,579,741 cycles # 1.925 GHz + 6,150,343,295 instructions # 1.85 insn per cycle + 1.722857238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2256) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 52f6be244..60f2dad34 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:14:56 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:16:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.969923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.737348e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863422e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.488943 sec +INFO: No Floating Point Exceptions have been reported + 2,058,928,554 cycles # 2.875 GHz + 2,909,617,560 instructions # 1.41 insn per cycle + 0.773317754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.731319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.811910e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.811910e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 3.953519 sec -INFO: No Floating Point Exceptions have been reported - 13,840,298,045 cycles # 3.499 GHz - 44,379,624,369 instructions # 3.21 insn per cycle - 3.956172476 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.962212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019252e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.446348 sec +INFO: No Floating Point Exceptions have been reported + 16,014,544,982 cycles # 2.937 GHz + 44,474,347,041 instructions # 2.78 insn per cycle + 5.454124254 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.257817e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.888610e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.888610e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352516e-03 ) GeV^0 -TOTAL : 1.557707 sec -INFO: No Floating Point Exceptions have been reported - 5,450,190,072 cycles # 3.493 GHz - 17,062,633,912 instructions # 3.13 insn per cycle - 1.560495863 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.286669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759924e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.086643 sec +INFO: No Floating Point Exceptions have been reported + 6,135,728,749 cycles # 2.931 GHz + 17,120,648,230 instructions # 2.79 insn per cycle + 2.094524948 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.566698e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.329409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.329409e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 1.331242 sec -INFO: No Floating Point Exceptions have been reported - 4,438,805,258 cycles # 3.328 GHz - 10,197,135,415 instructions # 2.30 insn per cycle - 1.333935378 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.052770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.643067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.643067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.835441 sec +INFO: No Floating Point Exceptions have been reported + 5,101,873,696 cycles # 2.769 GHz + 10,273,156,361 instructions # 2.01 insn per cycle + 1.843297684 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.617081e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.395826e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.395826e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 1.327006 sec -INFO: No Floating Point Exceptions have been reported - 4,425,415,926 cycles # 3.329 GHz - 9,931,492,159 instructions # 2.24 insn per cycle - 1.329718661 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.133897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.741009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.741009e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.812216 sec +INFO: No Floating Point Exceptions have been reported + 5,041,846,676 cycles # 2.771 GHz + 10,042,915,318 instructions # 1.99 insn per cycle + 1.820042823 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.608719e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.061283e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061283e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352029e-03 ) GeV^0 -TOTAL : 1.201246 sec -INFO: No Floating Point Exceptions have been reported - 4,005,537,794 cycles # 3.328 GHz - 8,384,661,017 instructions # 2.09 insn per cycle - 1.204032413 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.642094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.969408e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.969408e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 2.363033 sec +INFO: No Floating Point Exceptions have been reported + 4,430,997,247 cycles # 1.870 GHz + 8,493,309,798 instructions # 1.92 insn per cycle + 2.370917653 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 50df466f7..62fab95ac 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:15:07 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:16:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.370448e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.371457e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004604e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.536026 sec +INFO: No Floating Point Exceptions have been reported + 2,214,409,122 cycles # 2.867 GHz + 3,163,292,335 instructions # 1.43 insn per cycle + 0.830149622 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358639104246 +Relative difference = 6.751024171044779e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.378219e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.432672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.432672e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.536607 sec -INFO: No Floating Point Exceptions have been reported - 15,879,037,347 cycles # 3.498 GHz - 46,166,689,914 instructions # 2.91 insn per cycle - 4.539508312 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 623) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.787519e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.833765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.833765e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 6.014473 sec +INFO: No Floating Point Exceptions have been reported + 17,675,190,561 cycles # 2.934 GHz + 46,198,484,525 instructions # 2.61 insn per cycle + 6.025789457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.318867e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.514187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.514187e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.553822 sec -INFO: No Floating Point Exceptions have been reported - 8,942,818,379 cycles # 3.498 GHz - 27,524,019,542 instructions # 3.08 insn per cycle - 2.556810607 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.209372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.373008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.373008e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.414727 sec +INFO: No Floating Point Exceptions have been reported + 10,062,586,014 cycles # 2.937 GHz + 27,715,049,037 instructions # 2.75 insn per cycle + 3.427097999 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.132755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.661874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.661874e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.591518 sec -INFO: No Floating Point Exceptions have been reported - 5,303,285,283 cycles # 3.327 GHz - 12,390,689,278 instructions # 2.34 insn per cycle - 1.594391507 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.030622e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.429639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.429639e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.231035 sec +INFO: No Floating Point Exceptions have been reported + 6,157,448,048 cycles # 2.747 GHz + 12,606,647,104 instructions # 2.05 insn per cycle + 2.242669652 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.352522e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.914254e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.914254e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.548716 sec -INFO: No Floating Point Exceptions have been reported - 5,162,301,088 cycles # 3.328 GHz - 11,924,116,295 instructions # 2.31 insn per cycle - 1.551679763 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2573) (512y: 126) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.510413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.987462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.049793 sec +INFO: No Floating Point Exceptions have been reported + 5,651,790,254 cycles # 2.742 GHz + 12,043,922,780 instructions # 2.13 insn per cycle + 2.061986198 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2522) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.106587e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.787271e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.787271e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.416209 sec -INFO: No Floating Point Exceptions have been reported - 4,723,432,620 cycles # 3.329 GHz - 7,939,751,052 instructions # 1.68 insn per cycle - 1.419154388 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1664) (512y: 104) (512z: 1864) +EvtsPerSec[Rmb+ME] (23) = ( 3.559881e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752268e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.093339 sec +INFO: No Floating Point Exceptions have been reported + 5,777,925,002 cycles # 1.861 GHz + 8,230,989,757 instructions # 1.42 insn per cycle + 3.105063126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1866) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 2c2c57b78..bde416a88 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,46 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_10:15:20 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +DATE: 2024-09-15_12:17:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.300920e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.324653e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.961570e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.533607 sec +INFO: No Floating Point Exceptions have been reported + 2,198,649,887 cycles # 2.849 GHz + 3,081,934,025 instructions # 1.40 insn per cycle + 0.827713906 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358639104246 +Relative difference = 6.751024171044779e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.401977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.457097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.457097e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.492526 sec -INFO: No Floating Point Exceptions have been reported - 15,712,085,061 cycles # 3.495 GHz - 45,169,053,605 instructions # 2.87 insn per cycle - 4.495521178 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.850472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.899391e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899391e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.814253 sec +INFO: No Floating Point Exceptions have been reported + 17,118,477,102 cycles # 2.939 GHz + 45,207,445,046 instructions # 2.64 insn per cycle + 5.826249043 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -48,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.489380e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.701058e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.701058e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.461731 sec -INFO: No Floating Point Exceptions have been reported - 8,617,180,567 cycles # 3.497 GHz - 26,135,921,387 instructions # 3.03 insn per cycle - 2.464780892 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.320488e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495447e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.305181 sec +INFO: No Floating Point Exceptions have been reported + 9,752,287,704 cycles # 2.941 GHz + 26,369,462,343 instructions # 2.70 insn per cycle + 3.316567159 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -80,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.338772e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.753740e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.753740e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.774295 sec -INFO: No Floating Point Exceptions have been reported - 5,906,185,145 cycles # 3.324 GHz - 13,915,040,499 instructions # 2.36 insn per cycle - 1.777269517 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.466984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.783441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.783441e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.496548 sec +INFO: No Floating Point Exceptions have been reported + 6,902,736,140 cycles # 2.753 GHz + 14,146,955,352 instructions # 2.05 insn per cycle + 2.508688639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -112,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.536459e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.976165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.976165e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.725511 sec -INFO: No Floating Point Exceptions have been reported - 5,745,807,239 cycles # 3.325 GHz - 13,392,946,317 instructions # 2.33 insn per cycle - 1.728434172 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.747061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.095585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.095585e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.355880 sec +INFO: No Floating Point Exceptions have been reported + 6,536,932,805 cycles # 2.762 GHz + 13,633,905,312 instructions # 2.09 insn per cycle + 2.367915662 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -144,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.411194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.151496e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.151496e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.368036 sec -INFO: No Floating Point Exceptions have been reported - 4,555,301,930 cycles # 3.323 GHz - 9,073,665,151 instructions # 1.99 insn per cycle - 1.370948998 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1452) (512y: 212) (512z: 2059) +EvtsPerSec[Rmb+ME] (23) = ( 3.589352e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.785622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785622e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.069165 sec +INFO: No Floating Point Exceptions have been reported + 5,741,871,289 cycles # 1.864 GHz + 9,325,593,834 instructions # 1.62 insn per cycle + 3.081760977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -176,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08