Skip to content

Commit

Permalink
[cmsdy] in dsample.f of pp_dy3j.mad P0_gux_taptamggux, cache xbin_min for xmin=0 and xbin_max for xmax=1 (part2 of madgraph5#969)
Browse files Browse the repository at this point in the history

There is indeed another clear, non-negligible performance improvement, as shown by the counters below:

CUDACPP_RUNTIME_DISABLEFPE=1 ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_dy3j_x1_cudacpp
 [COUNTERS] PROGRAM TOTAL                         :    4.2184s
 [COUNTERS] Fortran Other                  (  0 ) :    0.1695s
 [COUNTERS] Fortran Initialise(I/O)        (  1 ) :    0.0672s
 [COUNTERS] Fortran Random2Momenta         (  3 ) :    2.9293s for  1170103 events => throughput is 2.50E-06 events/s
 [COUNTERS] Fortran PDFs                   (  4 ) :    0.1094s for    49152 events => throughput is 2.23E-06 events/s
 [COUNTERS] Fortran UpdateScaleCouplings   (  5 ) :    0.1379s for    16384 events => throughput is 8.42E-06 events/s
 [COUNTERS] Fortran Reweight               (  6 ) :    0.0560s for    16384 events => throughput is 3.42E-06 events/s
 [COUNTERS] Fortran Unweight(LHE-I/O)      (  7 ) :    0.0707s for    16384 events => throughput is 4.31E-06 events/s
 [COUNTERS] Fortran SamplePutPoint         (  8 ) :    0.1447s for  1170103 events => throughput is 1.24E-07 events/s
 [COUNTERS] CudaCpp Initialise             ( 11 ) :    0.4719s
 [COUNTERS] CudaCpp Finalise               ( 12 ) :    0.0267s
 [COUNTERS] CudaCpp MEs                    ( 19 ) :    0.0350s for    16384 events => throughput is 2.13E-06 events/s
 [COUNTERS] OVERALL NON-MEs                ( 21 ) :    4.1834s
 [COUNTERS] OVERALL MEs                    ( 22 ) :    0.0350s for    16384 events => throughput is 2.13E-06 events/s
  • Loading branch information
valassi committed Aug 15, 2024
1 parent 9a65860 commit a6d57a8
Showing 1 changed file with 36 additions and 5 deletions.
41 changes: 36 additions & 5 deletions epochX/cudacpp/pp_dy3j.mad/Source/dsample.f
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,20 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax)
c Local
c
integer im, ip,ij,icount,it_warned
double precision xbin_min,xbin_max,ddum(maxdim),xo,y
double precision xbin_min,xbin_max,ddum(maxdim),xo,y
c
c Local (performance optimization #969)
c
integer xbinarraydim
parameter (xbinarraydim=maxdim*lmaxconfigs)
double precision xbin_min0_array(maxdim, lmaxconfigs)
double precision xbin_max1_array(maxdim, lmaxconfigs)
logical xbin_min0_saved(maxdim, lmaxconfigs)
logical xbin_max1_saved(maxdim, lmaxconfigs)
save xbin_min0_array, xbin_max1_array
save xbin_min0_saved, xbin_max1_saved
data xbin_min0_saved/xbinarraydim*.false./
data xbin_max1_saved/xbinarraydim*.false./
c
c External
c
Expand Down Expand Up @@ -1301,10 +1314,28 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax)
endif
if (ituple .eq. 1) then
c write(*,*) 'Getting variable',ipole,j,minvar(j,ipole)
xbin_min = xbin(xmin,minvar(j,ipole))
xbin_max = xbin(xmax,minvar(j,ipole))
if (xbin_min .gt. xbin_max-1) then
xbin_min = min(xbin_min, xbin_max)

if(xmax.ne.1 .or. .not.xbin_max1_saved(j,ipole)) then
xbin_max = xbin(xmax, minvar(j,ipole))
if(xmax.eq.1) then
xbin_max1_array(j,ipole) = xbin_max
xbin_max1_saved(j,ipole) = .true.
endif
else
xbin_max = xbin_max1_array(j,ipole)
endif

if(xmin.ne.0 .or. .not.xbin_min0_saved(j,ipole)) then
xbin_min = xbin(xmin, minvar(j,ipole))
if (xbin_min .gt. xbin_max-1) then
xbin_min = min(xbin_min, xbin_max)
endif
if(xmin.eq.0) then
xbin_min0_array(j,ipole) = xbin_min
xbin_min0_saved(j,ipole) = .true.
endif
else
xbin_min = xbin_min0_array(j,ipole)
endif
c
c Line which allows us to keep choosing same x
Expand Down

0 comments on commit a6d57a8

Please sign in to comment.