Skip to content

Commit

Permalink
[cmsdy] regenerate pp_dy3j.mad (with new timers/counters and with mad…
Browse files Browse the repository at this point in the history
…graph5#969 improvements in dsample.f) on itscrd90

Code generation completed in 245 seconds
Code generation and additional checks completed in 372 seconds
  • Loading branch information
valassi committed Aug 19, 2024
1 parent ffd6937 commit 3bdec2c
Show file tree
Hide file tree
Showing 174 changed files with 3,922 additions and 2,468 deletions.
188 changes: 94 additions & 94 deletions epochX/cudacpp/pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt

Large diffs are not rendered by default.

23 changes: 2 additions & 21 deletions epochX/cudacpp/pp_dy3j.mad/Source/dsample.f
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ subroutine sample_init(p1, p2, p3, p4, p5, VECSIZE_USED)
common/read_grid_file/read_grid_file

data use_cut/2/ !Grid: 0=fixed , 1=standard, 2=non-zero
data ituple/1/ !1=htuple, 2=sobel
data ituple/1/ !1=ntuple(ranmar or htuple), 2=sobel
data Minvar(1,1)/-1/ !No special variable mapping

c-----
Expand Down Expand Up @@ -1264,12 +1264,6 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax)
save xbin_min0_saved, xbin_max1_saved
data xbin_min0_saved/xbinarraydim*.false./
data xbin_max1_saved/xbinarraydim*.false./

character*255 env_name, env_value
integer env_length, env_status
logical first, skipxbinchecks
data first, skipxbinchecks/.true., .false./
save first, skipxbinchecks
c
c External
c
Expand Down Expand Up @@ -1421,19 +1415,7 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax)
c double precision is about 15-16 digits, we expect things to agree to
c 3 digit accuracy.
c
if (first) then
env_name = 'CUDACPP_RUNTIME_SKIPXBINCHECKS'
call get_environment_variable(env_name, env_value, env_length, env_status)
if( env_status.eq.0 ) then
skipxbinchecks = .true.
endif
endif

if (skipxbinchecks) then
if (first) then
write(6,*) 'WARNING: skipping xbin checks (CUDACPP_RUNTIME_SKIPXBINCHECKS is set)'
endif
else if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
if (icount .lt. 5) then
write(*,'(a,i4,2e14.6,1e12.4)')
& 'Warning xbin not returning correct x', ij,
Expand All @@ -1444,7 +1426,6 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax)
endif
icount=icount+1
endif
first = .false.
c if (x .lt. xmin .or. x .gt. xmax) then
c write(*,'(a,4i4,2f24.16,1e10.2)') 'Bad x',ij,int(xbin_min),ip,
c & int(xbin_max),xmin,x,xmax-xmin
Expand Down
66 changes: 40 additions & 26 deletions epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/check_sa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,10 @@ main( int argc, char** argv )
DeviceBufferSelectedColor devSelCol( nevt );
#endif

std::unique_ptr<double[]> genrtimes( new double[niter] );
std::unique_ptr<double[]> rambtimes( new double[niter] );
std::unique_ptr<double[]> wavetimes( new double[niter] );
std::unique_ptr<double[]> wv3atimes( new double[niter] );
std::unique_ptr<uint64_t[]> genrcounts( new uint64_t[niter] );
std::unique_ptr<uint64_t[]> rambcounts( new uint64_t[niter] );
std::unique_ptr<uint64_t[]> wavecounts( new uint64_t[niter] );
std::unique_ptr<uint64_t[]> wv3acounts( new uint64_t[niter] );

// --- 0c. Create curand, hiprand or common generator
const std::string cgenKey = "0c GenCreat";
Expand Down Expand Up @@ -527,7 +527,7 @@ main( int argc, char** argv )
// === STEP 1 OF 3

// *** START THE OLD-STYLE TIMER FOR RANDOM GEN ***
double genrtime = 0;
uint64_t genrcount = 0;

// --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand)
// [NB This should not be necessary using the host API: "Generation functions
Expand All @@ -538,7 +538,7 @@ main( int argc, char** argv )
const std::string sgenKey = "1a GenSeed ";
timermap.start( sgenKey );
prnk->seedGenerator( seed + iiter );
genrtime += timermap.stop();
genrcount += timermap.stop();

// --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host
const std::string rngnKey = "1b GenRnGen";
Expand All @@ -553,19 +553,19 @@ main( int argc, char** argv )
{
// --- 1c. Copy rndmom from host to device
const std::string htodKey = "1c CpHTDrnd";
genrtime += timermap.start( htodKey );
genrcount += timermap.start( htodKey );
copyDeviceFromHost( devRndmom, hstRndmom );
}
#endif

// *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN ***
genrtime += timermap.stop();
genrcount += timermap.stop();

// === STEP 2 OF 3
// Fill in particle momenta for each of nevt events on the device

// *** START THE OLD-STYLE TIMER FOR RAMBO ***
double rambtime = 0;
uint64_t rambcount = 0;

// --- 2a. Fill in momenta of initial state particles on the device
const std::string riniKey = "2a RamboIni";
Expand All @@ -576,7 +576,7 @@ main( int argc, char** argv )
// --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device
// (i.e. map random numbers to final-state particle momenta for each of nevt events)
const std::string rfinKey = "2b RamboFin";
rambtime += timermap.start( rfinKey );
rambcount += timermap.start( rfinKey );
prsk->getMomentaFinal();
//std::cout << "Got final momenta" << std::endl;

Expand All @@ -585,30 +585,30 @@ main( int argc, char** argv )
{
// --- 2c. CopyDToH Weights
const std::string cwgtKey = "2c CpDTHwgt";
rambtime += timermap.start( cwgtKey );
rambcount += timermap.start( cwgtKey );
copyHostFromDevice( hstWeights, devWeights );

// --- 2d. CopyDToH Momenta
const std::string cmomKey = "2d CpDTHmom";
rambtime += timermap.start( cmomKey );
rambcount += timermap.start( cmomKey );
copyHostFromDevice( hstMomenta, devMomenta );
}
else // only if ( ! bridge ) ???
{
// --- 2c. CopyHToD Weights
const std::string cwgtKey = "2c CpHTDwgt";
rambtime += timermap.start( cwgtKey );
rambcount += timermap.start( cwgtKey );
copyDeviceFromHost( devWeights, hstWeights );

// --- 2d. CopyHToD Momenta
const std::string cmomKey = "2d CpHTDmom";
rambtime += timermap.start( cmomKey );
rambcount += timermap.start( cmomKey );
copyDeviceFromHost( devMomenta, hstMomenta );
}
#endif

// *** STOP THE OLD-STYLE TIMER FOR RAMBO ***
rambtime += timermap.stop();
rambcount += timermap.stop();

// === STEP 3 OF 3
// Evaluate matrix elements for all nevt events
Expand All @@ -628,7 +628,7 @@ main( int argc, char** argv )
#ifdef MGONGPUCPP_GPUIMPL
// --- 0.. CopyHToD Gs
const std::string gKey = "0.. CpHTDg";
rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER!
rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER!
copyDeviceFromHost( devGs, hstGs );
#endif

Expand All @@ -641,8 +641,8 @@ main( int argc, char** argv )
}

// *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) ***
double wavetime = 0; // calc plus copy
double wv3atime = 0; // calc only
uint64_t wavecount = 0; // calc plus copy
uint64_t wv3acount = 0; // calc only

// --- 3a. SigmaKin
const std::string skinKey = "3a SigmaKin";
Expand All @@ -651,8 +651,8 @@ main( int argc, char** argv )
pmek->computeMatrixElements( channelId );

// *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) ***
wv3atime += timermap.stop(); // calc only
wavetime += wv3atime; // calc plus copy
wv3acount += timermap.stop(); // calc only
wavecount += wv3acount; // calc plus copy

#ifdef MGONGPUCPP_GPUIMPL
if( !bridge )
Expand All @@ -662,7 +662,7 @@ main( int argc, char** argv )
timermap.start( cmesKey );
copyHostFromDevice( hstMatrixElements, devMatrixElements );
// *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) ***
wavetime += timermap.stop(); // calc plus copy
wavecount += timermap.stop(); // calc plus copy
}
#endif

Expand All @@ -675,16 +675,16 @@ main( int argc, char** argv )
// --- 4a Dump within the loop
const std::string loopKey = "4a DumpLoop";
timermap.start( loopKey );
genrtimes[iiter] = genrtime;
rambtimes[iiter] = rambtime;
wavetimes[iiter] = wavetime;
wv3atimes[iiter] = wv3atime;
genrcounts[iiter] = genrcount;
rambcounts[iiter] = rambcount;
wavecounts[iiter] = wavecount;
wv3acounts[iiter] = wv3acount;

if( verbose )
{
std::cout << std::string( SEP79, '*' ) << std::endl
<< "Iteration #" << iiter + 1 << " of " << niter << std::endl;
if( perf ) std::cout << "Wave function time: " << wavetime << std::endl;
if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl;
}

for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration
Expand Down Expand Up @@ -723,6 +723,20 @@ main( int argc, char** argv )
// *** END MAIN LOOP ON #ITERATIONS ***
// **************************************

// Calibrate seconds per count
float secPerCount = timermap.secondsPerCount();
std::unique_ptr<double[]> genrtimes( new double[niter] );
std::unique_ptr<double[]> rambtimes( new double[niter] );
std::unique_ptr<double[]> wavetimes( new double[niter] );
std::unique_ptr<double[]> wv3atimes( new double[niter] );
for( unsigned int iiter = 0; iiter < niter; ++iiter )
{
genrtimes[iiter] = genrcounts[iiter] * secPerCount;
rambtimes[iiter] = rambcounts[iiter] * secPerCount;
wavetimes[iiter] = wavecounts[iiter] * secPerCount;
wv3atimes[iiter] = wv3acounts[iiter] * secPerCount;
}

// === STEP 8 ANALYSIS
// --- 8a Analysis: compute stats after the loop
const std::string statKey = "8a CompStat";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Program DRIVER
CALL COUNTERS_INITIALISE()
c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (it may not be needed, but it does no harm)
CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta
CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) )
Expand All @@ -106,6 +106,7 @@ Program DRIVER
CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) )
CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) )
c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) )
#ifdef MG5AMC_MEEXPORTER_CUDACPP
fbridge_mode = 1 ! CppOnly=1, default for CUDACPP
#else
Expand Down
Loading

0 comments on commit 3bdec2c

Please sign in to comment.