diff --git a/examples/gpu/eemumu/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cu b/examples/gpu/eemumu/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cu index bbcceb2ebd..796fcc7da7 100644 --- a/examples/gpu/eemumu/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cu +++ b/examples/gpu/eemumu/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cu @@ -89,14 +89,11 @@ int main(int argc, char **argv) { int dim = gpublocks * gputhreads; - // GPU memory - /* - double(*tp)[4][4]; - gpuErrchk3(cudaMalloc(&tp, dim * 4 * 4 * sizeof(double))); -*/ // Local Memory double lp[dim][4][4]; + // GPU memory + // from http://www.orangeowlsolutions.com/archives/817 cudaExtent extent = make_cudaExtent(4 * sizeof(double), 4, dim); cudaPitchedPtr devPitchedPtr; gpuErrchk3(cudaMalloc3D(&devPitchedPtr, extent)); diff --git a/examples/gpu/eemumu/src/HelAmps_sm.cu b/examples/gpu/eemumu/src/HelAmps_sm.cu index 7725fc55e9..2facacadd0 100644 --- a/examples/gpu/eemumu/src/HelAmps_sm.cu +++ b/examples/gpu/eemumu/src/HelAmps_sm.cu @@ -60,27 +60,14 @@ __global__ void sigmaKin(cudaPitchedPtr tp, double *meDevPtr, size_t mePitch, thrust::complex amp[2]; double t[1]; - /* - printf("%d : %f %f %f %f, %f %f %f %f, %f %f %f %f, %f %f %f %f\n", dim, - dp[0][0], dp[0][1], dp[0][2], dp[0][3], dp[1][0], dp[1][1], dp[1][2], - dp[1][3], dp[2][0], dp[2][1], dp[2][2], dp[2][3], dp[3][0], dp[3][1], - dp[3][2], dp[3][3]); - */ // // Local variables and constants const int ncomb = 16; static bool goodhel[ncomb] = {ncomb * false}; - static int ntry = 0, sum_hel = 0, ngood = 0; + static int ntry = 0, ngood = 0; static int igood[ncomb]; - static int jhel; // - // Reset color flows - /* sr fixme - for (int i = 0; i < 1; i++) - jamp2[0][i] = 0.; -*/ - // Denominators: spins, colors and identical particles const int denominators[1] = {4}; // nprocesses @@ -92,44 +79,26 @@ __global__ void sigmaKin(cudaPitchedPtr tp, double *meDevPtr, size_t mePitch, } // sr fixme // better to run the first n calculations serial? - if (sum_hel == 0 || ntry < 10) { - // Calculate the matrix element for all helicities - for (int ihel = 0; ihel < ncomb; ihel++) { - if (goodhel[ihel] || ntry < 2) { - - calculate_wavefunctions(ihel, dps, dpt, amp, debug, verbose); - matrix_1_epem_mupmum(t[0], amp); - - double tsum = 0; - for (int iproc = 0; iproc < nprocesses; iproc++) { - matrix_element[iproc] += t[iproc]; - tsum += t[iproc]; - } - // Store which helicities give non-zero result - if (tsum != 0. && !goodhel[ihel]) { - goodhel[ihel] = true; - ngood++; - igood[ngood] = ihel; - } - } - } - jhel = 0; - sum_hel = min(sum_hel, ngood); - } else { - // Only use the "good" helicities - // sr fixme // is the calculation of good helicities parralelizable? - for (int j = 0; j < sum_hel; j++) { - jhel++; - if (jhel >= ngood) - jhel = 0; - double hwgt = double(ngood) / double(sum_hel); - int ihel = igood[jhel]; + // if (sum_hel == 0 || ntry < 10) { + // Calculate the matrix element for all helicities + + for (int ihel = 0; ihel < ncomb; ihel++) { + if (goodhel[ihel] || ntry < 2) { calculate_wavefunctions(ihel, dps, dpt, amp, debug, verbose); matrix_1_epem_mupmum(t[0], amp); + double tsum = 0; for (int iproc = 0; iproc < nprocesses; iproc++) { - matrix_element[iproc] += t[iproc] * hwgt; + matrix_element[iproc] += t[iproc]; + tsum += t[iproc]; + } + + // Store which helicities give non-zero result + if (tsum != 0. && !goodhel[ihel]) { + goodhel[ihel] = true; + ngood++; + igood[ngood] = ihel; } } } @@ -137,7 +106,7 @@ __global__ void sigmaKin(cudaPitchedPtr tp, double *meDevPtr, size_t mePitch, for (int i = 0; i < nprocesses; ++i) { matrix_element[i] /= denominators[i]; } - //} + // } // printf("%d - %e\n", dim, t[0]); }