From 59d7500c11cd55c2f1429a64b1c60c7c518ac706 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 09:40:01 +0100 Subject: [PATCH 01/19] first draft at calc_bonds_idx cpp implementation --- libdistopia/include/distopia.h | 3 + libdistopia/src/distopia.cpp | 119 +++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index 692922af..e1ba1ebd 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -31,6 +31,9 @@ namespace distopia { template void CalcSelfDistanceArrayNoBox(const T *a, int n, T *out); template void CalcSelfDistanceArrayOrtho(const T *a, int n, const T *box, T *out); template void CalcSelfDistanceArrayTriclinic(const T *a, int n, const T *box, T *out); + template void CalcBondsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, T *out); + template void CalcBondsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); + template void CalcBondsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); int GetNFloatLanes(); int GetNDoubleLanes(); std::vector DistopiaSupportedAndGeneratedTargets(); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 4f3dc4a8..ec047bda 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -685,6 +685,103 @@ namespace distopia { } } + template + HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const float *src, + V &x_dst, V &y_dst, V &z_dst) { + hn::ScalableTag d; + hn::ScalableTag d_int; + auto vidx = hn::LoadU(d_int, (int*)idx); + auto Vthree = hn::Set(d_int, 3); + auto Vone = hn::Set(d_int, 1); + // convert indices to 3D coordinate indices, i.e. index 10 at pointer 30 etc + vidx = hn::Mul(vidx, Vthree); + x_dst = hn::GatherIndex(d, src, vidx); + vidx = hn::Add(vidx, Vone); + y_dst = hn::GatherIndex(d, src, vidx); + vidx = hn::Add(vidx, Vone); + z_dst = hn::GatherIndex(d, src, vidx); + } + + template + HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const double *src, + V &x_dst, V &y_dst, V &z_dst) { + hn::ScalableTag d; + hn::ScalableTag d_int; + hn::ScalableTag d_long; + // can't call gather to doubles with int indices so: + // load int32s and cast them up to int64s so they match width of double values + auto vidx_narrow = hn::LoadU(d_int, (int*)idx); + auto vidx = hn::ZeroExtendResizeBitCast(d_long, d_int, vidx_narrow); + + auto Vthree = hn::Set(d_long, 3); + auto Vone = hn::Set(d_long, 1); + + vidx = hn::Mul(vidx, Vthree); + x_dst = hn::GatherIndex(d, src, vidx); + vidx = hn::Add(vidx, Vone); + y_dst = hn::GatherIndex(d, src, vidx); + vidx = hn::Add(vidx, Vone); + z_dst = hn::GatherIndex(d, src, vidx); + } + + template + void CalcBondsIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, + T *out, const B &box) { + const hn::ScalableTag d; + int nlanes = hn::Lanes(d); + + unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; + T *dst; + + if (HWY_UNLIKELY(n < nlanes)) { + // zero out idx arrays, so we can safely load 0th values when we spill + memset(a_sub, 0, sizeof(int) * nlanes); + memset(b_sub, 0, sizeof(int) * nlanes); + memcpy(a_sub, a_idx, sizeof(int) * n); + memcpy(b_sub, b_idx, sizeof(int) * n); + // switch to use a_sub as index array now we've copied contents + a_idx = a_sub; + b_idx = b_sub; + dst = out_sub; + } else { + dst = out; + } + + auto a_x = hn::Undefined(d); + auto a_y = hn::Undefined(d); + auto a_z = hn::Undefined(d); + auto b_x = hn::Undefined(d); + auto b_y = hn::Undefined(d); + auto b_z = hn::Undefined(d); + + for (size_t i=0; i <= n - nlanes; i += nlanes) { + // load N indices of each source + // interleaved gather these indices + LoadInterleavedIdx(a_idx + i, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + i, coords, b_x, b_y, b_z); + + auto result = box.Distance(a_x, a_y, a_z, b_x, b_y, b_z); + + hn::StoreU(result, d, dst + i); + } + size_t rem = n % nlanes; + if (rem) { // if we had a non-multiple of nlanes, do final nlanes values again + LoadInterleavedIdx(a_idx + n - nlanes, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + n - nlanes, coords, b_x, b_y, b_z); + + auto result = box.Distance(a_x, a_y, a_z, b_x, b_y, b_z); + + hn::StoreU(result, d, dst + n - nlanes); + } + + if (HWY_UNLIKELY(n < nlanes)) { + // copy results back from temp array into actual output array + memcpy(out, out_sub, n * sizeof(T)); + } + } + void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); @@ -852,6 +949,18 @@ namespace distopia { const TriclinicBox vbox(d, box); CalcSelfDistanceArray(a, n, out, vbox); } + void CalcBondsNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, float *out) { + hn::ScalableTag d; + const NoBox box(d); + CalcBondsIdx(coords, a_idx, b_idx, n, out, box); + } + void CalcBondsNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, double *out) { + hn::ScalableTag d; + const NoBox box(d); + CalcBondsIdx(coords, a_idx, b_idx, n, out, box); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -901,6 +1010,8 @@ namespace distopia { HWY_EXPORT(CalcSelfDistanceArrayOrthoSingle); HWY_EXPORT(CalcSelfDistanceArrayTriclinicDouble); HWY_EXPORT(CalcSelfDistanceArrayTriclinicSingle); + HWY_EXPORT(CalcBondsNoBoxIdxSingle); + HWY_EXPORT(CalcBondsNoBoxIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1038,6 +1149,14 @@ namespace distopia { } return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayTriclinicDouble)(a, n, box, out); } + HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsNoBoxIdxSingle)(coords, a_idx, b_idx, n, out); + } + HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsNoBoxIdxDouble)(coords, a_idx, b_idx, n, out); + } std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); From a3883298891bf2c4e2c5995518a247d332070df7 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 12:08:54 +0100 Subject: [PATCH 02/19] test for calcbondsidx --- libdistopia/test/test_fixtures.h | 75 +++++++++++++++++++++++++++++ libdistopia/test/test_mda_match.cpp | 21 ++++++++ 2 files changed, 96 insertions(+) diff --git a/libdistopia/test/test_fixtures.h b/libdistopia/test/test_fixtures.h index aefdb949..5939c87e 100644 --- a/libdistopia/test/test_fixtures.h +++ b/libdistopia/test/test_fixtures.h @@ -122,4 +122,79 @@ class DistanceArrayCoordinates : public ::testing::Test { } }; + +template +class CoordinatesIdx : public ::testing::Test { + // similar to coordinates, but create random indices, then create contiguous coordinate array that matches + // can then run idx/non-idx back to back for validation +public: + int ncoords; + int nidx; + + T *coords = nullptr; + T *a_coords_contig = nullptr; + T *b_coords_contig = nullptr; + size_t *big_idx; + unsigned int *a_idx = nullptr; + unsigned int *b_idx = nullptr; + T *ref_results = nullptr; + T *results = nullptr; + T box[3]; + T triclinic_box[9]; + + void SetUp(int ncoords_, int nidx_, double boxsize, double delta) { + ncoords = ncoords_; + nidx = nidx_; + + coords = new T[ncoords * 3]; + a_coords_contig = new T[nidx * 3]; + b_coords_contig = new T[nidx * 3]; + big_idx = new size_t[nidx]; + a_idx = new unsigned int[nidx]; + b_idx = new unsigned int[nidx]; + ref_results = new T[nidx]; + results = new T[nidx]; + + RandomFloatingPoint(coords, ncoords, 0 - delta, boxsize + delta); + + RandomInt(big_idx, nidx, 0, ncoords); + // copy bigidx into smaller, and also make contig coords array + for (size_t i=0; iref[i], this->results[i], abs_err); } +} + + +TYPED_TEST_SUITE(CoordinatesIdx, ScalarTypes); + + +TYPED_TEST(CoordinatesIdx, CalcBondsIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcBondsNoBox(this->a_coords_contig, this->b_coords_contig, this->nidx, this->ref_results); + + // test the idx + distopia::CalcBondsNoBoxIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } } \ No newline at end of file From 7eac3cc2796433220480785da2d0c50ad49cec12 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 13:23:50 +0100 Subject: [PATCH 03/19] loadinterleaved double case need to load fewer int values to cast to longs, to then load as doubles --- libdistopia/src/distopia.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index ec047bda..9465630a 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -708,9 +708,10 @@ namespace distopia { hn::ScalableTag d; hn::ScalableTag d_int; hn::ScalableTag d_long; + size_t nlong_lanes = hn::Lanes(d_long); // can't call gather to doubles with int indices so: // load int32s and cast them up to int64s so they match width of double values - auto vidx_narrow = hn::LoadU(d_int, (int*)idx); + auto vidx_narrow = hn::LoadN(d_int, (int*)idx, nlong_lanes); auto vidx = hn::ZeroExtendResizeBitCast(d_long, d_int, vidx_narrow); auto Vthree = hn::Set(d_long, 3); From 8400abc6cba951df8c6d9d0b3f9b4588f44b057f Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 15:29:49 +0100 Subject: [PATCH 04/19] fixes loadinterleaved idx double case this is the right combo of loading ints and promoting to longlongs --- libdistopia/src/distopia.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 9465630a..459ebeaf 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -706,14 +706,16 @@ namespace distopia { HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const double *src, V &x_dst, V &y_dst, V &z_dst) { hn::ScalableTag d; - hn::ScalableTag d_int; hn::ScalableTag d_long; + hn::ScalableTag d_int; size_t nlong_lanes = hn::Lanes(d_long); - // can't call gather to doubles with int indices so: - // load int32s and cast them up to int64s so they match width of double values - auto vidx_narrow = hn::LoadN(d_int, (int*)idx, nlong_lanes); - auto vidx = hn::ZeroExtendResizeBitCast(d_long, d_int, vidx_narrow); + // can't call Gather to doubles with int indices so: load int32s and cast them up to int64s so widths match + // first load NLONG values from idx + // !! important to not segfault here by loading LONG*2 (i.e. a full vector of 32s) !! + auto vidx_narrow = hn::LoadN(d_int, (int*)idx, nlong_lanes); + // then cast int32 values to int64, taking only lower half of vector + auto vidx = hn::PromoteLowerTo(d_long, vidx_narrow); auto Vthree = hn::Set(d_long, 3); auto Vone = hn::Set(d_long, 1); From 9342283f7b3bb8aabccbc17bfb85e2f7b36a8af7 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 15:54:24 +0100 Subject: [PATCH 05/19] orthogonal and triclinic calcbondsidx and tests --- libdistopia/src/distopia.cpp | 48 +++++++++++++++++++++++++++++ libdistopia/test/test_mda_match.cpp | 38 ++++++++++++++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 459ebeaf..6208bec7 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -964,6 +964,34 @@ namespace distopia { const NoBox box(d); CalcBondsIdx(coords, a_idx, b_idx, n, out, box); } + void CalcBondsOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); + } + void CalcBondsOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); + } + void CalcBondsTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); + } + void CalcBondsTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -1015,6 +1043,10 @@ namespace distopia { HWY_EXPORT(CalcSelfDistanceArrayTriclinicSingle); HWY_EXPORT(CalcBondsNoBoxIdxSingle); HWY_EXPORT(CalcBondsNoBoxIdxDouble); + HWY_EXPORT(CalcBondsOrthoIdxSingle); + HWY_EXPORT(CalcBondsOrthoIdxDouble); + HWY_EXPORT(CalcBondsTriclinicIdxSingle); + HWY_EXPORT(CalcBondsTriclinicIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1160,6 +1192,22 @@ namespace distopia { int n, double *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsNoBoxIdxDouble)(coords, a_idx, b_idx, n, out); } + HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsOrthoIdxSingle)(coords, a_idx, b_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsOrthoIdxDouble)(coords, a_idx, b_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsTriclinicIdxSingle)(coords, a_idx, b_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcBondsTriclinicIdxDouble)(coords, a_idx, b_idx, n, box, out); + } std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); diff --git a/libdistopia/test/test_mda_match.cpp b/libdistopia/test/test_mda_match.cpp index dee4d3e2..a1b5a719 100644 --- a/libdistopia/test/test_mda_match.cpp +++ b/libdistopia/test/test_mda_match.cpp @@ -485,7 +485,7 @@ TYPED_TEST(DistanceArrayCoordinates, CalcSelfDistanceArrayTriclinicScalar) { TYPED_TEST_SUITE(CoordinatesIdx, ScalarTypes); -TYPED_TEST(CoordinatesIdx, CalcBondsIdx) { +TYPED_TEST(CoordinatesIdx, CalcBondsNoBoxIdx) { int ncoords = 250; int nidx = 100; @@ -497,6 +497,42 @@ TYPED_TEST(CoordinatesIdx, CalcBondsIdx) { // test the idx distopia::CalcBondsNoBoxIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->results); + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcBondsOrthoIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcBondsOrtho(this->a_coords_contig, this->b_coords_contig, this->nidx, this->box, this->ref_results); + + // test the idx + distopia::CalcBondsOrthoIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->box, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcBondsTriclinicIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcBondsTriclinic(this->a_coords_contig, this->b_coords_contig, this->nidx, this->triclinic_box, this->ref_results); + + // test the idx + distopia::CalcBondsTriclinicIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->triclinic_box, this->results); + for (std::size_t i=0; inidx; i++) { EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); } From e725f8cf3b56556e5331db7c4a6afccd3d4798c0 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 16:16:50 +0100 Subject: [PATCH 06/19] boilerplate for calcanglesidx --- libdistopia/include/distopia.h | 3 ++ libdistopia/src/distopia.cpp | 80 +++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index e1ba1ebd..8405abe7 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -34,6 +34,9 @@ namespace distopia { template void CalcBondsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, T *out); template void CalcBondsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); template void CalcBondsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); + template void CalcAnglesNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, T *out); + template void CalcAnglesOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); + template void CalcAnglesTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); int GetNFloatLanes(); int GetNDoubleLanes(); std::vector DistopiaSupportedAndGeneratedTargets(); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 6208bec7..180d8fc2 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -785,6 +785,12 @@ namespace distopia { } } + template + void CalcAnglesIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, + T *out, const B &box) { + + } + void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); @@ -992,6 +998,48 @@ namespace distopia { CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); } + void CalcAnglesNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, float *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } + void CalcAnglesNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, double *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } + void CalcAnglesOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } + void CalcAnglesOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } + void CalcAnglesTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } + void CalcAnglesTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -1047,6 +1095,12 @@ namespace distopia { HWY_EXPORT(CalcBondsOrthoIdxDouble); HWY_EXPORT(CalcBondsTriclinicIdxSingle); HWY_EXPORT(CalcBondsTriclinicIdxDouble); + HWY_EXPORT(CalcAnglesNoBoxIdxSingle); + HWY_EXPORT(CalcAnglesNoBoxIdxDouble); + HWY_EXPORT(CalcAnglesOrthoIdxSingle); + HWY_EXPORT(CalcAnglesOrthoIdxDouble); + HWY_EXPORT(CalcAnglesTriclinicIdxSingle); + HWY_EXPORT(CalcAnglesTriclinicIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1208,8 +1262,32 @@ namespace distopia { int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsTriclinicIdxDouble)(coords, a_idx, b_idx, n, box, out); } + HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesNoBoxIdxSingle)(coords, a_idx, b_idx, c_idx, n, out); + } + HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesNoBoxIdxDouble)(coords, a_idx, b_idx, c_idx, n, out); + } + HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesOrthoIdxSingle)(coords, a_idx, b_idx, c_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesOrthoIdxDouble)(coords, a_idx, b_idx, c_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesTriclinicIdxSingle)(coords, a_idx, b_idx, c_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcAnglesTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, n, box, out); + } - std::vector DistopiaSupportedAndGeneratedTargets() { + std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); // for each print the name std::vector names; From ae6817d08b26d77b79fb392c6be9d6cb4c40ff76 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 2 Aug 2024 16:35:57 +0100 Subject: [PATCH 07/19] calcanglesidx implementation and tests --- libdistopia/src/distopia.cpp | 57 +++++++++++++++++++++++++++++ libdistopia/test/test_fixtures.h | 27 +++++++++++++- libdistopia/test/test_mda_match.cpp | 56 +++++++++++++++++++++++++++- 3 files changed, 138 insertions(+), 2 deletions(-) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 180d8fc2..06198b95 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -788,7 +788,64 @@ namespace distopia { template void CalcAnglesIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, T *out, const B &box) { + // Logic here closely follows BondsIdx, except with an additional coordinate + const hn::ScalableTag d; + int nlanes = hn::Lanes(d); + + unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; + T *dst; + + if (HWY_UNLIKELY(n < nlanes)) { + memset(a_sub, 0, sizeof(int) * nlanes); + memset(b_sub, 0, sizeof(int) * nlanes); + memset(c_sub, 0, sizeof(int) * nlanes); + memcpy(a_sub, a_idx, sizeof(int) * n); + memcpy(b_sub, b_idx, sizeof(int) * n); + memcpy(c_sub, c_idx, sizeof(int) * n); + a_idx = a_sub; + b_idx = b_sub; + c_idx = c_sub; + dst = out_sub; + } else { + dst = out; + } + auto a_x = hn::Undefined(d); + auto a_y = hn::Undefined(d); + auto a_z = hn::Undefined(d); + auto b_x = hn::Undefined(d); + auto b_y = hn::Undefined(d); + auto b_z = hn::Undefined(d); + auto c_x = hn::Undefined(d); + auto c_y = hn::Undefined(d); + auto c_z = hn::Undefined(d); + + for (size_t i=0; i <= n - nlanes; i += nlanes) { + LoadInterleavedIdx(a_idx + i, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + i, coords, b_x, b_y, b_z); + LoadInterleavedIdx(c_idx + i, coords, c_x, c_y, c_z); + + auto result = Angle(a_x, a_y, a_z, b_x, b_y, b_z, c_x, c_y, c_z, box); + + hn::StoreU(result, d, dst + i); + } + size_t rem = n % nlanes; + if (rem) { + LoadInterleavedIdx(a_idx + n - nlanes, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + n - nlanes, coords, b_x, b_y, b_z); + LoadInterleavedIdx(c_idx + n - nlanes, coords, c_x, c_y, c_z); + + auto result = Angle(a_x, a_y, a_z, b_x, b_y, b_z, c_x, c_y, c_z, box); + + hn::StoreU(result, d, dst + n - nlanes); + } + + if (HWY_UNLIKELY(n < nlanes)) { + memcpy(out, out_sub, n * sizeof(T)); + } } void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { diff --git a/libdistopia/test/test_fixtures.h b/libdistopia/test/test_fixtures.h index 5939c87e..bc2db44b 100644 --- a/libdistopia/test/test_fixtures.h +++ b/libdistopia/test/test_fixtures.h @@ -134,9 +134,13 @@ class CoordinatesIdx : public ::testing::Test { T *coords = nullptr; T *a_coords_contig = nullptr; T *b_coords_contig = nullptr; + T *c_coords_contig = nullptr; + T *d_coords_contig = nullptr; size_t *big_idx; unsigned int *a_idx = nullptr; unsigned int *b_idx = nullptr; + unsigned int *c_idx = nullptr; + unsigned int *d_idx = nullptr; T *ref_results = nullptr; T *results = nullptr; T box[3]; @@ -149,9 +153,13 @@ class CoordinatesIdx : public ::testing::Test { coords = new T[ncoords * 3]; a_coords_contig = new T[nidx * 3]; b_coords_contig = new T[nidx * 3]; + c_coords_contig = new T[nidx * 3]; + d_coords_contig = new T[nidx * 3]; big_idx = new size_t[nidx]; a_idx = new unsigned int[nidx]; b_idx = new unsigned int[nidx]; + c_idx = new unsigned int[nidx]; + d_idx = new unsigned int[nidx]; ref_results = new T[nidx]; results = new T[nidx]; @@ -166,13 +174,26 @@ class CoordinatesIdx : public ::testing::Test { a_coords_contig[i*3 + 2] = coords[a_idx[i] * 3 + 2]; } RandomInt(big_idx, nidx, 0, ncoords); - // copy bigidx into smaller, and also make contig coords array for (size_t i=0; inidx; i++) { EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); } -} \ No newline at end of file +} + + +TYPED_TEST(CoordinatesIdx, CalcAnglesNoBoxIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcAnglesNoBox(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->nidx, this->ref_results); + + // test the idx + distopia::CalcAnglesNoBoxIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->nidx, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcAnglesOrthoIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcAnglesOrtho(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->nidx, this->box, this->ref_results); + + // test the idx + distopia::CalcAnglesOrthoIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->nidx, this->box, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcAnglesTriclinicIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcAnglesTriclinic(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->nidx, this->triclinic_box, this->ref_results); + + // test the idx + distopia::CalcAnglesTriclinicIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->nidx, this->triclinic_box, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} From 978386c1457dab7a91150892057264d88d656924 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Mon, 5 Aug 2024 11:42:39 +0100 Subject: [PATCH 08/19] more generic int32 to int64 conversion --- libdistopia/src/distopia.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 06198b95..ddff483a 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -706,16 +706,16 @@ namespace distopia { HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const double *src, V &x_dst, V &y_dst, V &z_dst) { hn::ScalableTag d; - hn::ScalableTag d_long; - hn::ScalableTag d_int; - size_t nlong_lanes = hn::Lanes(d_long); + hn::ScalableTag d_long; + // "int64_t" number of lanes but "int" type + hn::Rebind> d_int; // can't call Gather to doubles with int indices so: load int32s and cast them up to int64s so widths match // first load NLONG values from idx // !! important to not segfault here by loading LONG*2 (i.e. a full vector of 32s) !! - auto vidx_narrow = hn::LoadN(d_int, (int*)idx, nlong_lanes); + auto vidx_narrow = hn::LoadU(d_int, (int*)idx); // then cast int32 values to int64, taking only lower half of vector - auto vidx = hn::PromoteLowerTo(d_long, vidx_narrow); + auto vidx = hn::PromoteTo(d_long, vidx_narrow); auto Vthree = hn::Set(d_long, 3); auto Vone = hn::Set(d_long, 1); From cb3464237c7be5436feb5dcf71fb199eeb3c3bde Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 10:22:19 +0100 Subject: [PATCH 09/19] calc dihedrals idx function stubs --- libdistopia/include/distopia.h | 3 ++ libdistopia/src/distopia.cpp | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index 8405abe7..8d0d2ed7 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -37,6 +37,9 @@ namespace distopia { template void CalcAnglesNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, T *out); template void CalcAnglesOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); template void CalcAnglesTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); + template void CalcDihedralsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, T *out); + template void CalcDihedralsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); + template void CalcDihedralsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); int GetNFloatLanes(); int GetNDoubleLanes(); std::vector DistopiaSupportedAndGeneratedTargets(); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index ddff483a..61ee43fb 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -848,6 +848,11 @@ namespace distopia { } } + template + void CalcDihedralsIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, + T *out, const B &box) { + } + void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); @@ -1097,6 +1102,48 @@ namespace distopia { CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } + void CalcDihedralsNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, float *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } + void CalcDihedralsNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, double *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } + void CalcDihedralsOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } + void CalcDihedralsOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } + void CalcDihedralsTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const float *box, float *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } + void CalcDihedralsTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const double *box, double *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -1158,6 +1205,12 @@ namespace distopia { HWY_EXPORT(CalcAnglesOrthoIdxDouble); HWY_EXPORT(CalcAnglesTriclinicIdxSingle); HWY_EXPORT(CalcAnglesTriclinicIdxDouble); + HWY_EXPORT(CalcDihedralsNoBoxIdxSingle); + HWY_EXPORT(CalcDihedralsNoBoxIdxDouble); + HWY_EXPORT(CalcDihedralsOrthoIdxSingle); + HWY_EXPORT(CalcDihedralsOrthoIdxDouble); + HWY_EXPORT(CalcDihedralsTriclinicIdxSingle); + HWY_EXPORT(CalcDihedralsTriclinicIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1343,6 +1396,30 @@ namespace distopia { int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, n, box, out); } + HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsNoBoxIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, out); + } + HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsNoBoxIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, out); + } + HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsOrthoIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsOrthoIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); + } std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); From 0cfe9e618922e89f49369ee7704e8e0735377b23 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 11:07:28 +0100 Subject: [PATCH 10/19] fix test fixture to initialise all coordinates previously only populated 1/3 of the coordinate array with values... --- libdistopia/test/test_fixtures.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdistopia/test/test_fixtures.h b/libdistopia/test/test_fixtures.h index bc2db44b..2d406eeb 100644 --- a/libdistopia/test/test_fixtures.h +++ b/libdistopia/test/test_fixtures.h @@ -163,7 +163,7 @@ class CoordinatesIdx : public ::testing::Test { ref_results = new T[nidx]; results = new T[nidx]; - RandomFloatingPoint(coords, ncoords, 0 - delta, boxsize + delta); + RandomFloatingPoint(coords, ncoords * 3, 0 - delta, boxsize + delta); RandomInt(big_idx, nidx, 0, ncoords); // copy bigidx into smaller, and also make contig coords array From 2ca3adfb96f9c714a14103076f577c2c21236ec8 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 11:07:44 +0100 Subject: [PATCH 11/19] adds dihedrals idx implementation and tests --- libdistopia/src/distopia.cpp | 75 +++++++++++++++++++++++++++++ libdistopia/test/test_mda_match.cpp | 54 +++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 61ee43fb..944d23b2 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -851,6 +851,81 @@ namespace distopia { template void CalcDihedralsIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, T *out, const B &box) { + // Logic here closely follows BondsIdx, except with an additional coordinate + const hn::ScalableTag d; + int nlanes = hn::Lanes(d); + + unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + unsigned int d_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; + T *dst; + + if (HWY_UNLIKELY(n < nlanes)) { + memset(a_sub, 0, sizeof(int) * nlanes); + memset(b_sub, 0, sizeof(int) * nlanes); + memset(c_sub, 0, sizeof(int) * nlanes); + memset(d_sub, 0, sizeof(int) * nlanes); + memcpy(a_sub, a_idx, sizeof(int) * n); + memcpy(b_sub, b_idx, sizeof(int) * n); + memcpy(c_sub, c_idx, sizeof(int) * n); + memcpy(d_sub, d_idx, sizeof(int) * n); + a_idx = a_sub; + b_idx = b_sub; + c_idx = c_sub; + d_idx = d_sub; + dst = out_sub; + } else { + dst = out; + } + + auto a_x = hn::Undefined(d); + auto a_y = hn::Undefined(d); + auto a_z = hn::Undefined(d); + auto b_x = hn::Undefined(d); + auto b_y = hn::Undefined(d); + auto b_z = hn::Undefined(d); + auto c_x = hn::Undefined(d); + auto c_y = hn::Undefined(d); + auto c_z = hn::Undefined(d); + auto d_x = hn::Undefined(d); + auto d_y = hn::Undefined(d); + auto d_z = hn::Undefined(d); + + for (size_t i=0; i <= n - nlanes; i += nlanes) { + LoadInterleavedIdx(a_idx + i, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + i, coords, b_x, b_y, b_z); + LoadInterleavedIdx(c_idx + i, coords, c_x, c_y, c_z); + LoadInterleavedIdx(d_idx + i, coords, d_x, d_y, d_z); + + auto result = Dihedral(a_x, a_y, a_z, + b_x, b_y, b_z, + c_x, c_y, c_z, + d_x, d_y, d_z, + box); + + hn::StoreU(result, d, dst + i); + } + size_t rem = n % nlanes; + if (rem) { + LoadInterleavedIdx(a_idx + n - nlanes, coords, a_x, a_y, a_z); + LoadInterleavedIdx(b_idx + n - nlanes, coords, b_x, b_y, b_z); + LoadInterleavedIdx(c_idx + n - nlanes, coords, c_x, c_y, c_z); + LoadInterleavedIdx(d_idx + n - nlanes, coords, d_x, d_y, d_z); + + auto result = Dihedral(a_x, a_y, a_z, + b_x, b_y, b_z, + c_x, c_y, c_z, + d_x, d_y, d_z, + box); + + hn::StoreU(result, d, dst + n - nlanes); + } + + if (HWY_UNLIKELY(n < nlanes)) { + memcpy(out, out_sub, n * sizeof(T)); + } } void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { diff --git a/libdistopia/test/test_mda_match.cpp b/libdistopia/test/test_mda_match.cpp index 4fe1bdf2..637bba0b 100644 --- a/libdistopia/test/test_mda_match.cpp +++ b/libdistopia/test/test_mda_match.cpp @@ -591,3 +591,57 @@ TYPED_TEST(CoordinatesIdx, CalcAnglesTriclinicIdx) { EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); } } + + +TYPED_TEST(CoordinatesIdx, CalcDihedralsNoBoxIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcDihedralsNoBox(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->d_coords_contig, this->nidx, this->ref_results); + + // test the idx + distopia::CalcDihedralsNoBoxIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->d_idx, this->nidx, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcDihedralsOrthoIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcDihedralsOrtho(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->d_coords_contig, this->nidx, this->box, this->ref_results); + + // test the idx + distopia::CalcDihedralsOrthoIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->d_idx, this->nidx, this->box, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, CalcDihedralsTriclinicIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // reference result + distopia::CalcDihedralsTriclinic(this->a_coords_contig, this->b_coords_contig, this->c_coords_contig, this->d_coords_contig, this->nidx, this->triclinic_box, this->ref_results); + + // test the idx + distopia::CalcDihedralsTriclinicIdx(this->coords, this->a_idx, this->b_idx, this->c_idx, this->d_idx, this->nidx, this->triclinic_box, this->results); + + for (std::size_t i=0; inidx; i++) { + EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); + } +} \ No newline at end of file From a52b01539b4063c86d6b576e2919e829cc19f25c Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 14:55:38 +0100 Subject: [PATCH 12/19] function stubs for distance array idx --- libdistopia/include/distopia.h | 3 ++ libdistopia/src/distopia.cpp | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index 8d0d2ed7..472ab2c4 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -40,6 +40,9 @@ namespace distopia { template void CalcDihedralsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, T *out); template void CalcDihedralsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); template void CalcDihedralsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); + template void CalcDistanceArrayNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, T *out); + template void CalcDistanceArrayOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); + template void CalcDistanceArrayTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); int GetNFloatLanes(); int GetNDoubleLanes(); std::vector DistopiaSupportedAndGeneratedTargets(); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 944d23b2..46828562 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -928,6 +928,12 @@ namespace distopia { } } + template + void CalcDistanceArrayIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, + T *out, const B &box) { + + } + void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); @@ -1219,6 +1225,42 @@ namespace distopia { CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } + void CalcDistanceArrayNoBoxIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, float *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } + void CalcDistanceArrayNoBoxIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, double *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } + void CalcDistanceArrayOrthoIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } + void CalcDistanceArrayOrthoIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } + void CalcDistanceArrayTriclinicIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } + void CalcDistanceArrayTriclinicIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -1286,6 +1328,12 @@ namespace distopia { HWY_EXPORT(CalcDihedralsOrthoIdxDouble); HWY_EXPORT(CalcDihedralsTriclinicIdxSingle); HWY_EXPORT(CalcDihedralsTriclinicIdxDouble); + HWY_EXPORT(CalcDistanceArrayNoBoxIdxSingle); + HWY_EXPORT(CalcDistanceArrayNoBoxIdxDouble); + HWY_EXPORT(CalcDistanceArrayOrthoIdxSingle); + HWY_EXPORT(CalcDistanceArrayOrthoIdxDouble); + HWY_EXPORT(CalcDistanceArrayTriclinicIdxSingle); + HWY_EXPORT(CalcDistanceArrayTriclinicIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1495,6 +1543,24 @@ namespace distopia { int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } + HWY_DLLEXPORT template <> void CalcDistanceArrayNoBoxIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayNoBoxIdxSingle)(coords, a_idx, b_idx, na, nb, out); + } + HWY_DLLEXPORT template <> void CalcDistanceArrayNoBoxIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayNoBoxIdxDouble)(coords, a_idx, b_idx, na, nb, out); + } + HWY_DLLEXPORT template <> void CalcDistanceArrayOrthoIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayOrthoIdxSingle)(coords, a_idx, b_idx, na, nb, box, out); + } + HWY_DLLEXPORT template <> void CalcDistanceArrayOrthoIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayOrthoIdxDouble)(coords, a_idx, b_idx, na, nb, box, out); + } + HWY_DLLEXPORT template <> void CalcDistanceArrayTriclinicIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayTriclinicIdxSingle)(coords, a_idx, b_idx, na, nb, box, out); + } + HWY_DLLEXPORT template <> void CalcDistanceArrayTriclinicIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayTriclinicIdxDouble)(coords, a_idx, b_idx, na, nb, box, out); + } std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); From ef57f2feebe11bdcb57f30fa981e60a368dfe010 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 15:13:40 +0100 Subject: [PATCH 13/19] change to int* not uint* for idx functions loading ints takes int anyway, so don't confuse signed-ness of these --- libdistopia/include/distopia.h | 18 +++--- libdistopia/src/distopia.cpp | 104 +++++++++++++++---------------- libdistopia/test/test_fixtures.h | 16 ++--- 3 files changed, 69 insertions(+), 69 deletions(-) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index 472ab2c4..f640819f 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -31,15 +31,15 @@ namespace distopia { template void CalcSelfDistanceArrayNoBox(const T *a, int n, T *out); template void CalcSelfDistanceArrayOrtho(const T *a, int n, const T *box, T *out); template void CalcSelfDistanceArrayTriclinic(const T *a, int n, const T *box, T *out); - template void CalcBondsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, T *out); - template void CalcBondsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); - template void CalcBondsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, const T *box, T *out); - template void CalcAnglesNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, T *out); - template void CalcAnglesOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); - template void CalcAnglesTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, const T *box, T *out); - template void CalcDihedralsNoBoxIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, T *out); - template void CalcDihedralsOrthoIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); - template void CalcDihedralsTriclinicIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, const T *box, T *out); + template void CalcBondsNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, int n, T *out); + template void CalcBondsOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, int n, const T *box, T *out); + template void CalcBondsTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, int n, const T *box, T *out); + template void CalcAnglesNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, T *out); + template void CalcAnglesOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const T *box, T *out); + template void CalcAnglesTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const T *box, T *out); + template void CalcDihedralsNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, T *out); + template void CalcDihedralsOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const T *box, T *out); + template void CalcDihedralsTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const T *box, T *out); template void CalcDistanceArrayNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, T *out); template void CalcDistanceArrayOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); template void CalcDistanceArrayTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 46828562..fdeca9ff 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -686,11 +686,11 @@ namespace distopia { } template - HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const float *src, + HWY_INLINE void LoadInterleavedIdx(const int *idx, const float *src, V &x_dst, V &y_dst, V &z_dst) { hn::ScalableTag d; hn::ScalableTag d_int; - auto vidx = hn::LoadU(d_int, (int*)idx); + auto vidx = hn::LoadU(d_int, idx); auto Vthree = hn::Set(d_int, 3); auto Vone = hn::Set(d_int, 1); // convert indices to 3D coordinate indices, i.e. index 10 at pointer 30 etc @@ -703,7 +703,7 @@ namespace distopia { } template - HWY_INLINE void LoadInterleavedIdx(const unsigned int *idx, const double *src, + HWY_INLINE void LoadInterleavedIdx(const int *idx, const double *src, V &x_dst, V &y_dst, V &z_dst) { hn::ScalableTag d; hn::ScalableTag d_long; @@ -713,7 +713,7 @@ namespace distopia { // can't call Gather to doubles with int indices so: load int32s and cast them up to int64s so widths match // first load NLONG values from idx // !! important to not segfault here by loading LONG*2 (i.e. a full vector of 32s) !! - auto vidx_narrow = hn::LoadU(d_int, (int*)idx); + auto vidx_narrow = hn::LoadU(d_int, idx); // then cast int32 values to int64, taking only lower half of vector auto vidx = hn::PromoteTo(d_long, vidx_narrow); auto Vthree = hn::Set(d_long, 3); @@ -728,13 +728,13 @@ namespace distopia { } template - void CalcBondsIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, int n, + void CalcBondsIdx(const T *coords, const int *a_idx, const int *b_idx, int n, T *out, const B &box) { const hn::ScalableTag d; int nlanes = hn::Lanes(d); - unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; T *dst; @@ -786,15 +786,15 @@ namespace distopia { } template - void CalcAnglesIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, int n, + void CalcAnglesIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, T *out, const B &box) { // Logic here closely follows BondsIdx, except with an additional coordinate const hn::ScalableTag d; int nlanes = hn::Lanes(d); - unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; T *dst; @@ -849,16 +849,16 @@ namespace distopia { } template - void CalcDihedralsIdx(const T *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, int n, + void CalcDihedralsIdx(const T *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, T *out, const B &box) { // Logic here closely follows BondsIdx, except with an additional coordinate const hn::ScalableTag d; int nlanes = hn::Lanes(d); - unsigned int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; - unsigned int d_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int a_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int b_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int c_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; + int d_sub[3 * HWY_MAX_LANES_D(hn::ScalableTag)]; T out_sub[HWY_MAX_LANES_D(hn::ScalableTag)]; T *dst; @@ -1101,124 +1101,124 @@ namespace distopia { const TriclinicBox vbox(d, box); CalcSelfDistanceArray(a, n, out, vbox); } - void CalcBondsNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsNoBoxIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int n, float *out) { hn::ScalableTag d; const NoBox box(d); CalcBondsIdx(coords, a_idx, b_idx, n, out, box); } - void CalcBondsNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsNoBoxIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int n, double *out) { hn::ScalableTag d; const NoBox box(d); CalcBondsIdx(coords, a_idx, b_idx, n, out, box); } - void CalcBondsOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsOrthoIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int n, const float *box, float *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); } - void CalcBondsOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsOrthoIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int n, const double *box, double *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); } - void CalcBondsTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsTriclinicIdxSingle(const float *coords, const int *a_idx, const int *b_idx, int n, const float *box, float *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); } - void CalcBondsTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + void CalcBondsTriclinicIdxDouble(const double *coords, const int *a_idx, const int *b_idx, int n, const double *box, double *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); CalcBondsIdx(coords, a_idx, b_idx, n, out, vbox); } - void CalcAnglesNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesNoBoxIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, float *out) { hn::ScalableTag d; const NoBox vbox(d); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcAnglesNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesNoBoxIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcAnglesOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesOrthoIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const float *box, float *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcAnglesOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesOrthoIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const double *box, double *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcAnglesTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesTriclinicIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const float *box, float *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcAnglesTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + void CalcAnglesTriclinicIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const double *box, double *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); CalcAnglesIdx(coords, a_idx, b_idx, c_idx, n, out, vbox); } - void CalcDihedralsNoBoxIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsNoBoxIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, float *out) { hn::ScalableTag d; const NoBox vbox(d); CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } - void CalcDihedralsNoBoxIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsNoBoxIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } - void CalcDihedralsOrthoIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsOrthoIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const float *box, float *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } - void CalcDihedralsOrthoIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsOrthoIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const double *box, double *out) { hn::ScalableTag d; const OrthogonalBox vbox(d, box); CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } - void CalcDihedralsTriclinicIdxSingle(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsTriclinicIdxSingle(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const float *box, float *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); CalcDihedralsIdx(coords, a_idx, b_idx, c_idx, d_idx, n, out, vbox); } - void CalcDihedralsTriclinicIdxDouble(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + void CalcDihedralsTriclinicIdxDouble(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const double *box, double *out) { hn::ScalableTag d; const TriclinicBox vbox(d, box); @@ -1471,75 +1471,75 @@ namespace distopia { } return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayTriclinicDouble)(a, n, box, out); } - HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const float *coords, const int *a_idx, const int *b_idx, int n, float *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsNoBoxIdxSingle)(coords, a_idx, b_idx, n, out); } - HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsNoBoxIdx(const double *coords, const int *a_idx, const int *b_idx, int n, double *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsNoBoxIdxDouble)(coords, a_idx, b_idx, n, out); } - HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const float *coords, const int *a_idx, const int *b_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsOrthoIdxSingle)(coords, a_idx, b_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsOrthoIdx(const double *coords, const int *a_idx, const int *b_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsOrthoIdxDouble)(coords, a_idx, b_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const float *coords, const int *a_idx, const int *b_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsTriclinicIdxSingle)(coords, a_idx, b_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, + HWY_DLLEXPORT template <> void CalcBondsTriclinicIdx(const double *coords, const int *a_idx, const int *b_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcBondsTriclinicIdxDouble)(coords, a_idx, b_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, float *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesNoBoxIdxSingle)(coords, a_idx, b_idx, c_idx, n, out); } - HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesNoBoxIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, double *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesNoBoxIdxDouble)(coords, a_idx, b_idx, c_idx, n, out); } - HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesOrthoIdxSingle)(coords, a_idx, b_idx, c_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesOrthoIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesOrthoIdxDouble)(coords, a_idx, b_idx, c_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesTriclinicIdxSingle)(coords, a_idx, b_idx, c_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, + HWY_DLLEXPORT template <> void CalcAnglesTriclinicIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcAnglesTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, float *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsNoBoxIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, out); } - HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsNoBoxIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, double *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsNoBoxIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, out); } - HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsOrthoIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsOrthoIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsOrthoIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const float *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const float *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const float *box, float *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxSingle)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } - HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const double *coords, const unsigned int *a_idx, const unsigned int *b_idx, const unsigned int *c_idx, const unsigned int *d_idx, + HWY_DLLEXPORT template <> void CalcDihedralsTriclinicIdx(const double *coords, const int *a_idx, const int *b_idx, const int *c_idx, const int *d_idx, int n, const double *box, double *out) { return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } diff --git a/libdistopia/test/test_fixtures.h b/libdistopia/test/test_fixtures.h index 2d406eeb..395aec93 100644 --- a/libdistopia/test/test_fixtures.h +++ b/libdistopia/test/test_fixtures.h @@ -137,10 +137,10 @@ class CoordinatesIdx : public ::testing::Test { T *c_coords_contig = nullptr; T *d_coords_contig = nullptr; size_t *big_idx; - unsigned int *a_idx = nullptr; - unsigned int *b_idx = nullptr; - unsigned int *c_idx = nullptr; - unsigned int *d_idx = nullptr; + int *a_idx = nullptr; + int *b_idx = nullptr; + int *c_idx = nullptr; + int *d_idx = nullptr; T *ref_results = nullptr; T *results = nullptr; T box[3]; @@ -156,10 +156,10 @@ class CoordinatesIdx : public ::testing::Test { c_coords_contig = new T[nidx * 3]; d_coords_contig = new T[nidx * 3]; big_idx = new size_t[nidx]; - a_idx = new unsigned int[nidx]; - b_idx = new unsigned int[nidx]; - c_idx = new unsigned int[nidx]; - d_idx = new unsigned int[nidx]; + a_idx = new int[nidx]; + b_idx = new int[nidx]; + c_idx = new int[nidx]; + d_idx = new int[nidx]; ref_results = new T[nidx]; results = new T[nidx]; From 8ee6d25e14fa2189e7f6c7456c59c684a57ffc39 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 15:32:46 +0100 Subject: [PATCH 14/19] push DAidx down scalar path for small problem sizes --- libdistopia/src/distopia.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index fdeca9ff..97869dd6 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -1544,21 +1544,39 @@ namespace distopia { return HWY_DYNAMIC_DISPATCH(CalcDihedralsTriclinicIdxDouble)(coords, a_idx, b_idx, c_idx, d_idx, n, box, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayNoBoxIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, float *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayNoBoxIdxSingle(coords, a_idx, b_idx, na, nb, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayNoBoxIdxSingle)(coords, a_idx, b_idx, na, nb, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayNoBoxIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, double *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayNoBoxIdxDouble(coords, a_idx, b_idx, na, nb, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayNoBoxIdxDouble)(coords, a_idx, b_idx, na, nb, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayOrthoIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayOrthoIdxSingle(coords, a_idx, b_idx, na, nb, box, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayOrthoIdxSingle)(coords, a_idx, b_idx, na, nb, box, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayOrthoIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayOrthoIdxDouble(coords, a_idx, b_idx, na, nb, box, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayOrthoIdxDouble)(coords, a_idx, b_idx, na, nb, box, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayTriclinicIdx(const float *coords, const int *a_idx, const int *b_idx, int na, int nb, const float *box, float *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayTriclinicIdxSingle(coords, a_idx, b_idx, na, nb, box, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayTriclinicIdxSingle)(coords, a_idx, b_idx, na, nb, box, out); } HWY_DLLEXPORT template <> void CalcDistanceArrayTriclinicIdx(const double *coords, const int *a_idx, const int *b_idx, int na, int nb, const double *box, double *out) { + if (nb < GetNFloatLanes()) { + return distopia::N_SCALAR::CalcDistanceArrayTriclinicIdxDouble(coords, a_idx, b_idx, na, nb, box, out); + } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayTriclinicIdxDouble)(coords, a_idx, b_idx, na, nb, box, out); } From dcfd3204270992ec66a250f2a48f8cecea997c5f Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 15:33:33 +0100 Subject: [PATCH 15/19] DAidx implementation --- libdistopia/src/distopia.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 97869dd6..260767d9 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -931,7 +931,37 @@ namespace distopia { template void CalcDistanceArrayIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, T *out, const B &box) { + hn::ScalableTag d; + + int nlanes = hn::Lanes(d); + auto b_x = hn::Undefined(d); + auto b_y = hn::Undefined(d); + auto b_z = hn::Undefined(d); + + size_t rem = nb % nlanes; + for (size_t i=0; i Date: Tue, 6 Aug 2024 15:39:20 +0100 Subject: [PATCH 16/19] embiggen the idx fixture to allow distance array results nidx * nidx shouldn't be too large... --- libdistopia/test/test_fixtures.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdistopia/test/test_fixtures.h b/libdistopia/test/test_fixtures.h index 395aec93..ce87075a 100644 --- a/libdistopia/test/test_fixtures.h +++ b/libdistopia/test/test_fixtures.h @@ -160,8 +160,8 @@ class CoordinatesIdx : public ::testing::Test { b_idx = new int[nidx]; c_idx = new int[nidx]; d_idx = new int[nidx]; - ref_results = new T[nidx]; - results = new T[nidx]; + ref_results = new T[nidx * nidx]; + results = new T[nidx * nidx]; RandomFloatingPoint(coords, ncoords * 3, 0 - delta, boxsize + delta); From 6d6be9f70f31ae8ce34b4567e4571d2890a25f8a Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 6 Aug 2024 15:39:26 +0100 Subject: [PATCH 17/19] DAidx tests --- libdistopia/test/test_mda_match.cpp | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/libdistopia/test/test_mda_match.cpp b/libdistopia/test/test_mda_match.cpp index 637bba0b..43202a38 100644 --- a/libdistopia/test/test_mda_match.cpp +++ b/libdistopia/test/test_mda_match.cpp @@ -644,4 +644,64 @@ TYPED_TEST(CoordinatesIdx, CalcDihedralsTriclinicIdx) { for (std::size_t i=0; inidx; i++) { EXPECT_NEAR(this->ref_results[i], this->results[i], abs_err); } +} + + +TYPED_TEST(CoordinatesIdx, DistanceArrayNoBoxIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcDistanceArrayNoBox(this->a_coords_contig, this->b_coords_contig, this->nidx, this->nidx, this->ref_results); + + // test + distopia::CalcDistanceArrayNoBoxIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->nidx, this->results); + + size_t n_results = nidx * nidx; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } + +} + + +TYPED_TEST(CoordinatesIdx, DistanceArrayOrthoIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcDistanceArrayOrtho(this->a_coords_contig, this->b_coords_contig, this->nidx, this->nidx, this->box, this->ref_results); + + // test + distopia::CalcDistanceArrayOrthoIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->nidx, this->box, this->results); + + size_t n_results = nidx * nidx; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } + +} + + +TYPED_TEST(CoordinatesIdx, DistanceArrayTriclinicIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcDistanceArrayTriclinic(this->a_coords_contig, this->b_coords_contig, this->nidx, this->nidx, this->triclinic_box, this->ref_results); + + // test + distopia::CalcDistanceArrayTriclinicIdx(this->coords, this->a_idx, this->b_idx, this->nidx, this->nidx, this->triclinic_box, this->results); + + size_t n_results = nidx * nidx; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } + } \ No newline at end of file From 8f9aca66428c951051b60073320822e335cb8d6c Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Thu, 15 Aug 2024 09:24:41 +0100 Subject: [PATCH 18/19] function stubs for SDAidx --- libdistopia/include/distopia.h | 3 ++ libdistopia/src/distopia.cpp | 65 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/libdistopia/include/distopia.h b/libdistopia/include/distopia.h index f640819f..0f66dac0 100644 --- a/libdistopia/include/distopia.h +++ b/libdistopia/include/distopia.h @@ -43,6 +43,9 @@ namespace distopia { template void CalcDistanceArrayNoBoxIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, T *out); template void CalcDistanceArrayOrthoIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); template void CalcDistanceArrayTriclinicIdx(const T *coords, const int *a_idx, const int *b_idx, int na, int nb, const T *box, T *out); + template void CalcSelfDistanceArrayNoBoxIdx(const T *coords, const int *idx, int n, T *out); + template void CalcSelfDistanceArrayOrthoIdx(const T *coords, const int *idx, int n, const T *box, T *out); + template void CalcSelfDistanceArrayTriclinicIdx(const T *coords, const int *idx, int n, const T *box, T *out); int GetNFloatLanes(); int GetNDoubleLanes(); std::vector DistopiaSupportedAndGeneratedTargets(); diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 260767d9..7826ece8 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -964,6 +964,11 @@ namespace distopia { } } + template + void CalcSelfDistanceArrayIdx(const T *coords, const int *idx, int n, T *out, const B &box) { + + } + void CalcBondsNoBoxDouble(const double *a, const double *b, int n, double *out) { hn::ScalableTag d; const NoBox vbox(d); @@ -1291,6 +1296,42 @@ namespace distopia { CalcDistanceArrayIdx(coords, a_idx, b_idx, na, nb, out, vbox); } + void CalcSelfDistanceArrayNoBoxIdxSingle(const float *coords, const int *idx, int n, float *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } + void CalcSelfDistanceArrayNoBoxIdxDouble(const double *coords, const int *idx, int n, double *out) { + hn::ScalableTag d; + const NoBox vbox(d); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } + void CalcSelfDistanceArrayOrthoIdxSingle(const float *coords, const int *idx, int n, const float *box, float *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } + void CalcSelfDistanceArrayOrthoIdxDouble(const double *coords, const int *idx, int n, const double *box, double *out) { + hn::ScalableTag d; + const OrthogonalBox vbox(d, box); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } + void CalcSelfDistanceArrayTriclinicIdxSingle(const float *coords, const int *idx, int n, const float *box, float *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } + void CalcSelfDistanceArrayTriclinicIdxDouble(const double *coords, const int *idx, int n, const double *box, double *out) { + hn::ScalableTag d; + const TriclinicBox vbox(d, box); + + CalcSelfDistanceArrayIdx(coords, idx, n, out, vbox); + } int GetNFloatLanes() { hn::ScalableTag d; @@ -1364,6 +1405,12 @@ namespace distopia { HWY_EXPORT(CalcDistanceArrayOrthoIdxDouble); HWY_EXPORT(CalcDistanceArrayTriclinicIdxSingle); HWY_EXPORT(CalcDistanceArrayTriclinicIdxDouble); + HWY_EXPORT(CalcSelfDistanceArrayNoBoxIdxSingle); + HWY_EXPORT(CalcSelfDistanceArrayNoBoxIdxDouble); + HWY_EXPORT(CalcSelfDistanceArrayOrthoIdxSingle); + HWY_EXPORT(CalcSelfDistanceArrayOrthoIdxDouble); + HWY_EXPORT(CalcSelfDistanceArrayTriclinicIdxSingle); + HWY_EXPORT(CalcSelfDistanceArrayTriclinicIdxDouble); HWY_EXPORT(GetNFloatLanes); HWY_EXPORT(GetNDoubleLanes); @@ -1609,6 +1656,24 @@ namespace distopia { } return HWY_DYNAMIC_DISPATCH(CalcDistanceArrayTriclinicIdxDouble)(coords, a_idx, b_idx, na, nb, box, out); } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayNoBoxIdx(const float *coords, const int *idx, int n, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayNoBoxIdxSingle)(coords, idx, n, out); + } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayNoBoxIdx(const double *coords, const int *idx, int n, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayNoBoxIdxDouble)(coords, idx, n, out); + } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayOrthoIdx(const float *coords, const int *idx, int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayOrthoIdxSingle)(coords, idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayOrthoIdx(const double *coords, const int *idx, int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayOrthoIdxDouble)(coords, idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayTriclinicIdx(const float *coords, const int *idx, int n, const float *box, float *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayTriclinicIdxSingle)(coords, idx, n, box, out); + } + HWY_DLLEXPORT template <> void CalcSelfDistanceArrayTriclinicIdx(const double *coords, const int *idx, int n, const double *box, double *out) { + return HWY_DYNAMIC_DISPATCH(CalcSelfDistanceArrayTriclinicIdxDouble)(coords, idx, n, box, out); + } std::vector DistopiaSupportedAndGeneratedTargets() { std::vector targets = hwy::SupportedAndGeneratedTargets(); From 8b07ef426b1c2167ded1c5ce4808bb8b9fe7644a Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Thu, 15 Aug 2024 12:19:16 +0100 Subject: [PATCH 19/19] SDAidx implementation and tests --- libdistopia/src/distopia.cpp | 57 +++++++++++++++++++++++++++ libdistopia/test/test_mda_match.cpp | 60 +++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/libdistopia/src/distopia.cpp b/libdistopia/src/distopia.cpp index 7826ece8..b1187fa0 100644 --- a/libdistopia/src/distopia.cpp +++ b/libdistopia/src/distopia.cpp @@ -966,7 +966,64 @@ namespace distopia { template void CalcSelfDistanceArrayIdx(const T *coords, const int *idx, int n, T *out, const B &box) { + hn::ScalableTag d; + + int tmpidx[HWY_MAX_LANES_D(hn::ScalableTag)]; + T tmpout[HWY_MAX_LANES_D(hn::ScalableTag)]; + T stubout[HWY_MAX_LANES_D(hn::ScalableTag)]; + T *dst; + + int nlanes = hn::Lanes(d); + // if n undersized, copy to new array + if (HWY_UNLIKELY(n < nlanes)) { + memset(tmpidx, 0, sizeof(int) * nlanes); + memcpy(tmpidx, idx, sizeof(int) * n); + idx = tmpidx; + dst = tmpout; + } else { + dst = out; + } + + auto b_x = hn::Undefined(d); + auto b_y = hn::Undefined(d); + auto b_z = hn::Undefined(d); + + size_t dstptr = 0; // start of "row" in output + for (size_t i=0; iref_results[i], this->results[i], abs_err); } +} + + +TYPED_TEST(CoordinatesIdx, SelfDistanceArrayNoBoxIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcSelfDistanceArrayNoBox(this->a_coords_contig, this->nidx, this->ref_results); + + // test + distopia::CalcSelfDistanceArrayNoBoxIdx(this->coords, this->a_idx, this->nidx, this->results); + + + size_t n_results = nidx * (nidx-1) / 2; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, SelfDistanceArrayOrthoIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcSelfDistanceArrayOrtho(this->a_coords_contig, this->nidx, this->box, this->ref_results); + + // test + distopia::CalcSelfDistanceArrayOrthoIdx(this->coords, this->a_idx, this->nidx, this->box, this->results); + + + size_t n_results = nidx * (nidx-1) / 2; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } +} + + +TYPED_TEST(CoordinatesIdx, SelfDistanceArrayTriclinicIdx) { + int ncoords = 250; + int nidx = 100; + + this->SetUp(ncoords, nidx, BOXSIZE, 3 * BOXSIZE); + + // ref + distopia::CalcSelfDistanceArrayTriclinic(this->a_coords_contig, this->nidx, this->triclinic_box, this->ref_results); + + // test + distopia::CalcSelfDistanceArrayTriclinicIdx(this->coords, this->a_idx, this->nidx, this->triclinic_box, this->results); + + + size_t n_results = nidx * (nidx-1) / 2; + for (std::size_t i=0; iref_results[i], this->results[i], abs_err); + } } \ No newline at end of file