From 73331671414eb7f537db1e535a4291a2983391d8 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Thu, 20 Jul 2023 21:22:48 -0700 Subject: [PATCH 1/5] test deserialize sketches from Java --- .github/workflows/serde_compat.yml | 27 +++++++ kll/test/CMakeLists.txt | 7 ++ .../kll_sketch_deserialize_from_java_test.cpp | 76 +++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 .github/workflows/serde_compat.yml create mode 100644 kll/test/kll_sketch_deserialize_from_java_test.cpp diff --git a/.github/workflows/serde_compat.yml b/.github/workflows/serde_compat.yml new file mode 100644 index 00000000..b64e3b6a --- /dev/null +++ b/.github/workflows/serde_compat.yml @@ -0,0 +1,27 @@ +name: Java SerDe Compatibility Test + +on: + push: + branches: + - master + +jobs: + build: + name: SerDe Test + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Checkout Java + uses: actions/checkout@v3 + with: + repository: apache/datasketches-java + path: java + - name: Run Java + run: cd java && mvn test + - name: Run cmake + run: cd build && cmake .. 
-DSERDE_COMPAT=true + - name: Build C++ unit tests + run: cmake --build build --config Release + - name: Run C++ tests + run: cmake --build build --config Release --target test diff --git a/kll/test/CMakeLists.txt b/kll/test/CMakeLists.txt index a778c31c..17018531 100644 --- a/kll/test/CMakeLists.txt +++ b/kll/test/CMakeLists.txt @@ -42,3 +42,10 @@ target_sources(kll_test kll_sketch_validation.cpp kolmogorov_smirnov_test.cpp ) + +if (SERDE_COMPAT) +target_sources(kll_test + PRIVATE + kll_sketch_deserialize_from_java_test.cpp +) +endif() diff --git a/kll/test/kll_sketch_deserialize_from_java_test.cpp b/kll/test/kll_sketch_deserialize_from_java_test.cpp new file mode 100644 index 00000000..37703424 --- /dev/null +++ b/kll/test/kll_sketch_deserialize_from_java_test.cpp @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; + +TEST_CASE("kll float", "[serde_compat]") { + unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (unsigned n: n_arr) { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "kll_float_n" + std::to_string(n) + ".sk", std::ios::binary); + auto sketch = kll_sketch::deserialize(is); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K)); + REQUIRE(sketch.get_n() == n); + if (n > 0) { + REQUIRE(sketch.get_min_item() == 0.0); + REQUIRE(sketch.get_max_item() == static_cast(n - 1)); + uint64_t weight = 0; + for (auto pair: sketch) { + REQUIRE(pair.first >= sketch.get_min_item()); + REQUIRE(pair.first <= sketch.get_max_item()); + weight += pair.second; + } + REQUIRE(weight == sketch.get_n()); + } + } +} + +TEST_CASE("kll double", "[serde_compat]") { + unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (unsigned n: n_arr) { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "kll_double_n" + std::to_string(n) + ".sk", std::ios::binary); + auto sketch = kll_sketch::deserialize(is); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K)); + REQUIRE(sketch.get_n() == n); + if (n > 0) { + REQUIRE(sketch.get_min_item() == 0.0); + REQUIRE(sketch.get_max_item() == static_cast(n - 1)); + uint64_t weight = 0; + for (auto pair: sketch) { + REQUIRE(pair.first >= sketch.get_min_item()); + REQUIRE(pair.first <= sketch.get_max_item()); + weight += pair.second; + } + REQUIRE(weight == sketch.get_n()); + } + } +} + +} /* namespace datasketches */ From 997581999ff3f2db22cba3f5d8e05628b4389850 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Fri, 21 Jul 2023 
15:11:53 -0700 Subject: [PATCH 2/5] override excluded generate group --- .github/workflows/serde_compat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/serde_compat.yml b/.github/workflows/serde_compat.yml index b64e3b6a..e5f964fb 100644 --- a/.github/workflows/serde_compat.yml +++ b/.github/workflows/serde_compat.yml @@ -18,7 +18,7 @@ jobs: repository: apache/datasketches-java path: java - name: Run Java - run: cd java && mvn test + run: cd java && mvn test -Dtestng.excludedgroups= - name: Run cmake run: cd build && cmake .. -DSERDE_COMPAT=true - name: Build C++ unit tests From c8f289f3ababf6bc0dd136fa8dd02a9b04986b9d Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Fri, 21 Jul 2023 17:27:30 -0700 Subject: [PATCH 3/5] deserialize CPC sketches from Java --- cpc/test/CMakeLists.txt | 19 ++++++--- .../cpc_sketch_deserialize_from_java_test.cpp | 40 +++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 cpc/test/cpc_sketch_deserialize_from_java_test.cpp diff --git a/cpc/test/CMakeLists.txt b/cpc/test/CMakeLists.txt index eff78b1a..2d3af9bb 100644 --- a/cpc/test/CMakeLists.txt +++ b/cpc/test/CMakeLists.txt @@ -23,12 +23,12 @@ set_target_properties(cpc_test PROPERTIES CXX_STANDARD_REQUIRED YES ) -#file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH) -#string(APPEND CPC_TEST_BINARY_PATH "/") -#target_compile_definitions(cpc_test -# PRIVATE -# TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}" -#) +file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH) +string(APPEND CPC_TEST_BINARY_PATH "/") +target_compile_definitions(cpc_test + PRIVATE + TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}" +) add_test( NAME cpc_test @@ -42,3 +42,10 @@ target_sources(cpc_test compression_test.cpp cpc_sketch_allocation_test.cpp ) + +if (SERDE_COMPAT) +target_sources(cpc_test + PRIVATE + cpc_sketch_deserialize_from_java_test.cpp +) +endif() diff --git 
a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp new file mode 100644 index 00000000..938efae9 --- /dev/null +++ b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; + +TEST_CASE("cpc sketch", "[serde_compat]") { + unsigned n_arr[] = {0, 100, 200, 2000, 20000}; + for (unsigned n: n_arr) { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + ".sk", std::ios::binary); + auto sketch = cpc_sketch::deserialize(is); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); + } +} + +} /* namespace datasketches */ From f825098ccdeb72b4d92711e73c4eaa603d1d8676 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Tue, 25 Jul 2023 11:14:12 -0700 Subject: [PATCH 4/5] added manual trigger --- .github/workflows/serde_compat.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/serde_compat.yml b/.github/workflows/serde_compat.yml index e5f964fb..d3f3bd14 100644 --- a/.github/workflows/serde_compat.yml +++ b/.github/workflows/serde_compat.yml @@ -4,6 +4,7 @@ on: push: branches: - master + workflow_dispatch: jobs: build: From f8774b00274596ee415eab77f350ba3280d5d6b0 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Tue, 25 Jul 2023 11:18:49 -0700 Subject: [PATCH 5/5] added a comment --- cpc/test/cpc_sketch_deserialize_from_java_test.cpp | 2 ++ kll/test/kll_sketch_deserialize_from_java_test.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp index 938efae9..7fb0e90b 100644 --- a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +++ b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp @@ -23,6 +23,8 @@ namespace datasketches { +// assume the binary sketches for this test have been generated by datasketches-java code +// in the subdirectory called "java" in the root directory of this project static std::string 
testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("cpc sketch", "[serde_compat]") { diff --git a/kll/test/kll_sketch_deserialize_from_java_test.cpp b/kll/test/kll_sketch_deserialize_from_java_test.cpp index 37703424..2b25899a 100644 --- a/kll/test/kll_sketch_deserialize_from_java_test.cpp +++ b/kll/test/kll_sketch_deserialize_from_java_test.cpp @@ -23,6 +23,8 @@ namespace datasketches { +// assume the binary sketches for this test have been generated by datasketches-java code +// in the subdirectory called "java" in the root directory of this project static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("kll float", "[serde_compat]") {