diff --git a/.github/workflows/serde_compat.yml b/.github/workflows/serde_compat.yml index d3f3bd14..f6ae2109 100644 --- a/.github/workflows/serde_compat.yml +++ b/.github/workflows/serde_compat.yml @@ -19,7 +19,9 @@ jobs: repository: apache/datasketches-java path: java - name: Run Java - run: cd java && mvn test -Dtestng.excludedgroups= + run: cd java && mvn test -P generate-java-files + - name: Copy files + run: cp java/target/java_generated_files/*.sk java - name: Run cmake run: cd build && cmake .. -DSERDE_COMPAT=true - name: Build C++ unit tests diff --git a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp index 7fb0e90b..deaff5e1 100644 --- a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +++ b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("cpc sketch", "[serde_compat]") { - unsigned n_arr[] = {0, 100, 200, 2000, 20000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 100, 200, 2000, 20000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = cpc_sketch::deserialize(is); + is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = cpc_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); } diff --git a/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp b/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp index 399017d0..221f6a62 100644 --- a/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +++ b/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("frequent longs", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "frequent_long_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = frequent_items_sketch::deserialize(is); + is.open(testBinaryInputPath + "frequent_long_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = frequent_items_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); if (n > 10) { REQUIRE(sketch.get_maximum_error() > 0); @@ -45,12 +45,12 @@ TEST_CASE("frequent longs", "[serde_compat]") { } TEST_CASE("frequent strings", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "frequent_string_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = frequent_items_sketch::deserialize(is); + is.open(testBinaryInputPath + "frequent_string_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = frequent_items_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); if (n > 10) { REQUIRE(sketch.get_maximum_error() > 0); @@ -64,8 +64,8 @@ TEST_CASE("frequent strings", "[serde_compat]") { TEST_CASE("frequent strings ascii", "[serde_compat]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "frequent_string_ascii.sk", std::ios::binary); - auto sketch = frequent_items_sketch::deserialize(is); + is.open(testBinaryInputPath + "frequent_string_ascii_java.sk", std::ios::binary); + const auto sketch = frequent_items_sketch::deserialize(is); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_maximum_error() == 0); REQUIRE(sketch.get_total_weight() == 10); @@ -78,8 +78,8 @@ TEST_CASE("frequent strings ascii", "[serde_compat]") { TEST_CASE("frequent strings utf8", "[serde_compat]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "frequent_string_utf8.sk", std::ios::binary); - auto sketch = frequent_items_sketch::deserialize(is); + is.open(testBinaryInputPath + "frequent_string_utf8_java.sk", std::ios::binary); + const auto sketch = frequent_items_sketch::deserialize(is); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_maximum_error() == 0); REQUIRE(sketch.get_total_weight() == 28); diff --git a/fi/test/frequent_items_sketch_test.cpp b/fi/test/frequent_items_sketch_test.cpp index e1aecd5a..2d44575f 100644 --- a/fi/test/frequent_items_sketch_test.cpp +++ b/fi/test/frequent_items_sketch_test.cpp @@ -70,6 +70,7 @@ TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items REQUIRE(sketch.get_estimate("b") == 3); REQUIRE(sketch.get_estimate("c") == 2); REQUIRE(sketch.get_estimate("d") == 1); + REQUIRE(sketch.get_maximum_error() == 0); } TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") { @@ -96,6 +97,7 @@ TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_ite REQUIRE(sketch.get_estimate("b") == 3); REQUIRE(sketch.get_estimate("c") == 2); REQUIRE(sketch.get_estimate("d") == 1); + REQUIRE(sketch.get_maximum_error() == 0); } TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") { @@ -149,6 +151,7 @@ TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") { REQUIRE(sketch1.get_estimate(2) == 3); REQUIRE(sketch1.get_estimate(3) == 2); REQUIRE(sketch1.get_estimate(4) == 1); + REQUIRE(sketch1.get_maximum_error() == 0); } TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") { diff --git a/hll/test/ToFromByteArrayTest.cpp b/hll/test/ToFromByteArrayTest.cpp index 4bf18e7e..9c3bb46d 100644 --- a/hll/test/ToFromByteArrayTest.cpp +++ b/hll/test/ToFromByteArrayTest.cpp @@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") { } } -TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") { - std::string inputPath; -#ifdef TEST_BINARY_INPUT_PATH - inputPath = TEST_BINARY_INPUT_PATH; -#else - inputPath = "test/"; -#endif - - std::ifstream ifs; - ifs.open(inputPath + "list_from_java.sk", std::ios::binary); - hll_sketch sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == 7.0); - REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6)); - REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5)); - ifs.close(); - - ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary); - sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == 24.0); - REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5)); - REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5)); - ifs.close(); - - ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary); - sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == 24.0); - REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5)); - REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5)); - ifs.close(); - - - ifs.open(inputPath + "array6_from_java.sk", std::ios::binary); - sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5)); - REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5)); - REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5)); - ifs.close(); - - - ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary); - sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5)); - REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5)); - REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5)); - - ifs.close(); - - - ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary); - sk = hll_sketch::deserialize(ifs); - REQUIRE(sk.is_empty() == false); - REQUIRE(sk.get_lg_config_k() == 8); - REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5)); - REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5)); - REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5)); - ifs.close(); -} - static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) { REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k()); REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1)); diff --git a/hll/test/array6_from_java.sk b/hll/test/array6_from_java.sk deleted file mode 100644 index 2787623b..00000000 Binary files a/hll/test/array6_from_java.sk and /dev/null differ diff --git a/hll/test/compact_array4_from_java.sk b/hll/test/compact_array4_from_java.sk deleted file mode 100644 index e760416b..00000000 Binary files a/hll/test/compact_array4_from_java.sk and /dev/null differ diff --git a/hll/test/compact_set_from_java.sk b/hll/test/compact_set_from_java.sk deleted file mode 100644 index db96e731..00000000 Binary files a/hll/test/compact_set_from_java.sk and /dev/null differ diff --git a/hll/test/hll_sketch_deserialize_from_java_test.cpp b/hll/test/hll_sketch_deserialize_from_java_test.cpp index 5139e0b8..828af544 100644 --- a/hll/test/hll_sketch_deserialize_from_java_test.cpp +++ b/hll/test/hll_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("hll4 sketch", "[serde_compat]") { - unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "hll4_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = hll_sketch::deserialize(is); + is.open(testBinaryInputPath + "hll4_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = hll_sketch::deserialize(is); REQUIRE(sketch.get_lg_config_k() == 12); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); @@ -41,12 +41,12 @@ TEST_CASE("hll4 sketch", "[serde_compat]") { } TEST_CASE("hll6 sketch", "[serde_compat]") { - unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "hll6_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = hll_sketch::deserialize(is); + is.open(testBinaryInputPath + "hll6_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = hll_sketch::deserialize(is); REQUIRE(sketch.get_lg_config_k() == 12); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); @@ -54,12 +54,12 @@ TEST_CASE("hll6 sketch", "[serde_compat]") { } TEST_CASE("hll8 sketch", "[serde_compat]") { - unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "hll8_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = hll_sketch::deserialize(is); + is.open(testBinaryInputPath + "hll8_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = hll_sketch::deserialize(is); REQUIRE(sketch.get_lg_config_k() == 12); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); diff --git a/hll/test/list_from_java.sk b/hll/test/list_from_java.sk deleted file mode 100644 index 53e88925..00000000 Binary files a/hll/test/list_from_java.sk and /dev/null differ diff --git a/hll/test/updatable_array4_from_java.sk b/hll/test/updatable_array4_from_java.sk deleted file mode 100644 index 413ad782..00000000 Binary files a/hll/test/updatable_array4_from_java.sk and /dev/null differ diff --git a/hll/test/updatable_set_from_java.sk b/hll/test/updatable_set_from_java.sk deleted file mode 100644 index a1459e81..00000000 Binary files a/hll/test/updatable_set_from_java.sk and /dev/null differ diff --git a/kll/test/kll_sketch_deserialize_from_java_test.cpp b/kll/test/kll_sketch_deserialize_from_java_test.cpp index 5c73fb4b..795486ae 100644 --- a/kll/test/kll_sketch_deserialize_from_java_test.cpp +++ b/kll/test/kll_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("kll float", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "kll_float_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = kll_sketch::deserialize(is); + is.open(testBinaryInputPath + "kll_float_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = kll_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K)); REQUIRE(sketch.get_n() == n); @@ -52,12 +52,12 @@ TEST_CASE("kll float", "[serde_compat]") { } TEST_CASE("kll double", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "kll_double_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = kll_sketch::deserialize(is); + is.open(testBinaryInputPath + "kll_double_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = kll_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K)); REQUIRE(sketch.get_n() == n); @@ -75,4 +75,29 @@ TEST_CASE("kll double", "[serde_compat]") { } } +// numbers are padded with leading spaces so that natural order works +TEST_CASE("kll string", "[serde_compat]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "kll_string_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = kll_sketch::deserialize(is); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K)); + REQUIRE(sketch.get_n() == n); + if (n > 0) { + REQUIRE(std::stoul(sketch.get_min_item()) == 1); + REQUIRE(std::stoul(sketch.get_max_item()) == n); + uint64_t weight = 0; + for (const auto pair: sketch) { + REQUIRE(pair.first >= sketch.get_min_item()); + REQUIRE(pair.first <= sketch.get_max_item()); + weight += pair.second; + } + REQUIRE(weight == sketch.get_n()); + } + } +} + } /* namespace datasketches */ diff --git a/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp b/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp index 81b434b8..1123d8fd 100644 --- a/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +++ b/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("quantiles double", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = quantiles_sketch::deserialize(is); + is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = quantiles_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K)); REQUIRE(sketch.get_n() == n); @@ -58,12 +58,12 @@ struct string_as_number_less { }; TEST_CASE("quantiles string", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = quantiles_sketch::deserialize(is); + is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = quantiles_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K)); REQUIRE(sketch.get_n() == n); diff --git a/req/test/req_float_empty_from_java.sk b/req/test/req_float_empty_from_java.sk deleted file mode 100644 index 9b24bcc9..00000000 Binary files a/req/test/req_float_empty_from_java.sk and /dev/null differ diff --git a/req/test/req_float_estimation_from_java.sk b/req/test/req_float_estimation_from_java.sk deleted file mode 100644 index d063b418..00000000 Binary files a/req/test/req_float_estimation_from_java.sk and /dev/null differ diff --git a/req/test/req_float_exact_from_java.sk b/req/test/req_float_exact_from_java.sk deleted file mode 100644 index d144ac8d..00000000 Binary files a/req/test/req_float_exact_from_java.sk and /dev/null differ diff --git a/req/test/req_float_raw_items_from_java.sk b/req/test/req_float_raw_items_from_java.sk deleted file mode 100644 index 0bfe5a90..00000000 Binary files a/req/test/req_float_raw_items_from_java.sk and /dev/null differ diff --git a/req/test/req_float_single_item_from_java.sk b/req/test/req_float_single_item_from_java.sk deleted file mode 100644 index 774db9f1..00000000 Binary files a/req/test/req_float_single_item_from_java.sk and /dev/null differ diff --git a/req/test/req_sketch_deserialize_from_java_test.cpp b/req/test/req_sketch_deserialize_from_java_test.cpp index 4e83c3bd..ef3c1123 100644 --- a/req/test/req_sketch_deserialize_from_java_test.cpp +++ b/req/test/req_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("req float", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); + is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = req_sketch::deserialize(is); REQUIRE(sketch.is_HRA()); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > 10)); diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp index 9af96bea..3aa411b5 100755 --- a/req/test/req_sketch_test.cpp +++ b/req/test/req_sketch_test.cpp @@ -356,76 +356,6 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re REQUIRE(sketch2.get_max_item() == sketch.get_max_item()); } -TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(input_path + "req_float_empty_from_java.sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); - REQUIRE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_n() == 0); - REQUIRE(sketch.get_num_retained() == 0); - REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error); - REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error); -} - -TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_n() == 1); - REQUIRE(sketch.get_num_retained() == 1); - REQUIRE(sketch.get_min_item() == 1); - REQUIRE(sketch.get_max_item() == 1); - REQUIRE(sketch.get_rank(1.0f, false) == 0); - REQUIRE(sketch.get_rank(1.0f) == 1); -} - -TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(input_path + "req_float_raw_items_from_java.sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_n() == 4); - REQUIRE(sketch.get_num_retained() == 4); - REQUIRE(sketch.get_min_item() == 0); - REQUIRE(sketch.get_max_item() == 3); - REQUIRE(sketch.get_rank(2.0f, false) == 0.5); -} - -TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(input_path + "req_float_exact_from_java.sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_n() == 100); - REQUIRE(sketch.get_num_retained() == 100); - REQUIRE(sketch.get_min_item() == 0); - REQUIRE(sketch.get_max_item() == 99); - REQUIRE(sketch.get_rank(50.0f, false) == 0.5); -} - -TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary); - auto sketch = req_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_n() == 10000); - REQUIRE(sketch.get_num_retained() == 2942); - REQUIRE(sketch.get_min_item() == 0); - REQUIRE(sketch.get_max_item() == 9999); - REQUIRE(sketch.get_rank(5000.0f, false) == 0.5); -} - TEST_CASE("req sketch: merge into empty", "[req_sketch]") { req_sketch sketch1(40); diff --git a/sampling/test/CMakeLists.txt b/sampling/test/CMakeLists.txt index 51764eb4..f62a1442 100644 --- a/sampling/test/CMakeLists.txt +++ b/sampling/test/CMakeLists.txt @@ -46,5 +46,6 @@ if (SERDE_COMPAT) target_sources(sampling_test PRIVATE var_opt_sketch_deserialize_from_java_test.cpp + var_opt_union_deserialize_from_java_test.cpp ) endif() diff --git a/sampling/test/binaries_from_java.txt b/sampling/test/binaries_from_java.txt deleted file mode 100644 index eb3ea304..00000000 --- a/sampling/test/binaries_from_java.txt +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -Code snippets used to generate to generate the binary images from Java. -Heavy items have negative weights to allow a simple predicate to filter -heavy vs light sketch entires. - - -varopt_sketch_long_sampling.bin: -final VarOptItemsSketch sk = VarOptItemsSketch.newInstance(1024); -for (int i = 1; i <= 200; ++i) { - sk.update(Integer.toString(i), 1000.0 / i); -} -byte[] bytes = sk.toByteArray(new ArrayOfStringsSerDe()); - - -varopt_sketch_string_exact.bin: -final VarOptItemsSketch sk = VarOptItemsSketch.newInstance(1024); -for (long i = 0; i < 2000; ++i) { - sk.update(i, 1.0); -} -sk.update(-1L, 100000.0); -sk.update(-2L, 110000.0); -sk.update(-3L, 120000.0); -byte[] bytes = sk.toByteArray(new ArrayOfLongsSerDe()); - - -varopt_union_double_sampling.bin: -// parallels small samplign sketch test -final int kSmall = 16; -final int n1 = 32; -final int n2 = 64; -final int kMax = 128; - -// small k sketch, but sampling -VarOptItemsSketch sketch = VarOptItemsSketch.newInstance(kSmall); -for (int i = 0; i < n1; ++i) { - sketch.update(1.0 * i, 1.0); -} -sketch.update(-1.0, n1 * n1); // add a heavy item - -final VarOptItemsUnion union = VarOptItemsUnion.newInstance(kMax); -union.update(sketch); - -// another one, but different n to get a different per-item weight -sketch = VarOptItemsSketch.newInstance(kSmall); -for (int i = 0; i < n2; ++i) { - sketch.update(1.0 * i, 1.0); -} -union.update(sketch); -byte[] bytes = union.toByteArray(new ArrayOfDoublesSerDe()); diff --git a/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp b/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp index aecc12fe..e121eeeb 100644 --- a/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +++ b/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp @@ -27,16 +27,55 @@ namespace datasketches { // in the subdirectory called "java" in the root directory of this project static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; -TEST_CASE("var opt long", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; +TEST_CASE("var opt sketch long", "[serde_compat]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "varopt_long_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = var_opt_sketch::deserialize(is); + is.open(testBinaryInputPath + "varopt_sketch_long_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = var_opt_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.get_num_samples() == (n > 10 ? 32 : n)); } } +TEST_CASE("var opt sketch: deserialize exact from java", "[serde_compat]") { + const double EPS = 1e-13; + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "varopt_sketch_string_exact_java.sk", std::ios::binary); + const auto sketch = var_opt_sketch::deserialize(is); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_k() == 1024); + REQUIRE(sketch.get_n() == 200); + REQUIRE(sketch.get_num_samples() == 200); + const subset_summary ss = sketch.estimate_subset_sum([](std::string){ return true; }); + + double tgt_wt = 0.0; + for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; } + REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS)); +} + + +TEST_CASE("var opt sketch: deserialize sampling from java", "[serde_compat]") { + const double EPS = 1e-13; + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "varopt_sketch_long_sampling_java.sk", std::ios::binary); + const auto sketch = var_opt_sketch::deserialize(is); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_k() == 1024); + REQUIRE(sketch.get_n() == 2003); + REQUIRE(sketch.get_num_samples() == sketch.get_k()); + subset_summary ss = sketch.estimate_subset_sum([](int64_t){ return true; }); + REQUIRE(ss.estimate == Approx(332000.0).margin(EPS)); + REQUIRE(ss.total_sketch_weight == Approx(332000.0).margin(EPS)); + + ss = sketch.estimate_subset_sum([](int64_t x){ return x < 0; }); + REQUIRE(ss.estimate == 330000.0); // heavy item, weight is exact + + ss = sketch.estimate_subset_sum([](int64_t x){ return x >= 0; }); + REQUIRE(ss.estimate == Approx(2000.0).margin(EPS)); +} + } /* namespace datasketches */ diff --git a/sampling/test/var_opt_sketch_test.cpp b/sampling/test/var_opt_sketch_test.cpp index 29fcb978..71d16e91 100644 --- a/sampling/test/var_opt_sketch_test.cpp +++ b/sampling/test/var_opt_sketch_test.cpp @@ -489,41 +489,4 @@ TEST_CASE("varopt sketch: estimate subset sum", "[var_opt_sketch]") { REQUIRE(summary.estimate < total_weight); // exact mode, so know it must be strictly less } -TEST_CASE("varopt sketch: deserialize exact from java", "[var_opt_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "varopt_sketch_string_exact.sk", std::ios::binary); - var_opt_sketch sketch = var_opt_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE(sketch.get_k() == 1024); - REQUIRE(sketch.get_n() == 200); - REQUIRE(sketch.get_num_samples() == 200); - subset_summary ss = sketch.estimate_subset_sum([](std::string){ return true; }); - - double tgt_wt = 0.0; - for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; } - REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS)); -} - - -TEST_CASE("varopt sketch: deserialize sampling from java", "[var_opt_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "varopt_sketch_long_sampling.sk", std::ios::binary); - var_opt_sketch sketch = var_opt_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE(sketch.get_k() == 1024); - REQUIRE(sketch.get_n() == 2003); - REQUIRE(sketch.get_num_samples() == sketch.get_k()); - subset_summary ss = sketch.estimate_subset_sum([](int64_t){ return true; }); - REQUIRE(ss.estimate == Approx(332000.0).margin(EPS)); - REQUIRE(ss.total_sketch_weight == Approx(332000.0).margin(EPS)); - - ss = sketch.estimate_subset_sum([](int64_t x){ return x < 0; }); - REQUIRE(ss.estimate == 330000.0); // heavy item, weight is exact - - ss = sketch.estimate_subset_sum([](int64_t x){ return x >= 0; }); - REQUIRE(ss.estimate == Approx(2000.0).margin(EPS)); -} - } diff --git a/sampling/test/var_opt_union_deserialize_from_java_test.cpp b/sampling/test/var_opt_union_deserialize_from_java_test.cpp new file mode 100644 index 00000000..915f230f --- /dev/null +++ b/sampling/test/var_opt_union_deserialize_from_java_test.cpp @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +// assume the binary sketches for this test have been generated by datasketches-java code +// in the subdirectory called "java" in the root directory of this project +static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; + +TEST_CASE("var opt union double", "[serde_compat]") { + const double EPS = 1e-13; + + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "varopt_union_double_sampling_java.sk", std::ios::binary); + auto u = var_opt_union::deserialize(is); + + // must reduce k in the process + const auto result = u.get_result(); + REQUIRE_FALSE(result.is_empty()); + REQUIRE(result.get_n() == 97); + + const double expected_wt = 96.0; // light items -- ignoring the heavy one + const subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;}); + REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS)); + REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS)); + REQUIRE(result.get_k() < 128); +} + +} /* namespace datasketches */ diff --git a/sampling/test/var_opt_union_test.cpp b/sampling/test/var_opt_union_test.cpp index f478ff38..b17d8fa4 100644 --- a/sampling/test/var_opt_union_test.cpp +++ b/sampling/test/var_opt_union_test.cpp @@ -305,22 +305,4 @@ TEST_CASE("varopt union: serialize sampling", "[var_opt_union]") { compare_serialization_deserialization(u); } -TEST_CASE("varopt union: deserialize from java", "[var_opt_union]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "varopt_union_double_sampling.sk", std::ios::binary); - var_opt_union u = var_opt_union::deserialize(is); - - // must reduce k in the process, like in small_sampling_sketch() - var_opt_sketch result = u.get_result(); - REQUIRE_FALSE(result.is_empty()); - REQUIRE(result.get_n() == 97); - - double expected_wt = 96.0;// light items -- ignoring the heavy one - subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;}); - REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS)); - REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS)); - REQUIRE(result.get_k() < 128); -} - } diff --git a/sampling/test/varopt_sketch_long_sampling.sk b/sampling/test/varopt_sketch_long_sampling.sk deleted file mode 100644 index ae8c750f..00000000 Binary files a/sampling/test/varopt_sketch_long_sampling.sk and /dev/null differ diff --git a/sampling/test/varopt_sketch_string_exact.sk b/sampling/test/varopt_sketch_string_exact.sk deleted file mode 100644 index 2da7e4ed..00000000 Binary files a/sampling/test/varopt_sketch_string_exact.sk and /dev/null differ diff --git a/sampling/test/varopt_union_double_sampling.sk b/sampling/test/varopt_union_double_sampling.sk deleted file mode 100644 index b3a229e1..00000000 Binary files a/sampling/test/varopt_union_double_sampling.sk and /dev/null differ diff --git a/theta/test/theta_compact_empty_from_java.sk b/theta/test/theta_compact_empty_from_java.sk deleted file mode 100644 index f6c647f8..00000000 Binary files a/theta/test/theta_compact_empty_from_java.sk and /dev/null differ diff --git a/theta/test/theta_compact_estimation_from_java.sk b/theta/test/theta_compact_estimation_from_java.sk deleted file mode 100644 index 7c6babf9..00000000 Binary files a/theta/test/theta_compact_estimation_from_java.sk and /dev/null differ diff --git a/theta/test/theta_compact_exact_from_java.sk b/theta/test/theta_compact_exact_from_java.sk deleted file mode 100644 index 30e4f773..00000000 Binary files a/theta/test/theta_compact_exact_from_java.sk and /dev/null differ diff --git a/theta/test/theta_compact_single_item_from_java.sk b/theta/test/theta_compact_single_item_from_java.sk deleted file mode 100644 index be5ee687..00000000 Binary files a/theta/test/theta_compact_single_item_from_java.sk and /dev/null differ diff --git a/theta/test/theta_sketch_deserialize_from_java_test.cpp b/theta/test/theta_sketch_deserialize_from_java_test.cpp index 04ac5a6b..f69467eb 100644 --- a/theta/test/theta_sketch_deserialize_from_java_test.cpp +++ b/theta/test/theta_sketch_deserialize_from_java_test.cpp @@ -28,16 +28,16 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("theta sketch", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "theta_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); + is.open(testBinaryInputPath + "theta_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = compact_theta_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > 1000)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); - for (auto hash: sketch) { + for (const auto hash: sketch) { REQUIRE(hash < sketch.get_theta64()); } REQUIRE(sketch.is_ordered()); @@ -48,8 +48,8 @@ TEST_CASE("theta sketch", "[serde_compat]") { TEST_CASE("theta sketch non-empty no entries", "[serde_compat]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "theta_non_empty_no_entries.sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); + is.open(testBinaryInputPath + "theta_non_empty_no_entries_java.sk", std::ios::binary); + const auto sketch = compact_theta_sketch::deserialize(is); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_num_retained() == 0); } diff --git a/theta/test/theta_sketch_test.cpp b/theta/test/theta_sketch_test.cpp index 5ba2889e..488f67cf 100644 --- a/theta/test/theta_sketch_test.cpp +++ b/theta/test/theta_sketch_test.cpp @@ -167,20 +167,6 @@ TEST_CASE("theta sketch: estimation", "[theta_sketch]") { REQUIRE(compact_sketch.get_upper_bound(1) > n); } -TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); - REQUIRE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_num_retained() == 0); - REQUIRE(sketch.get_theta() == 1.0); - REQUIRE(sketch.get_estimate() == 0.0); - REQUIRE(sketch.get_lower_bound(1) == 0.0); - REQUIRE(sketch.get_upper_bound(1) == 0.0); -} - TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); @@ -209,88 +195,6 @@ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch REQUIRE(sketch.get_upper_bound(1) == 0.0); } -TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_num_retained() == 1); - REQUIRE(sketch.get_theta() == 1.0); - REQUIRE(sketch.get_estimate() == 1.0); - REQUIRE(sketch.get_lower_bound(1) == 1.0); - REQUIRE(sketch.get_upper_bound(1) == 1.0); -} - -TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.is_ordered()); - REQUIRE(sketch.get_num_retained() == 100); - - // the same construction process in Java must have produced exactly the same sketch - auto update_sketch = update_theta_sketch::builder().build(); - const int n = 100; - for (int i = 0; i < n; i++) update_sketch.update(i); - REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained()); - REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10)); - REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10)); - compact_theta_sketch compact_sketch = update_sketch.compact(); - // the sketches are ordered, so the iteration sequence must match exactly - auto iter = sketch.begin(); - for (const auto& key: compact_sketch) { - REQUIRE(*iter == key); - ++iter; - } -} - -TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary); - auto sketch = compact_theta_sketch::deserialize(is); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE(sketch.is_estimation_mode()); - REQUIRE(sketch.is_ordered()); - REQUIRE(sketch.get_num_retained() == 4342); - REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10)); - REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10)); - - // the same construction process in Java must have produced exactly the same sketch - update_theta_sketch update_sketch = update_theta_sketch::builder().build(); - const int n = 8192; - for (int i = 0; i < n; i++) update_sketch.update(i); - REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained()); - REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10)); - REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10)); - compact_theta_sketch compact_sketch = update_sketch.compact(); - // the sketches are ordered, so the iteration sequence must match exactly - auto iter = sketch.begin(); - for (const auto& key: compact_sketch) { - REQUIRE(*iter == key); - ++iter; - } -} - TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); @@ -473,30 +377,6 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument); } -TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate); - - std::vector buf; - if(is) { - auto size = is.tellg(); - buf.reserve(size); - buf.assign(size, 0); - is.seekg(0, std::ios_base::beg); - is.read((char*)(buf.data()), buf.size()); - } - - auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size()); - REQUIRE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_num_retained() == 0); - REQUIRE(sketch.get_theta() == 1.0); - REQUIRE(sketch.get_estimate() == 0.0); - REQUIRE(sketch.get_lower_bound(1) == 0.0); - REQUIRE(sketch.get_upper_bound(1) == 0.0); -} - TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); @@ -545,74 +425,6 @@ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") { REQUIRE(sketch.get_upper_bound(1) == 0.0); } -TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate); - std::vector buf; - if(is) { - auto size = is.tellg(); - buf.reserve(size); - buf.assign(size, 0); - is.seekg(0, std::ios_base::beg); - is.read((char*)(buf.data()), buf.size()); - } - - auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size()); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE_FALSE(sketch.is_estimation_mode()); - REQUIRE(sketch.get_num_retained() == 1); - REQUIRE(sketch.get_theta() == 1.0); - REQUIRE(sketch.get_estimate() == 1.0); - REQUIRE(sketch.get_lower_bound(1) == 1.0); - REQUIRE(sketch.get_upper_bound(1) == 1.0); -} - -TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") { - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate); - std::vector buf; - if(is) { - auto size = is.tellg(); - buf.reserve(size); - buf.assign(size, 0); - is.seekg(0, std::ios_base::beg); - is.read((char*)(buf.data()), buf.size()); - } - - auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size()); - REQUIRE_FALSE(sketch.is_empty()); - REQUIRE(sketch.is_estimation_mode()); - REQUIRE(sketch.is_ordered()); - REQUIRE(sketch.get_num_retained() == 4342); - REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10)); - REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10)); - - // the same construction process in Java must have produced exactly the same sketch - update_theta_sketch update_sketch = update_theta_sketch::builder().build(); - const int n = 8192; - for (int i = 0; i < n; i++) update_sketch.update(i); - REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained()); - REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10)); - REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10)); - REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10)); - REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10)); - compact_theta_sketch compact_sketch = update_sketch.compact(); - // the sketches are ordered, so the iteration sequence must match exactly - auto iter = sketch.begin(); - for (const auto key: compact_sketch) { - REQUIRE(*iter == key); - ++iter; - } -} - TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); diff --git a/tuple/test/aod_1_compact_empty_from_java.sk b/tuple/test/aod_1_compact_empty_from_java.sk deleted file mode 100644 index 8d2583d5..00000000 --- a/tuple/test/aod_1_compact_empty_from_java.sk +++ /dev/null @@ -1 +0,0 @@ - Ì“ÿÿÿÿÿÿÿ \ No newline at end of file diff --git a/tuple/test/aod_1_compact_estimation_from_java.sk b/tuple/test/aod_1_compact_estimation_from_java.sk deleted file mode 100644 index d0864898..00000000 Binary files a/tuple/test/aod_1_compact_estimation_from_java.sk and /dev/null differ diff --git a/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk b/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk deleted file mode 100644 index f67106d7..00000000 Binary files a/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk and /dev/null differ diff --git a/tuple/test/aod_2_compact_exact_from_java.sk b/tuple/test/aod_2_compact_exact_from_java.sk deleted file mode 100644 index a14fd086..00000000 Binary files a/tuple/test/aod_2_compact_exact_from_java.sk and /dev/null differ diff --git a/tuple/test/aod_3_compact_empty_from_java.sk b/tuple/test/aod_3_compact_empty_from_java.sk deleted file mode 100644 index 1579d9bc..00000000 --- a/tuple/test/aod_3_compact_empty_from_java.sk +++ /dev/null @@ -1 +0,0 @@ - Ì“ÿÿÿÿÿÿÿ \ No newline at end of file diff --git a/tuple/test/aod_sketch_deserialize_from_java_test.cpp b/tuple/test/aod_sketch_deserialize_from_java_test.cpp index 113ee0fc..4237e22c 100644 --- a/tuple/test/aod_sketch_deserialize_from_java_test.cpp +++ b/tuple/test/aod_sketch_deserialize_from_java_test.cpp @@ -28,12 +28,12 @@ namespace datasketches { static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; TEST_CASE("aod sketch one value", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "aod_1_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = compact_array_of_doubles_sketch::deserialize(is); + is.open(testBinaryInputPath + "aod_1_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = compact_array_of_doubles_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > 1000)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); @@ -45,12 +45,12 @@ TEST_CASE("aod sketch one value", "[serde_compat]") { } TEST_CASE("aod sketch three values", "[serde_compat]") { - unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; - for (unsigned n: n_arr) { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "aod_3_n" + std::to_string(n) + ".sk", std::ios::binary); - auto sketch = compact_array_of_doubles_sketch::deserialize(is); + is.open(testBinaryInputPath + "aod_3_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = compact_array_of_doubles_sketch::deserialize(is); REQUIRE(sketch.is_empty() == (n == 0)); REQUIRE(sketch.is_estimation_mode() == (n > 1000)); REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); @@ -66,8 +66,8 @@ TEST_CASE("aod sketch three values", "[serde_compat]") { TEST_CASE("aod sketch non-empty no entries", "[serde_compat]") { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "aod_1_non_empty_no_entries.sk", std::ios::binary); - auto sketch = compact_array_of_doubles_sketch::deserialize(is); + is.open(testBinaryInputPath + "aod_1_non_empty_no_entries_java.sk", std::ios::binary); + const auto sketch = compact_array_of_doubles_sketch::deserialize(is); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_num_retained() == 0); } diff --git a/tuple/test/array_of_doubles_sketch_test.cpp b/tuple/test/array_of_doubles_sketch_test.cpp index 103ed8e9..bb0aa3e5 100644 --- a/tuple/test/array_of_doubles_sketch_test.cpp +++ b/tuple/test/array_of_doubles_sketch_test.cpp @@ -47,130 +47,6 @@ TEST_CASE("aod sketch: reset", "[tuple_sketch]") { REQUIRE(update_sketch.get_num_retained() == 0); } -TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") { - auto update_sketch = update_array_of_doubles_sketch::builder().build(); - REQUIRE(update_sketch.is_empty()); - REQUIRE(update_sketch.get_num_retained() == 0); - auto compact_sketch = update_sketch.compact(); - - // read binary sketch from Java - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "aod_1_compact_empty_from_java.sk", std::ios::binary); - auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is); - REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); - REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10)); - REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10)); -} - -TEST_CASE("aod sketch: serialization compatibility with java - empty configured for three values", "[tuple_sketch]") { - auto update_sketch = update_array_of_doubles_sketch::builder(3).build(); - REQUIRE(update_sketch.is_empty()); - REQUIRE(update_sketch.get_num_retained() == 0); - REQUIRE(update_sketch.get_num_values() == 3); - auto compact_sketch = update_sketch.compact(); - - // read binary sketch from Java - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "aod_3_compact_empty_from_java.sk", std::ios::binary); - auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is); - REQUIRE(compact_sketch.get_num_values() == compact_sketch_from_java.get_num_values()); - REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); - REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10)); - REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10)); -} - -TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") { - auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build(); - std::vector a = {1}; - update_sketch.update(1, a); - REQUIRE_FALSE(update_sketch.is_empty()); - REQUIRE(update_sketch.get_num_retained() == 0); - auto compact_sketch = update_sketch.compact(); - - // read binary sketch from Java - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "aod_1_compact_non_empty_no_entries_from_java.sk", std::ios::binary); - auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is); - REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); - REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10)); - REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10)); -} - -TEST_CASE("aod sketch: serialization compatibility with java - estimation mode", "[tuple_sketch]") { - auto update_sketch = update_array_of_doubles_sketch::builder().build(); - std::vector a = {1}; - for (int i = 0; i < 8192; ++i) update_sketch.update(i, a); - auto compact_sketch = update_sketch.compact(); - - // read binary sketch from Java - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "aod_1_compact_estimation_from_java.sk", std::ios::binary); - auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is); - REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); - REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10)); - REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10)); - - // sketch from Java is not ordered - // transform it to ordered so that iteration sequence would match exactly - compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true); - auto it = ordered_sketch_from_java.begin(); - for (const auto& entry: compact_sketch) { - REQUIRE(entry == *it); - ++it; - } -} - -TEST_CASE("aod sketch: serialization compatibility with java - exact mode with two values", "[tuple_sketch]") { - auto update_sketch = update_array_of_doubles_sketch::builder(2).build(); - std::vector a = {1, 2}; - for (int i = 0; i < 1000; ++i) update_sketch.update(i, a.data()); // pass vector as pointer - auto compact_sketch = update_sketch.compact(); - REQUIRE_FALSE(compact_sketch.is_estimation_mode()); - - // read binary sketch from Java - std::ifstream is; - is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(inputPath + "aod_2_compact_exact_from_java.sk", std::ios::binary); - auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is); - REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); - REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10)); - REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10)); - REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10)); - REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10)); - - // sketch from Java is not ordered - // transform it to ordered so that iteration sequence would match exactly - compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true); - auto it = ordered_sketch_from_java.begin(); - for (const auto& entry: compact_sketch) { - REQUIRE(entry.first == (*it).first); - REQUIRE(entry.second.size() == 2); - REQUIRE(entry.second[0] == (*it).second[0]); - REQUIRE(entry.second[1] == (*it).second[1]); - ++it; - } -} - TEST_CASE("aod sketch: stream serialize deserialize - estimation mode", "[tuple_sketch]") { auto update_sketch = update_array_of_doubles_sketch::builder(2).build(); std::vector a = {1, 2};