Skip to content

Commit

Permalink
Merge pull request #387 from apache/java_serde_compat_testing
Browse files Browse the repository at this point in the history
test deserialize sketches from Java
  • Loading branch information
AlexanderSaydakov authored Jul 25, 2023
2 parents 4d61301 + f8774b0 commit 52f1d6c
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 6 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/serde_compat.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Verifies that sketches serialized by datasketches-java can be deserialized
# by the C++ library. The Java project is checked out next to the C++ sources,
# its tests are run first, and then the C++ tests are built with SERDE_COMPAT
# enabled so that the *_deserialize_from_java_test.cpp sources are compiled.
name: Java SerDe Compatibility Test

on:
  push:
    branches:
      - master
  workflow_dispatch:

jobs:
  build:
    name: SerDe Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      # The C++ tests look for the binary sketches under <project root>/java
      - name: Checkout Java
        uses: actions/checkout@v3
        with:
          repository: apache/datasketches-java
          path: java
      # testng.excludedgroups is overridden with an empty value; presumably this
      # enables the test group that writes the sketch files -- confirm against
      # the Java project's build configuration.
      - name: Run Java
        run: cd java && mvn test -Dtestng.excludedgroups=
      # -S/-B configures into ./build without requiring the directory to exist
      - name: Run cmake
        run: cmake -S . -B build -DSERDE_COMPAT=true
      - name: Build C++ unit tests
        run: cmake --build build --config Release
      - name: Run C++ tests
        run: cmake --build build --config Release --target test
19 changes: 13 additions & 6 deletions cpc/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ set_target_properties(cpc_test PROPERTIES
CXX_STANDARD_REQUIRED YES
)

#file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH)
#string(APPEND CPC_TEST_BINARY_PATH "/")
#target_compile_definitions(cpc_test
# PRIVATE
# TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}"
#)
# Pass this source directory (as a CMake-style path with a trailing slash)
# to the cpc_test sources via the TEST_BINARY_INPUT_PATH compile definition.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH)
set(CPC_TEST_BINARY_PATH "${CPC_TEST_BINARY_PATH}/")
target_compile_definitions(cpc_test PRIVATE TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}")

add_test(
NAME cpc_test
Expand All @@ -42,3 +42,10 @@ target_sources(cpc_test
compression_test.cpp
cpc_sketch_allocation_test.cpp
)

# The Java compatibility test is compiled only when SERDE_COMPAT is set
# (e.g. cmake -DSERDE_COMPAT=true), because it reads sketch files that are
# expected to have been generated by datasketches-java.
if (SERDE_COMPAT)
  target_sources(cpc_test PRIVATE cpc_sketch_deserialize_from_java_test.cpp)
endif()
42 changes: 42 additions & 0 deletions cpc/test/cpc_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <fstream>
#include <cpc_sketch.hpp>

namespace datasketches {

// Directory holding the binary sketches produced by datasketches-java code.
// The Java project is assumed to live in the "java" subdirectory of this
// project's root, reached by going two levels up from TEST_BINARY_INPUT_PATH.
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH).append("../../java/");

// Deserializes CPC sketches written by datasketches-java for several stream
// lengths and checks emptiness and the distinct-count estimate of each one.
TEST_CASE("cpc sketch", "[serde_compat]") {
  const unsigned stream_lengths[] = {0, 100, 200, 2000, 20000};
  for (const unsigned n : stream_lengths) {
    const std::string file_name = testBinaryInputPath + "cpc_n" + std::to_string(n) + ".sk";
    std::ifstream input;
    // exceptions are enabled before open() so that a missing file throws immediately
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_name, std::ios::binary);
    const auto sketch = cpc_sketch::deserialize(input);
    REQUIRE(sketch.is_empty() == (n == 0));
    // the estimate must be within an absolute margin of 2% of n
    REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02));
  }
}

} /* namespace datasketches */
7 changes: 7 additions & 0 deletions kll/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,10 @@ target_sources(kll_test
kll_sketch_validation.cpp
kolmogorov_smirnov_test.cpp
)

# The Java compatibility test is compiled only when SERDE_COMPAT is set
# (e.g. cmake -DSERDE_COMPAT=true), because it reads sketch files that are
# expected to have been generated by datasketches-java.
if (SERDE_COMPAT)
  target_sources(kll_test PRIVATE kll_sketch_deserialize_from_java_test.cpp)
endif()
78 changes: 78 additions & 0 deletions kll/test/kll_sketch_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <catch2/catch.hpp>
#include <fstream>
#include <kll_sketch.hpp>

namespace datasketches {

// Directory holding the binary sketches produced by datasketches-java code.
// The Java project is assumed to live in the "java" subdirectory of this
// project's root, reached by going two levels up from TEST_BINARY_INPUT_PATH.
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH).append("../../java/");

// Deserializes KLL float sketches written by datasketches-java for several
// stream lengths and checks size, mode, min/max items and total weight.
TEST_CASE("kll float", "[serde_compat]") {
  const unsigned stream_lengths[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n : stream_lengths) {
    const std::string file_name = testBinaryInputPath + "kll_float_n" + std::to_string(n) + ".sk";
    std::ifstream input;
    // exceptions are enabled before open() so that a missing file throws immediately
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_name, std::ios::binary);
    const auto sketch = kll_sketch<float>::deserialize(input);
    REQUIRE(sketch.is_empty() == (n == 0));
    REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K));
    REQUIRE(sketch.get_n() == n);
    if (n == 0) continue; // nothing more to check for an empty sketch
    REQUIRE(sketch.get_min_item() == 0.0);
    REQUIRE(sketch.get_max_item() == static_cast<double>(n - 1));
    // every retained item must lie within [min, max], and the weights must add up to n
    uint64_t total_weight = 0;
    for (const auto& entry : sketch) {
      REQUIRE(entry.first >= sketch.get_min_item());
      REQUIRE(entry.first <= sketch.get_max_item());
      total_weight += entry.second;
    }
    REQUIRE(total_weight == sketch.get_n());
  }
}

// Deserializes KLL double sketches written by datasketches-java for several
// stream lengths and checks size, mode, min/max items and total weight.
TEST_CASE("kll double", "[serde_compat]") {
  const unsigned stream_lengths[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n : stream_lengths) {
    const std::string file_name = testBinaryInputPath + "kll_double_n" + std::to_string(n) + ".sk";
    std::ifstream input;
    // exceptions are enabled before open() so that a missing file throws immediately
    input.exceptions(std::ios::failbit | std::ios::badbit);
    input.open(file_name, std::ios::binary);
    const auto sketch = kll_sketch<double>::deserialize(input);
    REQUIRE(sketch.is_empty() == (n == 0));
    REQUIRE(sketch.is_estimation_mode() == (n > kll_constants::DEFAULT_K));
    REQUIRE(sketch.get_n() == n);
    if (n == 0) continue; // nothing more to check for an empty sketch
    REQUIRE(sketch.get_min_item() == 0.0);
    REQUIRE(sketch.get_max_item() == static_cast<double>(n - 1));
    // every retained item must lie within [min, max], and the weights must add up to n
    uint64_t total_weight = 0;
    for (const auto& entry : sketch) {
      REQUIRE(entry.first >= sketch.get_min_item());
      REQUIRE(entry.first <= sketch.get_max_item());
      total_weight += entry.second;
    }
    REQUIRE(total_weight == sketch.get_n());
  }
}

} /* namespace datasketches */

0 comments on commit 52f1d6c

Please sign in to comment.