From 0b7384272b82088b110edb9c8e95adf9372af997 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 9 Nov 2023 11:06:50 -0500 Subject: [PATCH] Test import/export of string view data with multiple data buffers --- cpp/src/arrow/c/bridge_test.cc | 100 ++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 157a1f80a1660..8cbe31f707f2b 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -37,6 +37,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/util.h" +#include "arrow/util/binary_view_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/endian.h" #include "arrow/util/key_value_metadata.h" @@ -92,7 +93,7 @@ class ReleaseCallback { public: using CType = typename Traits::CType; - explicit ReleaseCallback(CType* c_struct) : called_(false) { + explicit ReleaseCallback(CType* c_struct) { orig_release_ = c_struct->release; orig_private_data_ = c_struct->private_data; c_struct->release = StaticRelease; @@ -124,7 +125,7 @@ class ReleaseCallback { private: ARROW_DISALLOW_COPY_AND_ASSIGN(ReleaseCallback); - bool called_; + bool called_{}; void (*orig_release_)(CType*); void* orig_private_data_; }; @@ -239,8 +240,7 @@ struct SchemaExportChecker { flattened_flags.empty() ? std::vector(flattened_formats_.size(), kDefaultFlags) : std::move(flattened_flags)), - flattened_metadata_(std::move(flattened_metadata)), - flattened_index_(0) {} + flattened_metadata_(std::move(flattened_metadata)) {} void operator()(struct ArrowSchema* c_export, bool inner = false) { ASSERT_LT(flattened_index_, flattened_formats_.size()); @@ -289,7 +289,7 @@ struct SchemaExportChecker { const std::vector flattened_names_; std::vector flattened_flags_; const std::vector flattened_metadata_; - size_t flattened_index_; + size_t flattened_index_{}; }; class TestSchemaExport : public ::testing::Test { @@ -905,6 +905,32 @@ TEST_F(TestArrayExport, PrimitiveSliced) { TestPrimitive(factory); } +constexpr std::string_view binary_view_buffer_content0 = "12345foo bar baz quux", + binary_view_buffer_content1 = "BinaryViewMultipleBuffers"; + +static const BinaryViewType::c_type binary_view_buffer1[] = { + util::ToBinaryView(binary_view_buffer_content0, 0, 0), + util::ToInlineBinaryView("foo"), + util::ToBinaryView(binary_view_buffer_content1, 1, 0), + util::ToInlineBinaryView("bar"), + util::ToBinaryView(binary_view_buffer_content0.substr(5), 0, 5), + util::ToInlineBinaryView("baz"), + util::ToBinaryView(binary_view_buffer_content1.substr(6, 13), 1, 6), + util::ToInlineBinaryView("quux"), +}; + +TEST_F(TestArrayExport, BinaryViewMultipleBuffers) { + auto length = static_cast(std::size(binary_view_buffer1)); + auto arr = std::make_shared( + binary_view(), length, Buffer::Wrap(binary_view_buffer1, length), + BufferVector{ + std::make_shared(binary_view_buffer_content0), + std::make_shared(binary_view_buffer_content1), + }); + TestNested([&] { return arr; }); + TestNested([&] { return arr->Slice(1, length - 2); }); +} + TEST_F(TestArrayExport, Null) { TestPrimitive(null(), "[null, null, null]"); TestPrimitive(null(), "[]"); @@ -1202,13 +1228,16 @@ TEST_F(TestArrayExport, ExportRecordBatch) { static const char kMyDeviceTypeName[] = "arrowtest::MyDevice"; static const ArrowDeviceType kMyDeviceType = ARROW_DEVICE_EXT_DEV; -static const void* kMyEventPtr = reinterpret_cast(uintptr_t(0xBAADF00D)); +static const void* kMyEventPtr = + reinterpret_cast(static_cast(0xBAADF00D)); class MyBuffer final : public MutableBuffer { public: using MutableBuffer::MutableBuffer; - ~MyBuffer() { default_memory_pool()->Free(const_cast(data_), size_); } + ~MyBuffer() override { + default_memory_pool()->Free(const_cast(data_), size_); + } std::shared_ptr device_sync_event() override { return device_sync_; } @@ -1238,7 +1267,7 @@ class MyDevice : public Device { explicit MySyncEvent(void* sync_event, release_fn_t release_sync_event) : Device::SyncEvent(sync_event, release_sync_event) {} - virtual ~MySyncEvent() = default; + ~MySyncEvent() override = default; Status Wait() override { return Status::OK(); } Status Record(const Device::Stream&) override { return Status::OK(); } }; @@ -2339,6 +2368,16 @@ static const void* large_string_buffers_no_nulls1[3] = { static const void* large_string_buffers_omitted[3] = { nullptr, large_string_offsets_buffer1, nullptr}; +constexpr int64_t binary_view_buffer_sizes1[] = {binary_view_buffer_content0.size(), + binary_view_buffer_content1.size()}; +static const void* binary_view_buffers_no_nulls1[] = { + nullptr, + binary_view_buffer1, + binary_view_buffer_content0.data(), + binary_view_buffer_content1.data(), + binary_view_buffer_sizes1, +}; + static const int32_t list_offsets_buffer1[] = {0, 2, 2, 5, 6, 8}; static const void* list_buffers_no_nulls1[2] = {nullptr, list_offsets_buffer1}; static const void* list_buffers_nulls1[2] = {bits_buffer1, list_offsets_buffer1}; @@ -2418,6 +2457,16 @@ class TestArrayImport : public ::testing::Test { c->buffers = buffers; } + void FillStringViewLike(struct ArrowArray* c, int64_t length, int64_t null_count, + int64_t offset, const void** buffers, + int32_t data_buffer_count) { + c->length = length; + c->null_count = null_count; + c->offset = offset; + c->n_buffers = 2 + data_buffer_count + 1; + c->buffers = buffers; + } + void FillListLike(struct ArrowArray* c, int64_t length, int64_t null_count, int64_t offset, const void** buffers) { c->length = length; @@ -2480,6 +2529,12 @@ class TestArrayImport : public ::testing::Test { FillStringLike(&c_struct_, length, null_count, offset, buffers); } + void FillStringViewLike(int64_t length, int64_t null_count, int64_t offset, + const void** buffers, int32_t data_buffer_count) { + FillStringViewLike(&c_struct_, length, null_count, offset, buffers, + data_buffer_count); + } + void FillListLike(int64_t length, int64_t null_count, int64_t offset, const void** buffers) { FillListLike(&c_struct_, length, null_count, offset, buffers); @@ -2726,6 +2781,15 @@ TEST_F(TestArrayImport, String) { FillStringLike(4, 0, 0, large_string_buffers_no_nulls1); CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])")); + auto length = static_cast(std::size(binary_view_buffer1)); + FillStringViewLike(length, 0, 0, binary_view_buffers_no_nulls1, 2); + CheckImport(std::make_shared( + binary_view(), length, Buffer::Wrap(binary_view_buffer1, length), + BufferVector{ + std::make_shared(binary_view_buffer_content0), + std::make_shared(binary_view_buffer_content1), + })); + // Empty array with null data pointers FillStringLike(0, 0, 0, string_buffers_omitted); CheckImport(ArrayFromJSON(utf8(), "[]")); @@ -3364,13 +3428,13 @@ TEST_F(TestSchemaRoundtrip, Primitive) { TestWithTypeFactory(boolean); TestWithTypeFactory(float16); - TestWithTypeFactory(std::bind(decimal128, 19, 4)); - TestWithTypeFactory(std::bind(decimal256, 19, 4)); - TestWithTypeFactory(std::bind(decimal128, 19, 0)); - TestWithTypeFactory(std::bind(decimal256, 19, 0)); - TestWithTypeFactory(std::bind(decimal128, 19, -5)); - TestWithTypeFactory(std::bind(decimal256, 19, -5)); - TestWithTypeFactory(std::bind(fixed_size_binary, 3)); + TestWithTypeFactory([] { return decimal128(19, 4); }); + TestWithTypeFactory([] { return decimal256(19, 4); }); + TestWithTypeFactory([] { return decimal128(19, 0); }); + TestWithTypeFactory([] { return decimal256(19, 0); }); + TestWithTypeFactory([] { return decimal128(19, -5); }); + TestWithTypeFactory([] { return decimal256(19, -5); }); + TestWithTypeFactory([] { return fixed_size_binary(3); }); TestWithTypeFactory(binary); TestWithTypeFactory(large_utf8); } @@ -3380,8 +3444,8 @@ TEST_F(TestSchemaRoundtrip, Temporal) { TestWithTypeFactory(day_time_interval); TestWithTypeFactory(month_interval); TestWithTypeFactory(month_day_nano_interval); - TestWithTypeFactory(std::bind(time64, TimeUnit::NANO)); - TestWithTypeFactory(std::bind(duration, TimeUnit::MICRO)); + TestWithTypeFactory([] { return time64(TimeUnit::NANO); }); + TestWithTypeFactory([] { return duration(TimeUnit::MICRO); }); TestWithTypeFactory([]() { return arrow::timestamp(TimeUnit::MICRO, "Europe/Paris"); }); } @@ -4008,8 +4072,6 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) { TestWithJSON(mm, int32(), "[4, 5, null]"); } -// TODO C -> C++ -> C roundtripping tests? - //////////////////////////////////////////////////////////////////////////// // Array stream export tests