diff --git a/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java b/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java new file mode 100644 index 000000000000..11330544df78 --- /dev/null +++ b/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.gluten.vectorized; + +import org.apache.spark.sql.execution.vectorized.MutableColumnarRow; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.util.TaskResources$; +import org.junit.Assert; +import org.junit.Test; + +public class ArrowColumnVectorTest { + + @Test + public void testWriteByMutableColumnarRow() { + TaskResources$.MODULE$.runUnsafe( + () -> { + final ArrowWritableColumnVector[] columns = newArrowColumns("a decimal(20, 1)", 20); + MutableColumnarRow row = new MutableColumnarRow(columns); + Decimal decimal = new Decimal(); + decimal.set(234, 20, 1); + row.setDecimal(0, decimal, 20); + Assert.assertEquals(row.getDecimal(0, 20, 1), decimal); + return null; + }); + } + + private static ArrowWritableColumnVector[] newArrowColumns(String schema, int numRows) { + ArrowWritableColumnVector[] columns = + ArrowWritableColumnVector.allocateColumns(numRows, StructType.fromDDL(schema)); + for (ArrowWritableColumnVector col : columns) { + col.setValueCount(numRows); + } + return columns; + } +} diff --git a/cpp/velox/tests/VeloxRowToColumnarTest.cc b/cpp/velox/tests/VeloxRowToColumnarTest.cc index c784dbd59c34..0d11dd4acbc9 100644 --- a/cpp/velox/tests/VeloxRowToColumnarTest.cc +++ b/cpp/velox/tests/VeloxRowToColumnarTest.cc @@ -87,10 +87,58 @@ TEST_F(VeloxRowToColumnarTest, allTypes) { makeNullableFlatVector( {std::nullopt, true, false, std::nullopt, true, true, false, true, std::nullopt, std::nullopt}), makeFlatVector( - {"alice0", "bob1", "alice2", "bob3", "Alice4", "Bob5", "AlicE6", "boB7", "ALICE8", "BOB9"}), + {"alice0", + "bob1", + "alice2", + "bob3", + "Alice4", + "Bob5123456789098766notinline", + "AlicE6", + "boB7", + "ALICE8", + "BOB9"}), makeNullableFlatVector( {"alice", "bob", std::nullopt, std::nullopt, "Alice", "Bob", std::nullopt, "alicE", std::nullopt, "boB"}), }); testRowVectorEqual(vector); } + +TEST_F(VeloxRowToColumnarTest, bigint) { + auto vector = 
makeRowVector({ + makeNullableFlatVector({1, 2, 3, std::nullopt, 4, std::nullopt, 5, 6, std::nullopt, 7}), + }); + testRowVectorEqual(vector); +} + +TEST_F(VeloxRowToColumnarTest, decimal) { + auto vector = makeRowVector({ + makeNullableFlatVector( + {123456, HugeInt::build(1045, 1789), 3678, std::nullopt, 4, std::nullopt, 5, 687987, std::nullopt, 7}, + DECIMAL(38, 2)), + makeNullableFlatVector( + {178987, 2, 3, std::nullopt, 4, std::nullopt, 5, 6, std::nullopt, 7}, DECIMAL(12, 3)), + }); + testRowVectorEqual(vector); +} + +TEST_F(VeloxRowToColumnarTest, timestamp) { + auto vector = makeRowVector({ + makeNullableFlatVector( + {Timestamp(-946684800, 0), + Timestamp(-7266, 0), + Timestamp(0, 0), + Timestamp(946684800, 0), + Timestamp(9466848000, 0), + Timestamp(94668480000, 0), + Timestamp(946729316, 0), + Timestamp(946729316, 0), + Timestamp(946729316, 0), + Timestamp(7266, 0), + Timestamp(-50049331200, 0), + Timestamp(253405036800, 0), + Timestamp(-62480037600, 0), + std::nullopt}), + }); + testRowVectorEqual(vector); +} } // namespace gluten diff --git a/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java b/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java index dfd570debc0a..336d33771b90 100644 --- a/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java +++ b/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java @@ -1256,7 +1256,11 @@ void setNull(int rowId) { } void setNotNull(int rowId) { - throw new UnsupportedOperationException(); + // Arrow Java library doesn't usually expose this API from its vectors, so we have to + // allow a no-op here rather than throwing an exception that could fail the caller. This is + // acceptable because Spark will eventually set a value after this method returns, + // during which Arrow Java will set the validity buffer anyway. It is as if the call to + // `setNotNull` is just deferred. 
+ } void setNulls(int rowId, int count) { @@ -1745,6 +1749,14 @@ final void setLong(int rowId, long value) { final void setBytes(int rowId, BigDecimal value) { writer.setSafe(rowId, value); } + + final void setBytes(int rowId, int count, byte[] src, int srcIndex) { + if (count == src.length && srcIndex == 0) { + writer.setBigEndianSafe(rowId, src); + return; + } + throw new UnsupportedOperationException(); + } } private static class StringWriter extends ArrowVectorWriter {