diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 939b7846e..3ec416695 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -1,26 +1,6 @@ -use paste::paste; use pyo3::prelude::*; -use vortex::array::{ - Bool, BoolArray, BoolEncoding, Chunked, ChunkedArray, ChunkedEncoding, Constant, ConstantArray, - ConstantEncoding, Primitive, PrimitiveArray, PrimitiveEncoding, Sparse, SparseArray, - SparseEncoding, Struct, StructArray, StructEncoding, VarBin, VarBinArray, VarBinEncoding, - VarBinView, VarBinViewArray, VarBinViewEncoding, -}; use vortex::compute::take; -use vortex::encoding::EncodingRef; -use vortex::{Array, ArrayDType, ArrayData, ArrayDef, ToArray}; -use vortex_alp::{ALPArray, ALPEncoding, ALP}; -use vortex_dict::{Dict, DictArray, DictEncoding}; -use vortex_fastlanes::{ - BitPacked, BitPackedArray, BitPackedEncoding, Delta, DeltaArray, DeltaEncoding, FoR, FoRArray, - FoREncoding, -}; -use vortex_roaring::{ - RoaringBool, RoaringBoolArray, RoaringBoolEncoding, RoaringInt, RoaringIntArray, - RoaringIntEncoding, -}; -use vortex_runend::{RunEnd, RunEndArray, RunEndEncoding}; -use vortex_zigzag::{ZigZag, ZigZagArray, ZigZagEncoding}; +use vortex::{Array, ArrayDType}; use crate::dtype::PyDType; use crate::error::PyVortexError; @@ -31,144 +11,9 @@ pub struct PyArray { inner: Array, } -macro_rules! pyarray { - ($E:ident, $T:ident, $TName:tt) => { - paste! { - #[pyclass(name = $TName, module = "vortex", extends = PyArray, sequence, subclass)] - pub struct [] { - inner: $T, - #[allow(dead_code)] - encoding: EncodingRef, - } - - impl [] { - pub fn wrap(py: Python<'_>, inner: $T) -> PyResult> { - let init = PyClassInitializer::from(PyArray { inner: inner.to_array().clone() }) - .add_subclass([] { inner, encoding: &$E }); - Py::new(py, init) - } - - pub fn unwrap(&self) -> &$T { - &self.inner - } - } - } - }; -} - -pyarray!(BoolEncoding, BoolArray, "BoolArray"); -pyarray!(ChunkedEncoding, ChunkedArray, "ChunkedArray"); -pyarray!(ConstantEncoding, ConstantArray, "ConstantArray"); -pyarray!(PrimitiveEncoding, PrimitiveArray, "PrimitiveArray"); -pyarray!(SparseEncoding, SparseArray, "SparseArray"); -pyarray!(StructEncoding, StructArray, "StructArray"); -pyarray!(VarBinEncoding, VarBinArray, "VarBinArray"); -pyarray!(VarBinViewEncoding, VarBinViewArray, "VarBinViewArray"); - -pyarray!(ALPEncoding, ALPArray, "ALPArray"); -pyarray!(BitPackedEncoding, BitPackedArray, "BitPackedArray"); -pyarray!(FoREncoding, FoRArray, "FoRArray"); -pyarray!(DeltaEncoding, DeltaArray, "DeltaArray"); -pyarray!(DictEncoding, DictArray, "DictArray"); -pyarray!(RunEndEncoding, RunEndArray, "RunEndArray"); -pyarray!(RoaringBoolEncoding, RoaringBoolArray, "RoaringBoolArray"); -pyarray!(RoaringIntEncoding, RoaringIntArray, "RoaringIntArray"); -pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray"); - impl PyArray { - pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult> { - let encoding_id = inner.encoding().id(); - let array = Array::from(inner); - // This is the one place where we'd want to have owned kind enum but there's no other place this is used - match encoding_id { - Bool::ID => PyBoolArray::wrap( - py, - BoolArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Chunked::ID => PyChunkedArray::wrap( - py, - ChunkedArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Constant::ID => PyConstantArray::wrap( - py, - ConstantArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Primitive::ID => PyPrimitiveArray::wrap( - py, - PrimitiveArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Sparse::ID => PySparseArray::wrap( - py, - SparseArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Struct::ID => PyStructArray::wrap( - py, - StructArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - VarBin::ID => PyVarBinArray::wrap( - py, - VarBinArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - VarBinView::ID => PyVarBinViewArray::wrap( - py, - VarBinViewArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Dict::ID => PyDictArray::wrap( - py, - DictArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - RunEnd::ID => PyRunEndArray::wrap( - py, - RunEndArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - Delta::ID => PyDeltaArray::wrap( - py, - DeltaArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - FoR::ID => PyFoRArray::wrap( - py, - FoRArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - BitPacked::ID => PyBitPackedArray::wrap( - py, - BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - - ALP::ID => PyALPArray::wrap( - py, - ALPArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - RoaringBool::ID => PyBitPackedArray::wrap( - py, - BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - RoaringInt::ID => PyBitPackedArray::wrap( - py, - BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - ZigZag::ID => PyZigZagArray::wrap( - py, - ZigZagArray::try_from(array).map_err(PyVortexError::map_err)?, - )? - .extract(py), - _ => Py::new(py, Self { inner: array }), - } + pub fn new(inner: Array) -> PyArray { + PyArray { inner } } pub fn unwrap(&self) -> &Array { @@ -205,9 +50,9 @@ impl PyArray { PyDType::wrap(self_.py(), self_.inner.dtype().clone()) } - fn take(&self, indices: PyRef<'_, Self>) -> PyResult> { + fn take<'py>(&self, indices: PyRef<'py, Self>) -> PyResult> { take(&self.inner, indices.unwrap()) .map_err(PyVortexError::map_err) - .and_then(|arr| Self::wrap(indices.py(), arr.into())) + .and_then(|arr| Bound::new(indices.py(), PyArray { inner: arr })) } } diff --git a/pyvortex/src/compress.rs b/pyvortex/src/compress.rs index 9c9342662..71c29be9f 100644 --- a/pyvortex/src/compress.rs +++ b/pyvortex/src/compress.rs @@ -41,5 +41,5 @@ pub fn compress( let compressed = py .allow_threads(|| ctx.compress(arr.unwrap(), None)) .map_err(PyVortexError::map_err)?; - PyArray::wrap(py, compressed) + Bound::new(array.py(), PyArray::new(inner)) } diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index bab5233cd..2cc6adccf 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -7,7 +7,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::ChunkedArray; use vortex::arrow::{FromArrowArray, FromArrowType}; -use vortex::{Array, ToArrayData}; +use vortex::{Array, IntoArray}; use vortex_dtype::DType; use crate::array::PyArray; @@ -17,7 +17,7 @@ use crate::vortex_arrow::map_arrow_err; /// The main entry point for creating enc arrays from other Python objects. /// #[pyfunction] -pub fn encode(obj: &Bound) -> PyResult> { +pub fn encode<'py>(obj: &Bound<'py, PyAny>) -> PyResult> { let pa = obj.py().import_bound("pyarrow")?; let pa_array = pa.getattr("Array")?; let chunked_array = pa.getattr("ChunkedArray")?; @@ -26,7 +26,7 @@ pub fn encode(obj: &Bound) -> PyResult> { if obj.is_instance(&pa_array)? { let arrow_array = ArrowArrayData::from_pyarrow_bound(obj).map(make_array)?; let enc_array = Array::from_arrow(arrow_array, false); - PyArray::wrap(obj.py(), enc_array.into()) + Bound::new(obj.py(), PyArray::new(enc_array)) } else if obj.is_instance(&chunked_array)? { let chunks: Vec> = obj.getattr("chunks")?.extract()?; let encoded_chunks = chunks @@ -41,11 +41,13 @@ pub fn encode(obj: &Bound) -> PyResult> { .getattr("type") .and_then(|v| DataType::from_pyarrow_bound(&v)) .map(|dt| DType::from_arrow(&Field::new("_", dt, false)))?; - PyArray::wrap( + Bound::new( obj.py(), - ChunkedArray::try_new(encoded_chunks, dtype) - .map_err(PyVortexError::map_err)? - .to_array_data(), + PyArray::new( + ChunkedArray::try_new(encoded_chunks, dtype) + .map_err(PyVortexError::map_err)? + .into_array(), + ), ) } else if obj.is_instance(&table)? { let array_stream = ArrowArrayStreamReader::from_pyarrow_bound(obj)?; @@ -54,11 +56,13 @@ pub fn encode(obj: &Bound) -> PyResult> { .into_iter() .map(|b| b.map(Array::from).map_err(map_arrow_err)) .collect::>>()?; - PyArray::wrap( + Bound::new( obj.py(), - ChunkedArray::try_new(chunks, dtype) - .map_err(PyVortexError::map_err)? - .to_array_data(), + PyArray::new( + ChunkedArray::try_new(chunks, dtype) + .map_err(PyVortexError::map_err)? + .into_array(), + ), ) } else { Err(PyValueError::new_err("Cannot convert object to enc array")) diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index 0b732b373..8da6147c1 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -5,8 +5,6 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex_dtype::{DType, PType}; -use crate::array::*; - mod array; mod dtype; mod encode; @@ -21,25 +19,6 @@ fn _lib(_py: Python, m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(encode::encode, m)?)?; // m.add_function(wrap_pyfunction!(compress::compress, m)?)?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_function(wrap_pyfunction!(dtype_int, m)?)?; diff --git a/pyvortex/test/test_array.py b/pyvortex/test/test_array.py index cba6a9cd0..db93368ba 100644 --- a/pyvortex/test/test_array.py +++ b/pyvortex/test/test_array.py @@ -5,14 +5,12 @@ def test_primitive_array_round_trip(): a = pa.array([0, 1, 2, 3]) arr = vortex.encode(a) - assert isinstance(arr, vortex.PrimitiveArray) assert arr.to_arrow().combine_chunks() == a def test_varbin_array_round_trip(): a = pa.array(["a", "b", "c"]) arr = vortex.encode(a) - assert isinstance(arr, vortex.VarBinArray) assert arr.to_arrow().combine_chunks() == a diff --git a/pyvortex/test/test_compress.py b/pyvortex/test/test_compress.py index 5792c6331..368e7c488 100644 --- a/pyvortex/test/test_compress.py +++ b/pyvortex/test/test_compress.py @@ -59,7 +59,6 @@ def test_zigzag_encode(): def test_chunked_encode(): chunked = pa.chunked_array([pa.array([0, 1, 2]), pa.array([3, 4, 5])]) encoded = vortex.encode(chunked) - assert isinstance(encoded, vortex.ChunkedArray) assert encoded.to_arrow().combine_chunks() == pa.array([0, 1, 2, 3, 4, 5]) @@ -71,7 +70,6 @@ def test_table_encode(): } ) encoded = vortex.encode(table) - assert isinstance(encoded, vortex.ChunkedArray) assert encoded.to_arrow().combine_chunks() == pa.StructArray.from_arrays( [pa.array([0, 1, 2, 3, 4, 5]), pa.array(["a", "b", "c", "d", "e", "f"])], names=["number", "string"] )