Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update py03 from 0.20 to 0.21 #5566

Merged
merged 10 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
pyo3 = { version = "0.20", features = ["extension-module"] }
pyo3 = { version = "0.21", features = ["extension-module"] }
2 changes: 1 addition & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ arrow-select = { workspace = true }
arrow-string = { workspace = true }

rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
pyo3 = { version = "0.20", default-features = false, optional = true }
pyo3 = { version = "0.21", default-features = false, optional = true }

[package.metadata.docs.rs]
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
Expand Down
38 changes: 17 additions & 21 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ impl<T: ToPyArrow> IntoPyArrow for T {
}

fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
let pyarrow = PyModule::import(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?;
let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?.into_gil_ref(); // TODO
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with pyo3, so I was following the migration guide: https://pyo3.rs/v0.21.0/migration.html

Of particular note is:

PyO3 0.21 introduces a new Bound<'py, T> smart pointer which replaces the existing "GIL Refs" API to interact with Python objects. For example, in PyO3 0.20 the reference &'py PyAny would be used to interact with Python objects. In PyO3 0.21 the updated type is Bound<'py, PyAny>.

So it seems they are moving away from &PyAny is my understanding. This raises a question of if the PyArrow API's should follow suit?

e.g. here, introduce a new function that uses Bound?

fn from_pyarrow(value: &PyAny) -> PyResult<Self> {

For now I've used into_gil_ref() to get around this deprecation process, but I don't think it's meant to be a permanent solution.

I'll do some more reading, maybe it's been discussed in pyo3 before somewhere 🤔

if !value.is_instance(class)? {
let expected_module = class.getattr("__module__")?.extract::<&str>()?;
let expected_name = class.getattr("__name__")?.extract::<&str>()?;
Expand Down Expand Up @@ -143,8 +143,7 @@ impl FromPyArrow for DataType {
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -166,7 +165,7 @@ impl ToPyArrow for DataType {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("DataType")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
Expand All @@ -179,8 +178,7 @@ impl FromPyArrow for Field {
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -202,7 +200,7 @@ impl ToPyArrow for Field {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Field")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
Expand All @@ -215,8 +213,7 @@ impl FromPyArrow for Schema {
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -238,7 +235,7 @@ impl ToPyArrow for Schema {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Schema")?;
let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(schema.into())
Expand All @@ -259,8 +256,8 @@ impl FromPyArrow for ArrayData {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?;
let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -296,7 +293,7 @@ impl ToPyArrow for ArrayData {
let array = FFI_ArrowArray::new(self);
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;

let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
Expand Down Expand Up @@ -340,8 +337,8 @@ impl FromPyArrow for RecordBatch {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?;
let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -400,8 +397,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
let capsule: &PyCapsule = value.getattr("__arrow_c_stream__")?.call0()?.downcast()?;
validate_pycapsule(capsule, "arrow_array_stream")?;

let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
Expand All @@ -421,7 +417,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]);
value.call_method1("_export_to_c", args)?;

let stream_reader = ArrowArrayStreamReader::try_new(stream)
Expand All @@ -439,9 +435,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
let mut stream = FFI_ArrowArrayStream::new(self);

let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;

Ok(PyObject::from(reader))
Expand Down
4 changes: 2 additions & 2 deletions arrow/tests/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn test_to_pyarrow() {

let res = Python::with_gil(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
let records = RecordBatch::from_pyarrow(py_input.bind(py).as_gil_ref())?; // TODO
let py_records = records.to_pyarrow(py)?;
RecordBatch::from_pyarrow(py_records.as_ref(py))
RecordBatch::from_pyarrow(py_records.bind(py).as_gil_ref()) // TODO
})
.unwrap();

Expand Down
Loading