From 38d726e135d8bd315e1a0a2dcadad5c027a93b35 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Tue, 19 Mar 2024 22:33:31 +0800 Subject: [PATCH 01/12] extract value_ref_internal --- src/row.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/row.rs b/src/row.rs index 28238526..4f93b19a 100644 --- a/src/row.rs +++ b/src/row.rs @@ -7,6 +7,7 @@ use arrow::{ array::{self, Array, StructArray}, datatypes::*, }; +use arrow::array::ArrayRef; use fallible_iterator::FallibleIterator; use fallible_streaming_iterator::FallibleStreamingIterator; use rust_decimal::prelude::*; @@ -339,6 +340,10 @@ impl<'stmt> Row<'stmt> { fn value_ref(&self, row: usize, col: usize) -> ValueRef<'_> { let column = self.arr.as_ref().as_ref().unwrap().column(col); + Self::value_ref_internal(row, col, column) + } + + fn value_ref_internal(row: usize, col: usize, column: &ArrayRef) -> ValueRef { if column.is_null(row) { return ValueRef::Null; } From 6e8a9b611303fd9546044e35315aa01c745f0ccb Mon Sep 17 00:00:00 2001 From: Elliana May Date: Tue, 19 Mar 2024 22:28:20 +0800 Subject: [PATCH 02/12] simplify error message --- src/row.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/row.rs b/src/row.rs index 4f93b19a..cff22ff2 100644 --- a/src/row.rs +++ b/src/row.rs @@ -583,7 +583,7 @@ impl<'stmt> Row<'stmt> { // DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { // make_string_time!(array::Time64NanosecondArray, column, row) // } - _ => unreachable!("invalid value: {}, {}", col, self.stmt.column_type(col)), + _ => unreachable!("invalid value: {}", col), } } From 7317e6d69935358dbd25ad578f86e0f9587c7509 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Wed, 20 Mar 2024 15:29:21 +0800 Subject: [PATCH 03/12] kinda working --- src/row.rs | 9 +++++++-- src/types/value.rs | 2 ++ src/types/value_ref.rs | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/row.rs b/src/row.rs index cff22ff2..5239271a 100644 --- a/src/row.rs +++ b/src/row.rs @@ -3,11 +3,11 @@ use std::{convert, sync::Arc}; use super::{Error, Result, Statement}; use crate::types::{self, FromSql, FromSqlError, ValueRef}; +use arrow::array::{ArrayRef, ListArray}; use arrow::{ array::{self, Array, StructArray}, datatypes::*, }; -use arrow::array::ArrayRef; use fallible_iterator::FallibleIterator; use fallible_streaming_iterator::FallibleStreamingIterator; use rust_decimal::prelude::*; @@ -343,7 +343,7 @@ impl<'stmt> Row<'stmt> { Self::value_ref_internal(row, col, column) } - fn value_ref_internal(row: usize, col: usize, column: &ArrayRef) -> ValueRef { + pub(crate) fn value_ref_internal(row: usize, col: usize, column: &ArrayRef) -> ValueRef { if column.is_null(row) { return ValueRef::Null; } @@ -583,6 +583,11 @@ impl<'stmt> Row<'stmt> { // DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { // make_string_time!(array::Time64NanosecondArray, column, row) // } + DataType::List(data) => { + let res = column.as_any().downcast_ref::().unwrap().values(); + + ValueRef::List(res, row) + } _ => unreachable!("invalid value: {}", col), } } diff --git a/src/types/value.rs b/src/types/value.rs index f3cf1f7f..d013bee8 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -46,6 +46,7 @@ pub enum Value { Date32(i32), /// The value is a time64 Time64(TimeUnit, i64), + List(Vec), } impl From for Value { @@ -212,6 +213,7 @@ impl Value { Value::Blob(_) => Type::Blob, Value::Date32(_) => Type::Date32, Value::Time64(..) => Type::Time64, + Value::List(_) => todo!(), } } } diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index f071dc3b..6c149092 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -1,8 +1,11 @@ use super::{Type, Value}; use crate::types::{FromSqlError, FromSqlResult}; +use crate::Row; use rust_decimal::prelude::*; +use arrow::array::ArrayRef; + /// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. /// Copy from arrow::datatypes::TimeUnit #[derive(Copy, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -61,6 +64,7 @@ pub enum ValueRef<'a> { Date32(i32), /// The value is a time64 Time64(TimeUnit, i64), + List(&'a ArrayRef, usize), } impl ValueRef<'_> { @@ -87,8 +91,13 @@ impl ValueRef<'_> { ValueRef::Blob(_) => Type::Blob, ValueRef::Date32(_) => Type::Date32, ValueRef::Time64(..) => Type::Time64, + ValueRef::List(..) => todo!(), } } + + pub fn to_owned(&self) -> Value { + (*self).clone().into() + } } impl<'a> ValueRef<'a> { @@ -140,6 +149,13 @@ impl From> for Value { ValueRef::Blob(b) => Value::Blob(b.to_vec()), ValueRef::Date32(d) => Value::Date32(d), ValueRef::Time64(t, d) => Value::Time64(t, d), + ValueRef::List(items, idx) => { + let range = 0..items.len(); + let map: Vec = range + .map(|row| Row::value_ref_internal(row, idx, items).to_owned()) + .collect(); + Value::List(map) + } } } } @@ -181,6 +197,7 @@ impl<'a> From<&'a Value> for ValueRef<'a> { Value::Blob(ref b) => ValueRef::Blob(b), Value::Date32(d) => ValueRef::Date32(d), Value::Time64(t, d) => ValueRef::Time64(t, d), + Value::List(..) => unimplemented!(), } } } From 0943c9429a5cd66cd84c2c14b6673c1bfae5c56f Mon Sep 17 00:00:00 2001 From: Elliana May Date: Wed, 20 Mar 2024 18:39:43 +0800 Subject: [PATCH 04/12] fix clippy warning --- src/types/value_ref.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index 6c149092..9758d45e 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -95,8 +95,9 @@ impl ValueRef<'_> { } } + /// Returns an owned version of this ValueRef pub fn to_owned(&self) -> Value { - (*self).clone().into() + (*self).into() } } From a31ffb2ac12d0eef1c302755e2ca67710097afbc Mon Sep 17 00:00:00 2001 From: Elliana May Date: Wed, 20 Mar 2024 18:52:57 +0800 Subject: [PATCH 05/12] add more docs --- src/types/value.rs | 1 + src/types/value_ref.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/types/value.rs b/src/types/value.rs index d013bee8..77ed663d 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -46,6 +46,7 @@ pub enum Value { Date32(i32), /// The value is a time64 Time64(TimeUnit, i64), + /// The value is a list List(Vec), } diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index 9758d45e..12531a6a 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -64,6 +64,7 @@ pub enum ValueRef<'a> { Date32(i32), /// The value is a time64 Time64(TimeUnit, i64), + /// The value is a list List(&'a ArrayRef, usize), } From c7c786449413e011959bfc7ff941bb8056153776 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Wed, 20 Mar 2024 19:05:44 +0800 Subject: [PATCH 06/12] use list offsets --- src/row.rs | 6 +++--- src/types/value_ref.rs | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/row.rs b/src/row.rs index 5239271a..4f139376 100644 --- a/src/row.rs +++ b/src/row.rs @@ -583,10 +583,10 @@ impl<'stmt> Row<'stmt> { // DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { // make_string_time!(array::Time64NanosecondArray, column, row) // } - DataType::List(data) => { - let res = column.as_any().downcast_ref::().unwrap().values(); + DataType::List(_data) => { + let arr = column.as_any().downcast_ref::().unwrap(); - ValueRef::List(res, row) + ValueRef::List(arr, row) } _ => unreachable!("invalid value: {}", col), } diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index 12531a6a..b35b31c3 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -4,7 +4,7 @@ use crate::types::{FromSqlError, FromSqlResult}; use crate::Row; use rust_decimal::prelude::*; -use arrow::array::ArrayRef; +use arrow::array::ListArray; /// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. /// Copy from arrow::datatypes::TimeUnit @@ -65,7 +65,7 @@ pub enum ValueRef<'a> { /// The value is a time64 Time64(TimeUnit, i64), /// The value is a list - List(&'a ArrayRef, usize), + List(&'a ListArray, usize), } impl ValueRef<'_> { @@ -152,9 +152,10 @@ impl From> for Value { ValueRef::Date32(d) => Value::Date32(d), ValueRef::Time64(t, d) => Value::Time64(t, d), ValueRef::List(items, idx) => { - let range = 0..items.len(); + let offsets = items.offsets(); + let range = offsets[idx]..offsets[idx + 1]; let map: Vec = range - .map(|row| Row::value_ref_internal(row, idx, items).to_owned()) + .map(|row| Row::value_ref_internal(row.try_into().unwrap(), idx, items.values()).to_owned()) .collect(); Value::List(map) } From 69e38c22c826e6be9db503e73c8d3ff0ce5e0a63 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Wed, 20 Mar 2024 20:02:43 +0800 Subject: [PATCH 07/12] amend type enum --- src/types/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/types/mod.rs b/src/types/mod.rs index d93f5974..474624dd 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -144,6 +144,8 @@ pub enum Type { Date32, /// TIME64 Time64, + /// LIST + List(Box), /// Any Any, } @@ -170,6 +172,7 @@ impl fmt::Display for Type { Type::Blob => f.pad("Blob"), Type::Date32 => f.pad("Date32"), Type::Time64 => f.pad("Time64"), + Type::List(..) => f.pad("List"), Type::Any => f.pad("Any"), } } From a9568b860992f0abd98a8becfadfece6548f8dd7 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Thu, 21 Mar 2024 09:43:14 +0800 Subject: [PATCH 08/12] add conversion between DataType and Type --- src/types/mod.rs | 43 ++++++++++++++++++++++++++++++++++++++++++ src/types/value_ref.rs | 4 ++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/types/mod.rs b/src/types/mod.rs index 474624dd..fe9f2ce7 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -74,6 +74,7 @@ pub use self::{ value_ref::{TimeUnit, ValueRef}, }; +use arrow::datatypes::DataType; use std::fmt; #[cfg(feature = "chrono")] @@ -150,6 +151,48 @@ pub enum Type { Any, } +impl From<&DataType> for Type { + fn from(value: &DataType) -> Self { + match value { + DataType::Null => Self::Null, + DataType::Boolean => Self::Boolean, + DataType::Int8 => Self::TinyInt, + DataType::Int16 => Self::SmallInt, + DataType::Int32 => Self::Int, + DataType::Int64 => Self::BigInt, + DataType::UInt8 => Self::UTinyInt, + DataType::UInt16 => Self::USmallInt, + DataType::UInt32 => Self::UInt, + DataType::UInt64 => Self::UBigInt, + // DataType::Float16 => Self::Float16, + // DataType::Float32 => Self::Float32, + DataType::Float64 => Self::Float, + DataType::Timestamp(_, _) => Self::Timestamp, + DataType::Date32 => Self::Date32, + // DataType::Date64 => Self::Date64, + // DataType::Time32(_) => Self::Time32, + DataType::Time64(_) => Self::Time64, + // DataType::Duration(_) => Self::Duration, + // DataType::Interval(_) => Self::Interval, + DataType::Binary => Self::Blob, + // DataType::FixedSizeBinary(_) => Self::FixedSizeBinary, + // DataType::LargeBinary => Self::LargeBinary, + DataType::Utf8 => Self::Text, + // DataType::LargeUtf8 => Self::LargeUtf8, + DataType::List(inner) => Self::List(Box::new(Type::from(inner.data_type()))), + // DataType::FixedSizeList(field, size) => Self::Array, + // DataType::LargeList(_) => Self::LargeList, + // DataType::Struct(inner) => Self::Struct, + // DataType::Union(_, _) => Self::Union, + // DataType::Dictionary(_, _) => Self::Enum, + DataType::Decimal128(..) => Self::Decimal, + DataType::Decimal256(..) => Self::Decimal, + // DataType::Map(field, ..) => Self::Map, + res => unimplemented!("{}", res), + } + } +} + impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index b35b31c3..d82ebc5d 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -4,7 +4,7 @@ use crate::types::{FromSqlError, FromSqlResult}; use crate::Row; use rust_decimal::prelude::*; -use arrow::array::ListArray; +use arrow::array::{Array, ListArray}; /// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. /// Copy from arrow::datatypes::TimeUnit @@ -92,7 +92,7 @@ impl ValueRef<'_> { ValueRef::Blob(_) => Type::Blob, ValueRef::Date32(_) => Type::Date32, ValueRef::Time64(..) => Type::Time64, - ValueRef::List(..) => todo!(), + ValueRef::List(arr, _) => arr.data_type().into(), } } From 5f46084c452a9bcc3485b39d2ac3328fd664dcf9 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Fri, 22 Mar 2024 13:42:54 +0800 Subject: [PATCH 09/12] add full test_all_types() test --- src/test_all_types.rs | 133 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 125 insertions(+), 8 deletions(-) diff --git a/src/test_all_types.rs b/src/test_all_types.rs index 185c6118..597613cf 100644 --- a/src/test_all_types.rs +++ b/src/test_all_types.rs @@ -2,7 +2,7 @@ use pretty_assertions::assert_eq; use rust_decimal::Decimal; use crate::{ - types::{TimeUnit, ValueRef}, + types::{TimeUnit, Value, ValueRef}, Connection, }; @@ -22,13 +22,6 @@ fn test_all_types() -> crate::Result<()> { "small_enum", "medium_enum", "large_enum", - "int_array", - "double_array", - "date_array", - "timestamp_array", - "timestamptz_array", - "varchar_array", - "nested_int_array", "struct", "struct_of_arrays", "array_of_structs", @@ -214,6 +207,122 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) { 1 => assert_eq!(value, ValueRef::Blob(&[0, 0, 0, 97])), _ => assert_eq!(value, ValueRef::Null), }, + "int_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Int(42), + Value::Int(999), + Value::Null, + Value::Null, + Value::Int(-42), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "double_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => { + let value = value.to_owned(); + + if let Value::List(values) = value { + assert_eq!(values.len(), 6); + assert_eq!(values[0], Value::Double(42.0)); + assert!(unwrap(&values[1]).is_nan()); + let val = unwrap(&values[2]); + assert!(val.is_infinite() && val.is_sign_positive()); + let val = unwrap(&values[3]); + assert!(val.is_infinite() && val.is_sign_negative()); + assert_eq!(values[4], Value::Null); + assert_eq!(values[5], Value::Double(-42.0)); + } + } + _ => assert_eq!(value, ValueRef::Null), + }, + "date_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Date32(0), + Value::Date32(2147483647), + Value::Date32(-2147483647), + Value::Null, + Value::Date32(19124), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "timestamp_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Timestamp(TimeUnit::Microsecond, 0,), + Value::Timestamp(TimeUnit::Microsecond, 9223372036854775807,), + Value::Timestamp(TimeUnit::Microsecond, -9223372036854775807,), + Value::Null, + Value::Timestamp(TimeUnit::Microsecond, 1652372625000000,), + ],) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "timestamptz_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Timestamp(TimeUnit::Microsecond, 0,), + Value::Timestamp(TimeUnit::Microsecond, 9223372036854775807,), + Value::Timestamp(TimeUnit::Microsecond, -9223372036854775807,), + Value::Null, + Value::Timestamp(TimeUnit::Microsecond, 1652397825000000,), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "varchar_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Text("🦆🦆🦆🦆🦆🦆".to_string()), + Value::Text("goose".to_string()), + Value::Null, + Value::Text("".to_string()), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "nested_int_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => { + assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::List(vec![],), + Value::List(vec![ + Value::Int(42,), + Value::Int(999,), + Value::Null, + Value::Null, + Value::Int(-42,), + ],), + Value::Null, + Value::List(vec![],), + Value::List(vec![ + Value::Int(42,), + Value::Int(999,), + Value::Null, + Value::Null, + Value::Int(-42,), + ],), + ],) + ) + } + _ => assert_eq!(value, ValueRef::Null), + }, "bit" => match idx { 0 => assert_eq!(value, ValueRef::Blob(&[1, 145, 46, 42, 215]),), 1 => assert_eq!(value, ValueRef::Blob(&[3, 245])), @@ -222,3 +331,11 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) { _ => todo!("{column:?}"), } } + +fn unwrap(value: &Value) -> f64 { + if let Value::Double(val) = value { + *val + } else { + panic!(); + } +} From 759f999a40056ecbf727027a99bb8dd26849c3aa Mon Sep 17 00:00:00 2001 From: Elliana May Date: Sat, 13 Apr 2024 23:19:37 +0800 Subject: [PATCH 10/12] chore: extend error message --- src/row.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/row.rs b/src/row.rs index 4f139376..63e46ccc 100644 --- a/src/row.rs +++ b/src/row.rs @@ -588,7 +588,7 @@ impl<'stmt> Row<'stmt> { ValueRef::List(arr, row) } - _ => unreachable!("invalid value: {}", col), + _ => unreachable!("invalid value: {} {}", col, column.data_type()), } } From a033f663b4a57d98e8352a8b76b87725fb4cf2fc Mon Sep 17 00:00:00 2001 From: Elliana May Date: Fri, 29 Mar 2024 17:18:06 +0800 Subject: [PATCH 11/12] tweak assertion --- src/test_all_types.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/test_all_types.rs b/src/test_all_types.rs index 597613cf..a9113752 100644 --- a/src/test_all_types.rs +++ b/src/test_all_types.rs @@ -2,7 +2,7 @@ use pretty_assertions::assert_eq; use rust_decimal::Decimal; use crate::{ - types::{TimeUnit, Value, ValueRef}, + types::{TimeUnit, Type, Value, ValueRef}, Connection, }; @@ -51,6 +51,9 @@ fn test_all_types() -> crate::Result<()> { idx += 1; for column in row.stmt.column_names() { let value = row.get_ref_unwrap(row.stmt.column_index(&column)?); + if idx != 2 { + assert_ne!(value.data_type(), Type::Null); + } test_single(&mut idx, column, value); } } From 474e053851045f9efc2e24836ee1387ab2619b08 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 17 Apr 2024 15:48:46 +0200 Subject: [PATCH 12/12] double choco openssl install timeout --- .github/workflows/rust.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index e18b5ffc..0ddf6416 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -109,7 +109,7 @@ jobs: - uses: actions/checkout@v2 # - run: echo "VCPKG_ROOT=$env:VCPKG_INSTALLATION_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append # - run: vcpkg install openssl:x64-windows-static-md - - run: choco install openssl + - run: choco install openssl --execution-timeout 5400 - run: echo 'OPENSSL_DIR=C:\Program Files\OpenSSL' | Out-File -FilePath $env:GITHUB_ENV -Append - uses: actions/cache@v3 with: