Skip to content

Commit

Permalink
Fix size calculation in ArrayAggGroupsAccumulator
Browse files Browse the repository at this point in the history
  • Loading branch information
joroKr21 committed Aug 21, 2024
1 parent 11ed341 commit 65b2fc9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 20 deletions.
42 changes: 26 additions & 16 deletions datafusion/physical-expr/src/aggregate/array_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,15 +321,15 @@ impl Accumulator for ArrayAggAccumulator {
}

     fn size(&self) -> usize {
-        std::mem::size_of_val(self)
-            + (std::mem::size_of::<ArrayRef>() * self.values.capacity())
+        size_of_val(self)
+            + self.values.capacity() * size_of::<ArrayRef>()
+            + self.datatype.size()
+            - size_of_val(&self.datatype)
             + self
                 .values
                 .iter()
-                .map(|arr| arr.get_array_memory_size())
+                .map(Array::get_array_memory_size)
                 .sum::<usize>()
-            + self.datatype.size()
-            - std::mem::size_of_val(&self.datatype)
     }
 }

Expand Down Expand Up @@ -452,11 +452,21 @@ where
}

     fn size(&self) -> usize {
-        std::mem::size_of_val(self)
-            + std::mem::size_of::<PrimitiveBuilder<T>>() * self.values.capacity()
-            + self.values.iter().map(|arr| arr.capacity()).sum::<usize>()
-                * std::mem::size_of::<<T as ArrowPrimitiveType>::Native>()
+        let data_type_size = self.data_type.size();
+        size_of_val(self)
+            + data_type_size
             + self.null_state.size()
+            + self.values.capacity() * size_of::<PrimitiveBuilder<T>>()
+            + self
+                .values
+                .iter()
+                .map(|b| {
+                    // Each primitive builder also stores the data type.
+                    data_type_size
+                        + size_of_val(b.values_slice())
+                        + size_of_val(&b.validity_slice())
+                })
+                .sum::<usize>()
     }
 }

Expand Down Expand Up @@ -560,18 +570,18 @@ impl GroupsAccumulator for StringArrayAggGroupsAccumulator {
}

     fn size(&self) -> usize {
-        std::mem::size_of_val(self)
-            + std::mem::size_of::<StringBuilder>() * self.values.capacity()
+        size_of_val(self)
+            + self.null_state.size()
+            + self.values.capacity() * size_of::<StringBuilder>()
             + self
                 .values
                 .iter()
-                .map(|arr| {
-                    std::mem::size_of_val(arr.values_slice())
-                        + std::mem::size_of_val(arr.offsets_slice())
-                        + arr.validity_slice().map(std::mem::size_of_val).unwrap_or(0)
+                .map(|b| {
+                    size_of_val(b.values_slice())
+                        + size_of_val(b.offsets_slice())
+                        + size_of_val(&b.validity_slice())
                 })
                 .sum::<usize>()
-            + self.null_state.size()
     }
 }

Expand Down
7 changes: 3 additions & 4 deletions datafusion/physical-expr/src/aggregate/array_agg_distinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,9 @@ impl Accumulator for DistinctArrayAggAccumulator {
}

     fn size(&self) -> usize {
-        std::mem::size_of_val(self) + ScalarValue::size_of_hashset(&self.values)
-            - std::mem::size_of_val(&self.values)
-            + self.datatype.size()
-            - std::mem::size_of_val(&self.datatype)
+        size_of_val(self) + self.datatype.size() - size_of_val(&self.datatype)
+            + ScalarValue::size_of_hashset(&self.values)
+            - size_of_val(&self.values)
     }
 }

Expand Down

0 comments on commit 65b2fc9

Please sign in to comment.