Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 1 addition & 19 deletions datafusion/functions-aggregate/benches/array_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, NullBufferBuilder,
PrimitiveArray,
};
use arrow::datatypes::{Field, Int64Type};
use criterion::{Criterion, criterion_group, criterion_main};
use datafusion_expr::Accumulator;
use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator;

use arrow::buffer::OffsetBuffer;
use arrow::util::bench_util::create_primitive_array;
use rand::Rng;
use rand::SeedableRng;
use rand::distr::{Distribution, StandardUniform};
Expand All @@ -54,24 +54,6 @@ fn merge_batch_bench(c: &mut Criterion, name: &str, values: ArrayRef) {
});
}

/// Builds a [`PrimitiveArray`] with `size` randomly generated slots.
///
/// For every slot an `f32` is drawn first; when it is below `null_density`
/// the slot is null, otherwise a random `T::Native` is drawn for it.
/// The generator is obtained from `seedable_rng()`.
pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
where
    T: ArrowPrimitiveType,
    StandardUniform: Distribution<T::Native>,
{
    let mut rng = seedable_rng();

    // The null decision is drawn before the value, and a value is only drawn
    // for non-null slots, so the sequence of RNG calls depends on both.
    let next_slot = || -> Option<T::Native> {
        if rng.random::<f32>() < null_density {
            return None;
        }
        Some(rng.random())
    };

    std::iter::repeat_with(next_slot).take(size).collect()
}

/// Creates a random (but fixed-seeded) list array of a given size with the given item data type,
/// null density, null locations, and density of zero-length lists.
pub fn create_list_array<T>(
Expand Down
1 change: 1 addition & 0 deletions datafusion/functions-nested/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ log = { workspace = true }
memchr = { workspace = true }

[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
criterion = { workspace = true, features = ["async_tokio"] }
rand = { workspace = true }

Expand Down
69 changes: 11 additions & 58 deletions datafusion/functions-nested/benches/array_min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,81 +17,34 @@

use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, ListArray};
use arrow::buffer::{NullBuffer, OffsetBuffer};
use arrow::datatypes::{DataType, Field};
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::{DataType, Field, Int64Type};
use arrow::util::bench_util::create_primitive_list_array_with_seed;
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
use datafusion_functions_nested::min_max::ArrayMax;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};

const NUM_ROWS: usize = 8192;
const SEED: u64 = 42;
const LIST_NULL_DENSITY: f64 = 0.1;
const ELEMENT_NULL_DENSITY: f64 = 0.1;

/// Builds a `ListArray` of `num_rows` lists, each spanning `list_size` `Int64` items.
///
/// All randomness comes from a `StdRng` seeded with `SEED`. Item values are
/// `rng.random::<i64>() % 10_000`. When `element_null_density` is greater than
/// zero, an item is null whenever a drawn `f64` is below that density and the
/// item field is declared nullable; otherwise the values are built from plain
/// `i64`s and the item field is non-nullable. Each row's validity flag is
/// `draw >= LIST_NULL_DENSITY`.
fn create_int64_list_array(
    num_rows: usize,
    list_size: usize,
    element_null_density: f64,
) -> ArrayRef {
    let mut rng = StdRng::seed_from_u64(SEED);
    let total_values = num_rows * list_size;
    let items_nullable = element_null_density > 0.0;

    // Item values are generated before the row validity flags; keeping that
    // order keeps the seeded output stable.
    let items = if items_nullable {
        let mut slots: Vec<Option<i64>> = Vec::with_capacity(total_values);
        for _ in 0..total_values {
            let is_null = rng.random::<f64>() < element_null_density;
            let slot = if is_null {
                None
            } else {
                Some(rng.random::<i64>() % 10_000)
            };
            slots.push(slot);
        }
        Int64Array::from(slots)
    } else {
        // No element nulls — values array has no null buffer
        let mut raw: Vec<i64> = Vec::with_capacity(total_values);
        for _ in 0..total_values {
            raw.push(rng.random::<i64>() % 10_000);
        }
        Int64Array::from(raw)
    };

    // Every list has the same length, so offsets are multiples of `list_size`.
    let offsets: Vec<i32> = (0..=num_rows)
        .map(|row| (row * list_size) as i32)
        .collect();
    let row_validity: Vec<bool> = (0..num_rows)
        .map(|_| rng.random::<f64>() >= LIST_NULL_DENSITY)
        .collect();

    let item_field = Arc::new(Field::new("item", DataType::Int64, items_nullable));
    let list = ListArray::new(
        item_field,
        OffsetBuffer::new(offsets.into()),
        Arc::new(items),
        Some(NullBuffer::from(row_validity)),
    );
    Arc::new(list)
}

fn criterion_benchmark(c: &mut Criterion) {
let udf = ArrayMax::new();
let config_options = Arc::new(ConfigOptions::default());

for list_size in [10, 100, 1000] {
for (label, null_density) in [("nulls", ELEMENT_NULL_DENSITY), ("no_nulls", 0.0)]
{
let list_array = create_int64_list_array(NUM_ROWS, list_size, null_density);
let list_array: ArrayRef =
Arc::new(create_primitive_list_array_with_seed::<i32, Int64Type>(
NUM_ROWS,
LIST_NULL_DENSITY as f32,
null_density as f32,
list_size,
SEED,
));
let args = vec![ColumnarValue::Array(Arc::clone(&list_array))];
let arg_fields =
vec![Field::new("arg_0", list_array.data_type().clone(), true).into()];
Expand Down
133 changes: 33 additions & 100 deletions datafusion/functions-nested/benches/array_remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
// under the License.

use arrow::array::{
Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, FixedSizeBinaryArray,
Float64Array, Int64Array, ListArray, StringArray,
Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeBinaryArray, ListArray,
StringArray,
};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field};
use arrow::datatypes::{DataType, Decimal128Type, Field, Float64Type, Int64Type};
use arrow::util::bench_util::create_primitive_list_array_with_seed;
use criterion::{
criterion_group, criterion_main, {BenchmarkId, Criterion},
};
Expand Down Expand Up @@ -55,7 +56,15 @@ fn bench_array_remove_int64(c: &mut Criterion) {
let mut group = c.benchmark_group("array_remove_int64");

for &array_size in ARRAY_SIZES {
let list_array = create_int64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
let list_array: ArrayRef =
Arc::new(create_primitive_list_array_with_seed::<i32, Int64Type>(
NUM_ROWS,
0.0,
NULL_DENSITY as f32,
array_size,
SEED,
));

let element_to_remove = ScalarValue::Int64(Some(1));
let args = create_args(list_array.clone(), element_to_remove.clone());

Expand Down Expand Up @@ -96,7 +105,14 @@ fn bench_array_remove_f64(c: &mut Criterion) {
let mut group = c.benchmark_group("array_remove_f64");

for &array_size in ARRAY_SIZES {
let list_array = create_f64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
let list_array: ArrayRef =
Arc::new(create_primitive_list_array_with_seed::<i32, Float64Type>(
NUM_ROWS,
0.0,
NULL_DENSITY as f32,
array_size,
SEED,
));
let element_to_remove = ScalarValue::Float64(Some(1.0));
let args = create_args(list_array.clone(), element_to_remove.clone());

Expand Down Expand Up @@ -260,8 +276,17 @@ fn bench_array_remove_decimal64(c: &mut Criterion) {
let mut group = c.benchmark_group("array_remove_decimal64");

for &array_size in ARRAY_SIZES {
let list_array = create_decimal64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
let element_to_remove = ScalarValue::Decimal128(Some(100_i128), 10, 2);
let list_array: ArrayRef = Arc::new(create_primitive_list_array_with_seed::<
i32,
Decimal128Type,
>(
NUM_ROWS,
0.0,
NULL_DENSITY as f32,
array_size,
SEED,
));
let element_to_remove = ScalarValue::Decimal128(Some(100_i128), 38, 10);
let args = create_args(list_array.clone(), element_to_remove.clone());

group.bench_with_input(
Expand All @@ -276,7 +301,7 @@ fn bench_array_remove_decimal64(c: &mut Criterion) {
arg_fields: vec![
Field::new("arr", list_array.data_type().clone(), false)
.into(),
Field::new("el", DataType::Decimal128(10, 2), false)
Field::new("el", DataType::Decimal128(38, 10), false)
.into(),
],
number_rows: NUM_ROWS,
Expand Down Expand Up @@ -348,66 +373,6 @@ fn create_args(list_array: ArrayRef, element: ScalarValue) -> Vec<ColumnarValue>
]
}

/// Builds a `ListArray` of `num_rows` lists, each spanning `array_size` nullable `Int64` items.
///
/// Items come from a `StdRng` seeded with `SEED`: a slot is null when a drawn
/// `f64` is below `null_density`, otherwise its value is drawn from
/// `0..array_size`. No list-level null buffer is attached.
///
/// Panics if the list array cannot be constructed from the generated parts.
fn create_int64_list_array(
    num_rows: usize,
    array_size: usize,
    null_density: f64,
) -> ArrayRef {
    let mut rng = StdRng::seed_from_u64(SEED);
    let total_values = num_rows * array_size;

    // Null decision first, then (only for non-null slots) the value.
    let next_item = || -> Option<i64> {
        if rng.random::<f64>() < null_density {
            return None;
        }
        Some(rng.random_range(0..array_size as i64))
    };
    let items: Int64Array = std::iter::repeat_with(next_item)
        .take(total_values)
        .collect();

    // Fixed-length lists: offset of row `r` is `r * array_size`.
    let offsets: Vec<i32> = (0..=num_rows)
        .map(|row| (row * array_size) as i32)
        .collect();

    let item_field = Arc::new(Field::new("item", DataType::Int64, true));
    let list = ListArray::new(
        item_field,
        OffsetBuffer::new(offsets.into()),
        Arc::new(items),
        None,
    );
    Arc::new(list)
}

/// Builds a `ListArray` of `num_rows` lists, each spanning `array_size` nullable `Float64` items.
///
/// Items come from a `StdRng` seeded with `SEED`: a slot is null when a drawn
/// `f64` is below `null_density`, otherwise an integer is drawn from
/// `0..array_size` and converted to `f64`. No list-level null buffer is attached.
///
/// Panics if the list array cannot be constructed from the generated parts.
fn create_f64_list_array(
    num_rows: usize,
    array_size: usize,
    null_density: f64,
) -> ArrayRef {
    let mut rng = StdRng::seed_from_u64(SEED);
    let total_values = num_rows * array_size;

    // Null decision first, then (only for non-null slots) the value.
    let next_item = || -> Option<f64> {
        if rng.random::<f64>() < null_density {
            return None;
        }
        let whole = rng.random_range(0..array_size as i64);
        Some(whole as f64)
    };
    let items: Float64Array = std::iter::repeat_with(next_item)
        .take(total_values)
        .collect();

    // Fixed-length lists: offset of row `r` is `r * array_size`.
    let offsets: Vec<i32> = (0..=num_rows)
        .map(|row| (row * array_size) as i32)
        .collect();

    let item_field = Arc::new(Field::new("item", DataType::Float64, true));
    let list = ListArray::new(
        item_field,
        OffsetBuffer::new(offsets.into()),
        Arc::new(items),
        None,
    );
    Arc::new(list)
}

fn create_string_list_array(
num_rows: usize,
array_size: usize,
Expand Down Expand Up @@ -500,38 +465,6 @@ fn create_boolean_list_array(
)
}

/// Builds a `ListArray` of `num_rows` lists, each spanning `array_size` nullable
/// `Decimal128(10, 2)` items.
///
/// Items come from a `StdRng` seeded with `SEED`: a slot is null when a drawn
/// `f64` is below `null_density`, otherwise a `usize` is drawn from
/// `0..array_size`, widened to `i128` and multiplied by 100. No list-level
/// null buffer is attached.
///
/// Panics if precision/scale `(10, 2)` is rejected for the generated values or
/// if the list array cannot be constructed.
fn create_decimal64_list_array(
    num_rows: usize,
    array_size: usize,
    null_density: f64,
) -> ArrayRef {
    let mut rng = StdRng::seed_from_u64(SEED);
    let total_values = num_rows * array_size;

    // Null decision first, then (only for non-null slots) the value.
    let next_item = || -> Option<i128> {
        if rng.random::<f64>() < null_density {
            return None;
        }
        let unscaled = rng.random_range(0..array_size) as i128;
        Some(unscaled * 100)
    };
    let items = std::iter::repeat_with(next_item)
        .take(total_values)
        .collect::<Decimal128Array>()
        .with_precision_and_scale(10, 2)
        .unwrap();

    // Fixed-length lists: offset of row `r` is `r * array_size`.
    let offsets: Vec<i32> = (0..=num_rows)
        .map(|row| (row * array_size) as i32)
        .collect();

    let item_field = Arc::new(Field::new("item", DataType::Decimal128(10, 2), true));
    let list = ListArray::new(
        item_field,
        OffsetBuffer::new(offsets.into()),
        Arc::new(items),
        None,
    );
    Arc::new(list)
}

fn create_fixed_size_binary_list_array(
num_rows: usize,
array_size: usize,
Expand Down
Loading
Loading