Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Upgrade rand crate and some other minor crates #14967

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
530 changes: 266 additions & 264 deletions Cargo.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ bigdecimal = "0.4.7"
bytes = "1.10"
chrono = { version = "0.4.38", default-features = false }
criterion = "0.5.1"
ctor = "0.2.9"
ctor = "0.4.0"
dashmap = "6.0.1"
datafusion = { path = "datafusion/core", version = "46.0.0", default-features = false }
datafusion-catalog = { path = "datafusion/catalog", version = "46.0.0" }
Expand Down Expand Up @@ -156,10 +156,10 @@ pbjson = { version = "0.7.0" }
pbjson-types = "0.7"
# Should match arrow-flight's version of prost.
prost = "0.13.1"
rand = "0.8.5"
rand = "0.9"
recursive = "0.1.1"
regex = "1.8"
rstest = "0.24.0"
rstest = "0.25.0"
serde_json = "1"
sqlparser = { version = "0.54.0", features = ["visitor"] }
tempfile = "3"
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/src/cancellation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use futures::TryStreamExt;
use object_store::ObjectStore;
use parquet::arrow::async_writer::ParquetObjectWriter;
use parquet::arrow::AsyncArrowWriter;
use rand::distributions::Alphanumeric;
use rand::distr::Alphanumeric;
use rand::rngs::ThreadRng;
use rand::Rng;
use structopt::StructOpt;
Expand Down Expand Up @@ -234,7 +234,7 @@ fn find_files_on_disk(data_dir: impl AsRef<Path>) -> Result<Vec<PathBuf>> {
let path = file.unwrap().path();
if path
.extension()
.map(|ext| (ext == "parquet"))
.map(|ext| ext == "parquet")
.unwrap_or(false)
{
Some(path)
Expand Down Expand Up @@ -309,15 +309,15 @@ async fn generate_data(
}

fn random_data(column_type: &DataType, rows: usize) -> Arc<dyn Array> {
let mut rng = rand::thread_rng();
let mut rng = rand::rng();
let values = (0..rows).map(|_| random_value(&mut rng, column_type));
ScalarValue::iter_to_array(values).unwrap()
}

fn random_value(rng: &mut ThreadRng, column_type: &DataType) -> ScalarValue {
match column_type {
DataType::Float64 => ScalarValue::Float64(Some(rng.gen())),
DataType::Boolean => ScalarValue::Boolean(Some(rng.gen())),
DataType::Float64 => ScalarValue::Float64(Some(rng.random())),
DataType::Boolean => ScalarValue::Boolean(Some(rng.random())),
DataType::Utf8 => ScalarValue::Utf8(Some(
rng.sample_iter(&Alphanumeric)
.take(10)
Expand Down
1 change: 0 additions & 1 deletion datafusion/catalog-listing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ object_store = { workspace = true }
tokio = { workspace = true }

[dev-dependencies]
tempfile = { workspace = true }

[lints]
workspace = true
Expand Down
38 changes: 19 additions & 19 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7146,14 +7146,14 @@ mod tests {
fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
let vector_size = sample_size;
let mut timestamp = vec![];
let mut rng = rand::thread_rng();
let mut rng = rand::rng();
for i in 0..vector_size {
let year = rng.gen_range(1995..=2050);
let month = rng.gen_range(1..=12);
let day = rng.gen_range(1..=28); // to exclude invalid dates
let hour = rng.gen_range(0..=23);
let minute = rng.gen_range(0..=59);
let second = rng.gen_range(0..=59);
let year = rng.random_range(1995..=2050);
let month = rng.random_range(1..=12);
let day = rng.random_range(1..=28); // to exclude invalid dates
let hour = rng.random_range(0..=23);
let minute = rng.random_range(0..=59);
let second = rng.random_range(0..=59);
if i % 4 == 0 {
timestamp.push(ScalarValue::TimestampSecond(
Some(
Expand All @@ -7167,7 +7167,7 @@ mod tests {
None,
))
} else if i % 4 == 1 {
let millisec = rng.gen_range(0..=999);
let millisec = rng.random_range(0..=999);
timestamp.push(ScalarValue::TimestampMillisecond(
Some(
NaiveDate::from_ymd_opt(year, month, day)
Expand All @@ -7180,7 +7180,7 @@ mod tests {
None,
))
} else if i % 4 == 2 {
let microsec = rng.gen_range(0..=999_999);
let microsec = rng.random_range(0..=999_999);
timestamp.push(ScalarValue::TimestampMicrosecond(
Some(
NaiveDate::from_ymd_opt(year, month, day)
Expand All @@ -7193,7 +7193,7 @@ mod tests {
None,
))
} else if i % 4 == 3 {
let nanosec = rng.gen_range(0..=999_999_999);
let nanosec = rng.random_range(0..=999_999_999);
timestamp.push(ScalarValue::TimestampNanosecond(
Some(
NaiveDate::from_ymd_opt(year, month, day)
Expand All @@ -7217,27 +7217,27 @@ mod tests {

let vector_size = sample_size;
let mut intervals = vec![];
let mut rng = rand::thread_rng();
let mut rng = rand::rng();
const SECS_IN_ONE_DAY: i32 = 86_400;
const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
for i in 0..vector_size {
if i % 4 == 0 {
let days = rng.gen_range(0..5000);
let days = rng.random_range(0..5000);
// to not break second precision
let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000;
let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
intervals.push(ScalarValue::new_interval_dt(days, millis));
} else if i % 4 == 1 {
let days = rng.gen_range(0..5000);
let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32));
let days = rng.random_range(0..5000);
let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
intervals.push(ScalarValue::new_interval_dt(days, millisec));
} else if i % 4 == 2 {
let days = rng.gen_range(0..5000);
let days = rng.random_range(0..5000);
// to not break microsec precision
let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000;
let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
} else {
let days = rng.gen_range(0..5000);
let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY);
let days = rng.random_range(0..5000);
let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
}
}
Expand Down
5 changes: 1 addition & 4 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ datafusion-functions-aggregate = { workspace = true }
datafusion-functions-nested = { workspace = true, optional = true }
datafusion-functions-table = { workspace = true }
datafusion-functions-window = { workspace = true }
datafusion-macros = { workspace = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
Expand Down Expand Up @@ -141,15 +140,13 @@ zstd = { version = "0.13", optional = true, default-features = false }
async-trait = { workspace = true }
criterion = { workspace = true, features = ["async_tokio"] }
ctor = { workspace = true }
dashmap = "6.1.0"
datafusion-doc = { workspace = true }
datafusion-functions-window-common = { workspace = true }
datafusion-physical-optimizer = { workspace = true }
doc-comment = { workspace = true }
env_logger = { workspace = true }
paste = "^1.0"
rand = { workspace = true, features = ["small_rng"] }
rand_distr = "0.4.3"
rand_distr = "0.5"
regex = { workspace = true }
rstest = { workspace = true }
serde_json = { workspace = true }
Expand Down
27 changes: 11 additions & 16 deletions datafusion/core/benches/data_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion_common::DataFusionError;
use rand::prelude::IndexedRandom;
use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::{Rng, SeedableRng};
use rand_distr::Distribution;
use rand_distr::{Normal, Pareto};
Expand All @@ -48,11 +48,6 @@ pub fn create_table_provider(
MemTable::try_new(schema, partitions).map(Arc::new)
}

/// create a seedable [`StdRng`](rand::StdRng)
fn seedable_rng() -> StdRng {
StdRng::seed_from_u64(42)
}

/// Create test data schema
pub fn create_schema() -> Schema {
Schema::new(vec![
Expand All @@ -72,29 +67,29 @@ pub fn create_schema() -> Schema {

fn create_data(size: usize, null_density: f64) -> Vec<Option<f64>> {
// use random numbers to avoid spurious compiler optimizations with respect to branching
let mut rng = seedable_rng();
let mut rng = StdRng::seed_from_u64(42);

(0..size)
.map(|_| {
if rng.gen::<f64>() > null_density {
if rng.random::<f64>() > null_density {
None
} else {
Some(rng.gen::<f64>())
Some(rng.random::<f64>())
}
})
.collect()
}

fn create_integer_data(size: usize, value_density: f64) -> Vec<Option<u64>> {
// use random numbers to avoid spurious compiler optimizations with respect to branching
let mut rng = seedable_rng();
let mut rng = StdRng::seed_from_u64(42);

(0..size)
.map(|_| {
if rng.gen::<f64>() > value_density {
if rng.random::<f64>() > value_density {
None
} else {
Some(rng.gen::<u64>())
Some(rng.random::<u64>())
}
})
.collect()
Expand Down Expand Up @@ -124,7 +119,7 @@ fn create_record_batch(

// Integer values between [0, 9].
let integer_values_narrow = (0..batch_size)
.map(|_| rng.gen_range(0_u64..10))
.map(|_| rng.random_range(0_u64..10))
.collect::<Vec<_>>();

RecordBatch::try_new(
Expand All @@ -148,7 +143,7 @@ pub fn create_record_batches(
partitions_len: usize,
batch_size: usize,
) -> Vec<Vec<RecordBatch>> {
let mut rng = seedable_rng();
let mut rng = StdRng::seed_from_u64(42);
(0..partitions_len)
.map(|_| {
(0..array_len / batch_size / partitions_len)
Expand Down Expand Up @@ -184,7 +179,7 @@ pub(crate) fn make_data(
let mut id_builder = StringBuilder::new();
let mut ts_builder = Int64Builder::new();
let gen_id = |rng: &mut rand::rngs::SmallRng| {
rng.gen::<[u8; 16]>()
rng.random::<[u8; 16]>()
.iter()
.fold(String::new(), |mut output, b| {
let _ = write!(output, "{b:02X}");
Expand All @@ -200,7 +195,7 @@ pub(crate) fn make_data(
.map(|_| gen_sample_cnt(&mut rng))
.collect::<Vec<_>>();
for _ in 0..sample_cnt {
let random_index = rng.gen_range(0..simultaneous_group_cnt);
let random_index = rng.random_range(0..simultaneous_group_cnt);
let trace_id = &mut group_ids[random_index];
let sample_cnt = &mut group_sample_cnts[random_index];
*sample_cnt -= 1;
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/benches/map_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ mod data_utils;
fn build_keys(rng: &mut ThreadRng) -> Vec<String> {
let mut keys = vec![];
for _ in 0..1000 {
keys.push(rng.gen_range(0..9999).to_string());
keys.push(rng.random_range(0..9999).to_string());
}
keys
}

fn build_values(rng: &mut ThreadRng) -> Vec<i32> {
let mut values = vec![];
for _ in 0..1000 {
values.push(rng.gen_range(0..9999));
values.push(rng.random_range(0..9999));
}
values
}
Expand All @@ -64,7 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let df = rt.block_on(ctx.lock().table("t")).unwrap();

let mut rng = rand::thread_rng();
let mut rng = rand::rng();
let keys = build_keys(&mut rng);
let values = build_values(&mut rng);
let mut key_buffer = Vec::new();
Expand Down
23 changes: 12 additions & 11 deletions datafusion/core/benches/parquet_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ use datafusion_common::instant::Instant;
use futures::stream::StreamExt;
use parquet::arrow::ArrowWriter;
use parquet::file::properties::{WriterProperties, WriterVersion};
use rand::distributions::uniform::SampleUniform;
use rand::distributions::Alphanumeric;
use rand::distr::uniform::SampleUniform;
use rand::distr::Alphanumeric;
use rand::prelude::*;
use rand::rng;
use std::fs::File;
use std::io::Read;
use std::ops::Range;
Expand Down Expand Up @@ -97,13 +98,13 @@ fn generate_string_dictionary(
len: usize,
valid_percent: f64,
) -> ArrayRef {
let mut rng = thread_rng();
let mut rng = rng();
let strings: Vec<_> = (0..cardinality).map(|x| format!("{prefix}#{x}")).collect();

Arc::new(DictionaryArray::<Int32Type>::from_iter((0..len).map(
|_| {
rng.gen_bool(valid_percent)
.then(|| strings[rng.gen_range(0..cardinality)].as_str())
rng.random_bool(valid_percent)
.then(|| strings[rng.random_range(0..cardinality)].as_str())
},
)))
}
Expand All @@ -113,10 +114,10 @@ fn generate_strings(
len: usize,
valid_percent: f64,
) -> ArrayRef {
let mut rng = thread_rng();
let mut rng = rng();
Arc::new(StringArray::from_iter((0..len).map(|_| {
rng.gen_bool(valid_percent).then(|| {
let string_len = rng.gen_range(string_length_range.clone());
rng.random_bool(valid_percent).then(|| {
let string_len = rng.random_range(string_length_range.clone());
(0..string_len)
.map(|_| char::from(rng.sample(Alphanumeric)))
.collect::<String>()
Expand All @@ -133,10 +134,10 @@ where
T: ArrowPrimitiveType,
T::Native: SampleUniform,
{
let mut rng = thread_rng();
let mut rng = rng();
Arc::new(PrimitiveArray::<T>::from_iter((0..len).map(|_| {
rng.gen_bool(valid_percent)
.then(|| rng.gen_range(range.clone()))
rng.random_bool(valid_percent)
.then(|| rng.random_range(range.clone()))
})))
}

Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/benches/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ impl DataGenerator {
/// Create an array of i64 sorted values (where approximately 1/3 of the values are repeated)
fn i64_values(&mut self) -> Vec<i64> {
let mut vec: Vec<_> = (0..INPUT_SIZE)
.map(|_| self.rng.gen_range(0..INPUT_SIZE as i64))
.map(|_| self.rng.random_range(0..INPUT_SIZE as i64))
.collect();

vec.sort_unstable();
Expand All @@ -513,7 +513,7 @@ impl DataGenerator {
// pick from the 100 strings randomly
let mut input = (0..INPUT_SIZE)
.map(|_| {
let idx = self.rng.gen_range(0..strings.len());
let idx = self.rng.random_range(0..strings.len());
let s = Arc::clone(&strings[idx]);
Some(s)
})
Expand All @@ -536,7 +536,7 @@ impl DataGenerator {

fn random_string(&mut self) -> String {
let rng = &mut self.rng;
rng.sample_iter(rand::distributions::Alphanumeric)
rng.sample_iter(rand::distr::Alphanumeric)
.filter(|c| c.is_ascii_alphabetic())
.take(20)
.map(char::from)
Expand All @@ -558,7 +558,7 @@ where
let mut outputs: Vec<Vec<Vec<T>>> = (0..NUM_STREAMS).map(|_| Vec::new()).collect();

for i in input {
let stream_idx = rng.gen_range(0..NUM_STREAMS);
let stream_idx = rng.random_range(0..NUM_STREAMS);
let stream = &mut outputs[stream_idx];
match stream.last_mut() {
Some(x) if x.len() < BATCH_SIZE => x.push(i),
Expand Down
Loading
Loading