diff --git a/src/join.rs b/src/join.rs index 94270af..a5e25bd 100644 --- a/src/join.rs +++ b/src/join.rs @@ -9,12 +9,56 @@ use std::ops::Deref; /// because relations have no "recent" tuples, so the fn would be a /// guaranteed no-op if both arguments were relations. See also /// `join_into_relation`. -pub(crate) fn join_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>( - input1: &Variable<(Key, Val1)>, - input2: impl JoinInput<'me, (Key, Val2)>, +pub(crate) fn join_into<'me, Key: Ord, Value1: Ord, Value2: Ord, Result: Ord>( + input1: &Variable<(Key, Value1)>, + input2: impl JoinInput<'me, (Key, Value2)>, output: &Variable, - mut logic: impl FnMut(&Key, &Val1, &Val2) -> Result, + mut logic: impl FnMut(&Key, &Value1, &Value2) -> Result, ) { + join_into_by_impl( + input1, + input2, + output, + |(key, _value)| key, + |(key, _value)| key, + |tuple1, tuple2| logic(&tuple1.0, &tuple1.1, &tuple2.1), + ) +} + +pub(crate) fn join_into_by< + 'me, + Key: Ord, + Tuple1: Ord, + Tuple2: Ord, + Accessor1, + Accessor2, + Result: Ord, +>( + input1: &Variable, + input2: impl JoinInput<'me, Tuple2>, + output: &Variable, + accessor1: Accessor1, + accessor2: Accessor2, + logic: impl FnMut(&Tuple1, &Tuple2) -> Result, +) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ + join_into_by_impl(input1, input2, output, accessor1, accessor2, logic) +} + +#[inline(always)] +fn join_into_by_impl<'me, Key: Ord, Tuple1: Ord, Tuple2: Ord, Accessor1, Accessor2, Result: Ord>( + input1: &Variable, + input2: impl JoinInput<'me, Tuple2>, + output: &Variable, + accessor1: Accessor1, + accessor2: Accessor2, + mut logic: impl FnMut(&Tuple1, &Tuple2) -> Result, +) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ let mut results = Vec::new(); let recent1 = input1.recent(); @@ -23,80 +67,210 @@ pub(crate) fn join_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>( { // scoped to let `closure` drop borrow of `results`. 
- let mut closure = |k: &Key, v1: &Val1, v2: &Val2| results.push(logic(k, v1, v2)); + let mut closure = |tuple1: &Tuple1, tuple2: &Tuple2| results.push(logic(tuple1, tuple2)); for batch2 in input2.stable().iter() { - join_helper(&recent1, &batch2, &mut closure); + join_helper_by(&recent1, &batch2, &accessor1, &accessor2, &mut closure); } for batch1 in input1.stable().iter() { - join_helper(&batch1, &recent2, &mut closure); + join_helper_by(&batch1, &recent2, &accessor1, &accessor2, &mut closure); } - join_helper(&recent1, &recent2, &mut closure); + join_helper_by(&recent1, &recent2, &accessor1, &accessor2, &mut closure); } output.insert(Relation::from_vec(results)); } /// Join, but for two relations. -pub(crate) fn join_into_relation<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>( - input1: &Relation<(Key, Val1)>, - input2: &Relation<(Key, Val2)>, - mut logic: impl FnMut(&Key, &Val1, &Val2) -> Result, +pub(crate) fn join_into_relation<'me, Key: Ord, Value1: Ord, Value2: Ord, Result: Ord>( + input1: &Relation<(Key, Value1)>, + input2: &Relation<(Key, Value2)>, + mut logic: impl FnMut(&Key, &Value1, &Value2) -> Result, ) -> Relation { + join_into_relation_by( + input1, + input2, + |(key, _value)| key, + |(key, _value)| key, + |tuple1, tuple2| logic(&tuple1.0, &tuple1.1, &tuple2.1), + ) +} + +/// Join, but for two relations. 
+pub(crate) fn join_into_relation_by< + 'me, + Key: Ord, + Tuple1: Ord, + Tuple2: Ord, + Accessor1, + Accessor2, + Result: Ord, +>( + input1: &Relation, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + mut logic: impl FnMut(&Tuple1, &Tuple2) -> Result, +) -> Relation +where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ let mut results = Vec::new(); - join_helper(&input1.elements, &input2.elements, |k, v1, v2| { - results.push(logic(k, v1, v2)); - }); + join_helper_by( + &input1.elements, + &input2.elements, + &accessor1, + &accessor2, + |tuple1, tuple2| { + results.push(logic(tuple1, tuple2)); + }, + ); Relation::from_vec(results) } /// Moves all recent tuples from `input1` that are not present in `input2` into `output`. -pub(crate) fn antijoin<'me, Key: Ord, Val: Ord, Result: Ord>( - input1: impl JoinInput<'me, (Key, Val)>, +pub(crate) fn antijoin<'me, Key: Ord, Value1: Ord, Result: Ord>( + input1: impl JoinInput<'me, (Key, Value1)>, input2: &Relation, - mut logic: impl FnMut(&Key, &Val) -> Result, + mut logic: impl FnMut(&Key, &Value1) -> Result, ) -> Relation { + antijoin_by_impl( + input1, + input2, + |(key, _value)| key, + |key| key, + |(key, value)| logic(key, value), + ) +} + +pub(crate) fn antijoin_by< + 'me, + Tuple1: Ord, + Tuple2: Ord, + Key: Ord, + Accessor1, + Accessor2, + Result: Ord, +>( + input1: impl JoinInput<'me, Tuple1>, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + logic: impl FnMut(&Tuple1) -> Result, +) -> Relation +where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ + antijoin_by_impl(input1, input2, accessor1, accessor2, logic) +} + +/// Moves all recent tuples from `input1` that are not present in `input2` into `output`. 
+#[inline(always)] +pub(crate) fn antijoin_by_impl< + 'me, + Tuple1: Ord, + Tuple2: Ord, + Key: Ord, + Accessor1, + Accessor2, + Result: Ord, +>( + input1: impl JoinInput<'me, Tuple1>, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + mut logic: impl FnMut(&Tuple1) -> Result, +) -> Relation +where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ let mut tuples2 = &input2[..]; let results = input1 .recent() .iter() - .filter(|(ref key, _)| { - tuples2 = gallop(tuples2, |k| k < key); - tuples2.first() != Some(key) + .filter(|tuple| { + let key = accessor1(tuple); + tuples2 = gallop(tuples2, |k| accessor2(k) < key); + tuples2.first().map(|tuple2| accessor2(tuple2)) != Some(key) }) - .map(|(ref key, ref val)| logic(key, val)) + .map(|tuple| logic(tuple)) .collect::>(); Relation::from_vec(results) } -fn join_helper( - mut slice1: &[(K, V1)], - mut slice2: &[(K, V2)], - mut result: impl FnMut(&K, &V1, &V2), +#[allow(dead_code)] +fn join_helper( + slice1: &[(Key, Value1)], + slice2: &[(Key, Value2)], + mut result: impl FnMut(&Key, &Value1, &Value2), ) { + join_helper_by_impl( + slice1, + slice2, + |(key, _value)| key, + |(key, _value)| key, + |tuple1, tuple2| result(&tuple1.0, &tuple1.1, &tuple2.1), + ) +} + +fn join_helper_by( + slice1: &[Tuple1], + slice2: &[Tuple2], + accessor1: Accessor1, + accessor2: Accessor2, + result: impl FnMut(&Tuple1, &Tuple2), +) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ + join_helper_by_impl(slice1, slice2, accessor1, accessor2, result) +} + +#[inline(always)] +fn join_helper_by_impl( + mut slice1: &[Tuple1], + mut slice2: &[Tuple2], + accessor1: Accessor1, + accessor2: Accessor2, + mut result: impl FnMut(&Tuple1, &Tuple2), +) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, +{ while !slice1.is_empty() && !slice2.is_empty() { use std::cmp::Ordering; + let ordering = { accessor1(&slice1[0]).cmp(&accessor2(&slice2[0])) }; + // If the keys 
match produce tuples, else advance the smaller key until they might. - match slice1[0].0.cmp(&slice2[0].0) { + match ordering { Ordering::Less => { - slice1 = gallop(slice1, |x| x.0 < slice2[0].0); + slice1 = gallop(slice1, |x| accessor1(x) < accessor2(&slice2[0])); } Ordering::Equal => { // Determine the number of matching keys in each slice. - let count1 = slice1.iter().take_while(|x| x.0 == slice1[0].0).count(); - let count2 = slice2.iter().take_while(|x| x.0 == slice2[0].0).count(); + let count1 = slice1 + .iter() + .take_while(|x| accessor1(x) == accessor1(&slice1[0])) + .count(); + let count2 = slice2 + .iter() + .take_while(|x| accessor2(x) == accessor2(&slice2[0])) + .count(); // Produce results from the cross-product of matches. for index1 in 0..count1 { for s2 in slice2[..count2].iter() { - result(&slice1[0].0, &slice1[index1].1, &s2.1); + result(&slice1[index1], s2); } } @@ -105,13 +279,13 @@ fn join_helper( slice2 = &slice2[count2..]; } Ordering::Greater => { - slice2 = gallop(slice2, |x| x.0 < slice1[0].0); + slice2 = gallop(slice2, |x| accessor2(x) < accessor1(&slice1[0])); } } } } -pub(crate) fn gallop(mut slice: &[T], mut cmp: impl FnMut(&T) -> bool) -> &[T] { +pub(crate) fn gallop(mut slice: &[Tuple], mut cmp: impl FnMut(&Tuple) -> bool) -> &[Tuple] { // if empty slice, or already >= element, return if !slice.is_empty() && cmp(&slice[0]) { let mut step = 1; diff --git a/src/map.rs b/src/map.rs index 1a8c101..3cb9bc6 100644 --- a/src/map.rs +++ b/src/map.rs @@ -2,12 +2,12 @@ use super::{Relation, Variable}; -pub(crate) fn map_into( - input: &Variable, - output: &Variable, - logic: impl FnMut(&T1) -> T2, +pub(crate) fn map_into( + input: &Variable, + output: &Variable, + logic: impl FnMut(&Tuple1) -> Tuple2, ) { - let results: Vec = input.recent.borrow().iter().map(logic).collect(); + let results: Vec = input.recent.borrow().iter().map(logic).collect(); output.insert(Relation::from_vec(results)); } diff --git a/src/relation.rs b/src/relation.rs index 
b671fe0..0d83336 100644 --- a/src/relation.rs +++ b/src/relation.rs @@ -48,31 +48,71 @@ impl Relation { /// `input2` and then applying `logic`. Like /// [`Variable::from_join`] except for use where the inputs are /// not varying across iterations. - pub fn from_join( - input1: &Relation<(Key, Val1)>, - input2: &Relation<(Key, Val2)>, - logic: impl FnMut(&Key, &Val1, &Val2) -> Tuple, + pub fn from_join( + input1: &Relation<(Key, Value1)>, + input2: &Relation<(Key, Value2)>, + logic: impl FnMut(&Key, &Value1, &Value2) -> Tuple, ) -> Self { join::join_into_relation(input1, input2, logic) } + /// Creates a `Relation` by joining the values from `input1` and + /// `input2`, through `accessor1` and `accessor2` respectively, and then applying `logic`. Like + /// [`Variable::from_join_by`] except for use where the inputs are + /// not varying across iterations. + pub fn from_join_by( + input1: &Relation, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + logic: impl FnMut(&Tuple1, &Tuple2) -> Tuple, + ) -> Self + where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, + { + join::join_into_relation_by(input1, input2, accessor1, accessor2, logic) + } + /// Creates a `Relation` by removing all values from `input1` that /// share a key with `input2`, and then transforming the resulting /// tuples with the `logic` closure. Like /// [`Variable::from_antijoin`] except for use where the inputs /// are not varying across iterations. - pub fn from_antijoin( - input1: &Relation<(Key, Val1)>, + pub fn from_antijoin( + input1: &Relation<(Key, Value1)>, input2: &Relation, - logic: impl FnMut(&Key, &Val1) -> Tuple, + logic: impl FnMut(&Key, &Value1) -> Tuple, ) -> Self { join::antijoin(input1, input2, logic) } + /// Creates a `Relation` by removing all values from `input1` that + /// share a key with `input2`, through `accessor1` and `accessor2` respectively, + /// and then transforming the resulting tuples with the `logic` closure. 
+ /// Like [`Variable::from_antijoin_by`] except for use where the inputs + /// are not varying across iterations. + pub fn from_antijoin_by( + input1: &Relation, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + logic: impl FnMut(&Tuple1) -> Tuple, + ) -> Self + where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, + { + join::antijoin_by(input1, input2, accessor1, accessor2, logic) + } + /// Construct a new relation by mapping another one. Equivalent to /// creating an iterator but perhaps more convenient. Analogous to /// `Variable::from_map`. - pub fn from_map(input: &Relation, logic: impl FnMut(&T2) -> Tuple) -> Self { + pub fn from_map( + input: &Relation, + logic: impl FnMut(&Tuple2) -> Tuple, + ) -> Self { input.iter().map(logic).collect() } diff --git a/src/test.rs b/src/test.rs index 9d5af35..b814645 100644 --- a/src/test.rs +++ b/src/test.rs @@ -86,7 +86,7 @@ fn sum_join_via_var( let output = iteration.variable::<(u32, u32)>("output"); while iteration.changed() { - // output(K1, V1 * 100 + V2) :- input1(K1, V1), input2(K1, V2). + // output(K1, Value1 * 100 + Value2) :- input1(K1, Value1), input2(K1, Value2). output.from_join(&input1, &input2, |&k1, &v1, &v2| (k1, v1 * 100 + v2)); } diff --git a/src/treefrog.rs b/src/treefrog.rs index 8a3b5f2..34e315a 100644 --- a/src/treefrog.rs +++ b/src/treefrog.rs @@ -3,10 +3,10 @@ use super::Relation; /// Performs treefrog leapjoin using a list of leapers. -pub(crate) fn leapjoin<'leap, Tuple: Ord, Val: Ord + 'leap, Result: Ord>( +pub(crate) fn leapjoin<'leap, Tuple: Ord, Value: Ord + 'leap, Result: Ord>( source: &[Tuple], - mut leapers: impl Leapers<'leap, Tuple, Val>, - mut logic: impl FnMut(&Tuple, &Val) -> Result, + mut leapers: impl Leapers<'leap, Tuple, Value>, + mut logic: impl FnMut(&Tuple, &Value) -> Result, ) -> Relation { let mut result = Vec::new(); // temp output storage. let mut values = Vec::new(); // temp value storage. 
@@ -45,23 +45,23 @@ pub(crate) fn leapjoin<'leap, Tuple: Ord, Val: Ord + 'leap, Result: Ord>( } /// Implemented for a tuple of leapers -pub trait Leapers<'leap, Tuple, Val> { +pub trait Leapers<'leap, Tuple, Value> { /// Internal method: fn for_each_count(&mut self, tuple: &Tuple, op: impl FnMut(usize, usize)); /// Internal method: - fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Val>); + fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Value>); /// Internal method: - fn intersect(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Val>); + fn intersect(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Value>); } macro_rules! tuple_leapers { ($($Ty:ident)*) => { #[allow(unused_assignments, non_snake_case)] - impl<'leap, Tuple, Val, $($Ty),*> Leapers<'leap, Tuple, Val> for ($($Ty,)*) + impl<'leap, Tuple, Value, $($Ty),*> Leapers<'leap, Tuple, Value> for ($($Ty,)*) where - $($Ty: Leaper<'leap, Tuple, Val>,)* + $($Ty: Leaper<'leap, Tuple, Value>,)* { fn for_each_count(&mut self, tuple: &Tuple, mut op: impl FnMut(usize, usize)) { let ($($Ty,)*) = self; @@ -73,7 +73,7 @@ macro_rules! tuple_leapers { )* } - fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Val>) { + fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Value>) { let ($($Ty,)*) = self; let mut index = 0; $( @@ -85,7 +85,7 @@ macro_rules! tuple_leapers { panic!("no match found for min_index={}", min_index); } - fn intersect(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Val>) { + fn intersect(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Value>) { let ($($Ty,)*) = self; let mut index = 0; $( @@ -107,13 +107,13 @@ tuple_leapers!(A B C D E F); tuple_leapers!(A B C D E F G); /// Methods to support treefrog leapjoin. 
-pub trait Leaper<'leap, Tuple, Val> { +pub trait Leaper<'leap, Tuple, Value> { /// Estimates the number of proposed values. fn count(&mut self, prefix: &Tuple) -> usize; /// Populates `values` with proposed values. - fn propose(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Val>); + fn propose(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Value>); /// Restricts `values` to proposed values. - fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Val>); + fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Value>); } pub(crate) mod filters { @@ -144,7 +144,7 @@ pub(crate) mod filters { } } - impl<'leap, Tuple, Val, Func> Leaper<'leap, Tuple, Val> for PrefixFilter + impl<'leap, Tuple, Value, Func> Leaper<'leap, Tuple, Value> for PrefixFilter where Func: Fn(&Tuple) -> bool, { @@ -157,11 +157,11 @@ pub(crate) mod filters { } } /// Populates `values` with proposed values. - fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val>) { + fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value>) { panic!("PrefixFilter::propose(): variable apparently unbound"); } /// Restricts `values` to proposed values. - fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val>) { + fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value>) { // We can only be here if we returned max_value() above. } } @@ -198,14 +198,14 @@ pub(crate) mod filters { /// /// This leaper cannot be used in isolation, it must be combined /// with other leapers. 
- pub struct ValueFilter bool> { - phantom: ::std::marker::PhantomData<(Tuple, Val)>, + pub struct ValueFilter bool> { + phantom: ::std::marker::PhantomData<(Tuple, Value)>, predicate: Func, } - impl<'leap, Tuple, Val, Func> ValueFilter + impl<'leap, Tuple, Value, Func> ValueFilter where - Func: Fn(&Tuple, &Val) -> bool, + Func: Fn(&Tuple, &Value) -> bool, { /// Creates a new filter based on the prefix pub fn from(predicate: Func) -> Self { @@ -216,99 +216,99 @@ pub(crate) mod filters { } } - impl<'leap, Tuple, Val, Func> Leaper<'leap, Tuple, Val> for ValueFilter + impl<'leap, Tuple, Value, Func> Leaper<'leap, Tuple, Value> for ValueFilter where - Func: Fn(&Tuple, &Val) -> bool, + Func: Fn(&Tuple, &Value) -> bool, { /// Estimates the number of proposed values. fn count(&mut self, _prefix: &Tuple) -> usize { usize::max_value() } /// Populates `values` with proposed values. - fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val>) { + fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value>) { panic!("PrefixFilter::propose(): variable apparently unbound"); } /// Restricts `values` to proposed values. - fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Val>) { + fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Value>) { values.retain(|val| (self.predicate)(prefix, val)); } } } /// Extension method for relations. -pub trait RelationLeaper { - /// Extend with `Val` using the elements of the relation. +pub trait RelationLeaper { + /// Extend with `Value` using the elements of the relation. fn extend_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key>( &'leap self, key_func: Func, - ) -> extend_with::ExtendWith<'leap, Key, Val, Tuple, Func> + ) -> extend_with::ExtendWith<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap; - /// Extend with `Val` using the complement of the relation. + Value: 'leap; + /// Extend with `Value` using the complement of the relation. 
fn extend_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key>( &'leap self, key_func: Func, - ) -> extend_anti::ExtendAnti<'leap, Key, Val, Tuple, Func> + ) -> extend_anti::ExtendAnti<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap; + Value: 'leap; /// Extend with any value if tuple is present in relation. - fn filter_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Val)>( + fn filter_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Value)>( &'leap self, key_func: Func, - ) -> filter_with::FilterWith<'leap, Key, Val, Tuple, Func> + ) -> filter_with::FilterWith<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap; + Value: 'leap; /// Extend with any value if tuple is absent from relation. - fn filter_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Val)>( + fn filter_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Value)>( &'leap self, key_func: Func, - ) -> filter_anti::FilterAnti<'leap, Key, Val, Tuple, Func> + ) -> filter_anti::FilterAnti<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap; + Value: 'leap; } -impl RelationLeaper for Relation<(Key, Val)> { +impl RelationLeaper for Relation<(Key, Value)> { fn extend_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key>( &'leap self, key_func: Func, - ) -> extend_with::ExtendWith<'leap, Key, Val, Tuple, Func> + ) -> extend_with::ExtendWith<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap, + Value: 'leap, { extend_with::ExtendWith::from(self, key_func) } fn extend_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key>( &'leap self, key_func: Func, - ) -> extend_anti::ExtendAnti<'leap, Key, Val, Tuple, Func> + ) -> extend_anti::ExtendAnti<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap, + Value: 'leap, { extend_anti::ExtendAnti::from(self, key_func) } - fn filter_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Val)>( + fn filter_with<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Value)>( &'leap self, key_func: Func, - ) -> filter_with::FilterWith<'leap, Key, 
Val, Tuple, Func> + ) -> filter_with::FilterWith<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap, + Value: 'leap, { filter_with::FilterWith::from(self, key_func) } - fn filter_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Val)>( + fn filter_anti<'leap, Tuple: Ord, Func: Fn(&Tuple) -> (Key, Value)>( &'leap self, key_func: Func, - ) -> filter_anti::FilterAnti<'leap, Key, Val, Tuple, Func> + ) -> filter_anti::FilterAnti<'leap, Key, Value, Tuple, Func> where Key: 'leap, - Val: 'leap, + Value: 'leap, { filter_anti::FilterAnti::from(self, key_func) } @@ -319,29 +319,29 @@ pub(crate) mod extend_with { use crate::join::gallop; /// Wraps a Relation as a leaper. - pub struct ExtendWith<'leap, Key, Val, Tuple, Func> + pub struct ExtendWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { - relation: &'leap Relation<(Key, Val)>, + relation: &'leap Relation<(Key, Value)>, start: usize, end: usize, key_func: Func, phantom: ::std::marker::PhantomData, } - impl<'leap, Key, Val, Tuple, Func> ExtendWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> ExtendWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { /// Constructs a ExtendWith from a relation and key and value function. 
- pub fn from(relation: &'leap Relation<(Key, Val)>, key_func: Func) -> Self { + pub fn from(relation: &'leap Relation<(Key, Value)>, key_func: Func) -> Self { ExtendWith { relation, start: 0, @@ -352,11 +352,11 @@ pub(crate) mod extend_with { } } - impl<'leap, Key, Val, Tuple, Func> Leaper<'leap, Tuple, Val> - for ExtendWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> Leaper<'leap, Tuple, Value> + for ExtendWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { @@ -368,11 +368,11 @@ pub(crate) mod extend_with { self.end = self.relation.len() - slice2.len(); slice1.len() - slice2.len() } - fn propose(&mut self, _prefix: &Tuple, values: &mut Vec<&'leap Val>) { + fn propose(&mut self, _prefix: &Tuple, values: &mut Vec<&'leap Value>) { let slice = &self.relation[self.start..self.end]; values.extend(slice.iter().map(|&(_, ref val)| val)); } - fn intersect(&mut self, _prefix: &Tuple, values: &mut Vec<&'leap Val>) { + fn intersect(&mut self, _prefix: &Tuple, values: &mut Vec<&'leap Value>) { let mut slice = &self.relation[self.start..self.end]; values.retain(|v| { slice = gallop(slice, |kv| &kv.1 < v); @@ -381,11 +381,11 @@ pub(crate) mod extend_with { } } - impl<'leap, Key, Val, Tuple, Func> Leapers<'leap, Tuple, Val> - for ExtendWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> Leapers<'leap, Tuple, Value> + for ExtendWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { @@ -393,12 +393,12 @@ pub(crate) mod extend_with { op(0, self.count(tuple)) } - fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Val>) { + fn propose(&mut self, tuple: &Tuple, min_index: usize, values: &mut Vec<&'leap Value>) { assert_eq!(min_index, 0); Leaper::propose(self, tuple, values); } - fn intersect(&mut self, _: &Tuple, min_index: usize, _: 
&mut Vec<&'leap Val>) { + fn intersect(&mut self, _: &Tuple, min_index: usize, _: &mut Vec<&'leap Value>) { assert_eq!(min_index, 0); } } @@ -409,27 +409,27 @@ pub(crate) mod extend_anti { use crate::join::gallop; /// Wraps a Relation as a leaper. - pub struct ExtendAnti<'leap, Key, Val, Tuple, Func> + pub struct ExtendAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { - relation: &'leap Relation<(Key, Val)>, + relation: &'leap Relation<(Key, Value)>, key_func: Func, phantom: ::std::marker::PhantomData, } - impl<'leap, Key, Val, Tuple, Func> ExtendAnti<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> ExtendAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { /// Constructs a ExtendAnti from a relation and key and value function. - pub fn from(relation: &'leap Relation<(Key, Val)>, key_func: Func) -> Self { + pub fn from(relation: &'leap Relation<(Key, Value)>, key_func: Func) -> Self { ExtendAnti { relation, key_func, @@ -438,21 +438,21 @@ pub(crate) mod extend_anti { } } - impl<'leap, Key: Ord, Val: Ord + 'leap, Tuple: Ord, Func> Leaper<'leap, Tuple, Val> - for ExtendAnti<'leap, Key, Val, Tuple, Func> + impl<'leap, Key: Ord, Value: Ord + 'leap, Tuple: Ord, Func> Leaper<'leap, Tuple, Value> + for ExtendAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, Func: Fn(&Tuple) -> Key, { fn count(&mut self, _prefix: &Tuple) -> usize { usize::max_value() } - fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val>) { + fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value>) { panic!("ExtendAnti::propose(): variable apparently unbound."); } - fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Val>) { + fn intersect(&mut self, prefix: &Tuple, values: &mut Vec<&'leap Value>) { 
let key = (self.key_func)(prefix); let start = binary_search(&self.relation.elements, |x| &x.0 < &key); let slice1 = &self.relation[start..]; @@ -473,27 +473,27 @@ pub(crate) mod filter_with { use super::{Leaper, Leapers, Relation}; /// Wraps a Relation as a leaper. - pub struct FilterWith<'leap, Key, Val, Tuple, Func> + pub struct FilterWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { - relation: &'leap Relation<(Key, Val)>, + relation: &'leap Relation<(Key, Value)>, key_func: Func, phantom: ::std::marker::PhantomData, } - impl<'leap, Key, Val, Tuple, Func> FilterWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> FilterWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { /// Constructs a FilterWith from a relation and key and value function. 
- pub fn from(relation: &'leap Relation<(Key, Val)>, key_func: Func) -> Self { + pub fn from(relation: &'leap Relation<(Key, Value)>, key_func: Func) -> Self { FilterWith { relation, key_func, @@ -502,13 +502,13 @@ pub(crate) mod filter_with { } } - impl<'leap, Key, Val, Val2, Tuple, Func> Leaper<'leap, Tuple, Val2> - for FilterWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Value2, Tuple, Func> Leaper<'leap, Tuple, Value2> + for FilterWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { fn count(&mut self, prefix: &Tuple) -> usize { let key_val = (self.key_func)(prefix); @@ -518,21 +518,21 @@ pub(crate) mod filter_with { 0 } } - fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val2>) { + fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value2>) { panic!("FilterWith::propose(): variable apparently unbound."); } - fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val2>) { + fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value2>) { // Only here because we didn't return zero above, right? } } - impl<'leap, Key, Val, Tuple, Func> Leapers<'leap, Tuple, ()> - for FilterWith<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> Leapers<'leap, Tuple, ()> + for FilterWith<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { fn for_each_count(&mut self, tuple: &Tuple, mut op: impl FnMut(usize, usize)) { if >::count(self, tuple) == 0 { @@ -559,27 +559,27 @@ pub(crate) mod filter_anti { use super::{Leaper, Leapers, Relation}; /// Wraps a Relation as a leaper. 
- pub struct FilterAnti<'leap, Key, Val, Tuple, Func> + pub struct FilterAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { - relation: &'leap Relation<(Key, Val)>, + relation: &'leap Relation<(Key, Value)>, key_func: Func, phantom: ::std::marker::PhantomData, } - impl<'leap, Key, Val, Tuple, Func> FilterAnti<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> FilterAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { /// Constructs a FilterAnti from a relation and key and value function. - pub fn from(relation: &'leap Relation<(Key, Val)>, key_func: Func) -> Self { + pub fn from(relation: &'leap Relation<(Key, Value)>, key_func: Func) -> Self { FilterAnti { relation, key_func, @@ -588,13 +588,13 @@ pub(crate) mod filter_anti { } } - impl<'leap, Key: Ord, Val: Ord + 'leap, Val2, Tuple: Ord, Func> Leaper<'leap, Tuple, Val2> - for FilterAnti<'leap, Key, Val, Tuple, Func> + impl<'leap, Key: Ord, Value: Ord + 'leap, Value2, Tuple: Ord, Func> Leaper<'leap, Tuple, Value2> + for FilterAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { fn count(&mut self, prefix: &Tuple) -> usize { let key_val = (self.key_func)(prefix); @@ -604,21 +604,21 @@ pub(crate) mod filter_anti { usize::max_value() } } - fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val2>) { + fn propose(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value2>) { panic!("FilterAnti::propose(): variable apparently unbound."); } - fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Val2>) { + fn intersect(&mut self, _prefix: &Tuple, _values: &mut Vec<&'leap Value2>) { 
// Only here because we didn't return zero above, right? } } - impl<'leap, Key, Val, Tuple, Func> Leapers<'leap, Tuple, ()> - for FilterAnti<'leap, Key, Val, Tuple, Func> + impl<'leap, Key, Value, Tuple, Func> Leapers<'leap, Tuple, ()> + for FilterAnti<'leap, Key, Value, Tuple, Func> where Key: Ord + 'leap, - Val: Ord + 'leap, + Value: Ord + 'leap, Tuple: Ord, - Func: Fn(&Tuple) -> (Key, Val), + Func: Fn(&Tuple) -> (Key, Value), { fn for_each_count(&mut self, tuple: &Tuple, mut op: impl FnMut(usize, usize)) { if >::count(self, tuple) == 0 { @@ -647,11 +647,11 @@ pub(crate) mod filter_anti { /// /// By accepting a vector instead of a slice, we can do a small optimization when computing the /// midpoint. -fn binary_search(vec: &Vec, mut cmp: impl FnMut(&T) -> bool) -> usize { +fn binary_search(vec: &Vec, mut cmp: impl FnMut(&Tuple) -> bool) -> usize { // The midpoint calculation we use below is only correct for vectors with less than `isize::MAX` // elements. This is always true for vectors of sized types but maybe not for ZSTs? Sorting // ZSTs doesn't make much sense, so just forbid it here. - assert!(std::mem::size_of::() > 0); + assert!(std::mem::size_of::() > 0); // we maintain the invariant that `lo` many elements of `slice` satisfy `cmp`. // `hi` is maintained at the first element we know does not satisfy `cmp`. @@ -667,7 +667,7 @@ fn binary_search(vec: &Vec, mut cmp: impl FnMut(&T) -> bool) -> usize { // LLVM seems to be unable to prove that `mid` is always less than `vec.len()`, so use // `get_unchecked` to avoid a bounds check since this code is hot. 
- let el: &T = unsafe { vec.get_unchecked(mid) }; + let el: &Tuple = unsafe { vec.get_unchecked(mid) }; if cmp(el) { lo = mid + 1; } else { diff --git a/src/variable.rs b/src/variable.rs index f8623b6..1f160f4 100644 --- a/src/variable.rs +++ b/src/variable.rs @@ -53,8 +53,8 @@ pub struct Variable { impl Variable { /// Adds tuples that result from joining `input1` and `input2` -- /// each of the inputs must be a set of (Key, Value) tuples. Both - /// `input1` and `input2` must have the same type of key (`K`) but - /// they can have distinct value types (`V1` and `V2` + /// `input1` and `input2` must have the same type of key (`Key`) but + /// they can have distinct value types (`Value1` and `Value2` /// respectively). The `logic` closure will be invoked for each /// key that appears in both inputs; it is also given the two /// values, and from those it should construct the resulting @@ -89,15 +89,88 @@ impl Variable { /// let result = variable.complete(); /// assert_eq!(result.len(), 121); /// ``` - pub fn from_join<'me, K: Ord, V1: Ord, V2: Ord>( + pub fn from_join<'me, Key: Ord, Value1: Ord, Value2: Ord>( &self, - input1: &'me Variable<(K, V1)>, - input2: impl JoinInput<'me, (K, V2)>, - logic: impl FnMut(&K, &V1, &V2) -> Tuple, + input1: &'me Variable<(Key, Value1)>, + input2: impl JoinInput<'me, (Key, Value2)>, + logic: impl FnMut(&Key, &Value1, &Value2) -> Tuple, ) { join::join_into(input1, input2, self, logic) } + /// Adds tuples that result from joining `input1` and `input2` -- + /// each of the inputs must be a set of tuples. Both + /// `input1` and `input2` must have the same type of key (`Key`) + /// when seen through `accessor1` and `accessor2` respectively, but + /// they can have distinct tuple types (`Tuple1` and `Tuple2` + /// respectively). The `logic` closure will be invoked for each + /// key that appears in both inputs; it is also given the two + /// matching tuples, and from those it should construct the + /// resulting value.
+ /// + /// Note that `input1` must be a variable, but `input2` can be a + /// relation or a variable. Therefore, you cannot join two + /// relations with this method. This is not because the result + /// would be wrong, but because it would be inefficient: the + /// result from such a join cannot vary across iterations (as + /// relations are fixed), so you should prefer to invoke `insert` + /// on a relation created by `Relation::from_join` instead. + /// + /// # Examples + /// + /// This example starts a collection with tuples consisting of a name, a number and a boolean flag. + /// It creates a second collection with the names of all tuples whose number is found in the relation + /// which should leave us with 2 total tuples. + /// + /// Notice how we are able to use variables and relations containing tuples of arbitrary arity + /// and how we can pick elements as keys that are not at position 0 within the tuple. + /// + /// ``` + /// use datafrog::{Iteration, Relation}; + /// + /// let mut iteration = Iteration::new(); + /// + /// let variable = iteration.variable::<(&'static str, usize, bool)>("variable"); + /// + /// variable.extend(vec![ + /// ("Alice", 0, true), + /// ("Bob", 1, false), + /// ("Eve", 2, false), + /// ("Mallory", 3, true), + /// ("Trent", 4, false), + /// ]); + /// + /// let names_with_true = iteration.variable::<&'static str>("names with true"); + /// + /// let relation: Relation<(usize, bool)> = Relation::from_vec(vec![(0, true), (2, false)]); + /// + /// while iteration.changed() { + /// names_with_true.from_join_by( + /// &variable, + /// &relation, + /// |(_name, number, _flag)| number, + /// |(number, _flag)| number, + /// |(name1, _number1, _flag1), (_number2, _flag2)| name1.clone(), + /// ); + /// } + /// + /// let result = names_with_true.complete(); + /// assert_eq!(result.len(), 2); + /// ``` + pub fn from_join_by<'me, Key: Ord, Tuple1: Ord, Tuple2: Ord, Accessor1, Accessor2>( + &self, + input1: &'me Variable, + input2: impl JoinInput<'me, Tuple2>, + accessor1: Accessor1, +
accessor2: Accessor2, + logic: impl FnMut(&Tuple1, &Tuple2) -> Tuple, + ) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, + { + join::join_into_by(input1, input2, self, accessor1, accessor2, logic) + } + /// Adds tuples from `input1` whose key is not present in `input2`. /// /// Note that `input1` must be a variable: if you have a relation @@ -127,15 +198,79 @@ impl Variable { /// let result = variable.complete(); /// assert_eq!(result.len(), 16); /// ``` - pub fn from_antijoin( + pub fn from_antijoin( &self, - input1: &Variable<(K, V)>, - input2: &Relation, - logic: impl FnMut(&K, &V) -> Tuple, + input1: &Variable<(Key, Value)>, + input2: &Relation, + logic: impl FnMut(&Key, &Value) -> Tuple, ) { self.insert(join::antijoin(input1, input2, logic)) } + /// Adds tuples from `input1` whose key is not present in `input2`. + /// + /// Note that `input1` must be a variable: if you have a relation + /// instead, you can use `Relation::from_antijoin` and then + /// `Variable::insert`. Note that the result will not vary during + /// the iteration. + /// + /// # Examples + /// + /// This example starts a collection with tuples consisting of a name, a number and a boolean flag. + /// It creates a second collection with the names of all tuples whose number is not found in the relation + /// which should leave us with 3 total tuples. + /// + /// Notice how we are able to use variables and relations containing tuples of arbitrary arity + /// and how we can pick elements as keys that are not at position 0 within the tuple. 
+ /// + /// ``` + /// use datafrog::{Iteration, Relation}; + /// + /// let mut iteration = Iteration::new(); + /// + /// let variable = iteration.variable::<(&'static str, usize, bool)>("variable"); + /// + /// variable.extend(vec![ + /// ("Alice", 0, true), + /// ("Bob", 1, false), + /// ("Eve", 2, false), + /// ("Mallory", 3, true), + /// ("Trent", 4, false), + /// ]); + /// + /// let names = iteration.variable::<&'static str>("names"); + /// + /// let relation: Relation<(usize, bool)> = Relation::from_vec(vec![(0, true), (2, false)]); + /// + /// while iteration.changed() { + /// names.from_antijoin_by( + /// &variable, + /// &relation, + /// |(_name, number, _flag)| number, + /// |(number, _flag)| number, + /// |(name, _number, _flag)| name.clone(), + /// ); + /// } + /// + /// let result = names.complete(); + /// assert_eq!(result.len(), 3); + /// ``` + pub fn from_antijoin_by( + &self, + input1: &Variable, + input2: &Relation, + accessor1: Accessor1, + accessor2: Accessor2, + logic: impl FnMut(&Tuple1) -> Tuple, + ) where + Accessor1: Fn(&Tuple1) -> &Key, + Accessor2: Fn(&Tuple2) -> &Key, + { + self.insert(join::antijoin_by( + input1, input2, accessor1, accessor2, logic, + )) + } + /// Adds tuples that result from mapping `input`. /// /// # Examples @@ -165,7 +300,11 @@ impl Variable { /// let result = variable.complete(); /// assert_eq!(result.len(), 74); /// ``` - pub fn from_map(&self, input: &Variable, logic: impl FnMut(&T2) -> Tuple) { + pub fn from_map( + &self, + input: &Variable, + logic: impl FnMut(&Tuple2) -> Tuple, + ) { map::map_into(input, self, logic) } @@ -185,14 +324,14 @@ impl Variable { /// `leapers`, each of which is derived from a fixed relation. The `leapers` /// should be either a single leaper (of suitable type) or else a tuple of leapers. /// You can create a leaper in one of two ways: - /// - Extension: In this case, you have a relation of type `(K, Val)` for some - /// type `K`. 
You provide a closure that maps from `SourceTuple` to the key - /// `K`. If you use `relation.extend_with`, then any `Val` values the + /// - Extension: In this case, you have a relation of type `(Key, Val)` for some + /// type `Key`. You provide a closure that maps from `SourceTuple` to the key + /// `Key`. If you use `relation.extend_with`, then any `Val` values the /// relation provides will be added to the set of values; if you use /// `extend_anti`, then the `Val` values will be removed. - /// - Filtering: In this case, you have a relation of type `K` for some - /// type `K` and you provide a closure that maps from `SourceTuple` to - /// the key `K`. Filters don't provide values but they remove source + /// - Filtering: In this case, you have a relation of type `Key` for some + /// type `Key` and you provide a closure that maps from `SourceTuple` to + /// the key `Key`. Filters don't provide values but they remove source /// tuples. /// - Finally, you get a callback `logic` that accepts each `(SourceTuple, Val)` /// that was successfully joined (and not filtered) and which maps to the