@@ -771,7 +771,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
771
771
ObjectPool* pool, const std::vector<TPlanNode>& tnodes,
772
772
const doris::TPipelineFragmentParams& request, const DescriptorTbl& descs,
773
773
OperatorXPtr parent, int * node_idx, OperatorXPtr* root, PipelinePtr& cur_pipe,
774
- int child_idx, const bool followed_by_shuffled_join ) {
774
+ int child_idx, const bool followed_by_shuffled_operator ) {
775
775
// propagate error case
776
776
if (*node_idx >= tnodes.size ()) {
777
777
// TODO: print thrift msg
@@ -782,11 +782,11 @@ Status PipelineXFragmentContext::_create_tree_helper(
782
782
const TPlanNode& tnode = tnodes[*node_idx];
783
783
784
784
int num_children = tnodes[*node_idx].num_children ;
785
- bool current_followed_by_shuffled_join = followed_by_shuffled_join ;
785
+ bool current_followed_by_shuffled_operator = followed_by_shuffled_operator ;
786
786
OperatorXPtr op = nullptr ;
787
787
RETURN_IF_ERROR (_create_operator (pool, tnodes[*node_idx], request, descs, op, cur_pipe,
788
788
parent == nullptr ? -1 : parent->node_id (), child_idx,
789
- followed_by_shuffled_join ));
789
+ followed_by_shuffled_operator ));
790
790
791
791
// assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr));
792
792
if (parent != nullptr ) {
@@ -797,7 +797,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
797
797
}
798
798
799
799
/* *
800
- * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled hash join.
800
+ * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled operator (shuffled hash join, union operator followed by co-located operators) .
801
801
*
802
802
* For plan:
803
803
* LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2)
@@ -811,8 +811,8 @@ Status PipelineXFragmentContext::_create_tree_helper(
811
811
cur_pipe->operator_xs ().empty ()
812
812
? cur_pipe->sink_x ()->require_shuffled_data_distribution ()
813
813
: op->require_shuffled_data_distribution ();
814
- current_followed_by_shuffled_join =
815
- (followed_by_shuffled_join || op->is_shuffled_hash_join ()) &&
814
+ current_followed_by_shuffled_operator =
815
+ (followed_by_shuffled_operator || op->is_shuffled_operator ()) &&
816
816
require_shuffled_data_distribution;
817
817
818
818
cur_pipe->_name .push_back (' -' );
@@ -823,7 +823,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
823
823
for (int i = 0 ; i < num_children; i++) {
824
824
++*node_idx;
825
825
RETURN_IF_ERROR (_create_tree_helper (pool, tnodes, request, descs, op, node_idx, nullptr ,
826
- cur_pipe, i, current_followed_by_shuffled_join ));
826
+ cur_pipe, i, current_followed_by_shuffled_operator ));
827
827
828
828
// we are expecting a child, but have used all nodes
829
829
// this means we have been given a bad tree and must fail
@@ -865,13 +865,13 @@ Status PipelineXFragmentContext::_add_local_exchange_impl(
865
865
* `bucket_seq_to_instance_idx` is empty if no scan operator is contained in this fragment.
866
866
* So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE` instead of `BUCKET_HASH_SHUFFLE`.
867
867
*/
868
- const bool followed_by_shuffled_join =
869
- operator_xs.size () > idx ? operator_xs[idx]->followed_by_shuffled_join ()
870
- : cur_pipe->sink_x ()->followed_by_shuffled_join ();
868
+ const bool followed_by_shuffled_operator =
869
+ operator_xs.size () > idx ? operator_xs[idx]->followed_by_shuffled_operator ()
870
+ : cur_pipe->sink_x ()->followed_by_shuffled_operator ();
871
871
const bool should_disable_bucket_shuffle =
872
872
bucket_seq_to_instance_idx.empty () &&
873
873
shuffle_idx_to_instance_idx.find (-1 ) == shuffle_idx_to_instance_idx.end () &&
874
- followed_by_shuffled_join ;
874
+ followed_by_shuffled_operator ;
875
875
sink.reset (new LocalExchangeSinkOperatorX (
876
876
sink_id, local_exchange_id,
877
877
should_disable_bucket_shuffle ? _total_instances : _num_instances,
@@ -1047,7 +1047,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1047
1047
const DescriptorTbl& descs, OperatorXPtr& op,
1048
1048
PipelinePtr& cur_pipe, int parent_idx,
1049
1049
int child_idx,
1050
- const bool followed_by_shuffled_join ) {
1050
+ const bool followed_by_shuffled_operator ) {
1051
1051
// We directly construct the operator from Thrift because the given array is in the order of preorder traversal.
1052
1052
// Therefore, here we need to use a stack-like structure.
1053
1053
_pipeline_parent_map.pop (cur_pipe, parent_idx, child_idx);
@@ -1121,7 +1121,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1121
1121
op.reset (new DistinctStreamingAggOperatorX (pool, next_operator_id (), tnode, descs,
1122
1122
_require_bucket_distribution));
1123
1123
RETURN_IF_ERROR (cur_pipe->add_operator (op));
1124
- op->set_followed_by_shuffled_join (followed_by_shuffled_join );
1124
+ op->set_followed_by_shuffled_operator (followed_by_shuffled_operator );
1125
1125
_require_bucket_distribution =
1126
1126
_require_bucket_distribution || op->require_data_distribution ();
1127
1127
} else if (tnode.agg_node .__isset .use_streaming_preaggregation &&
@@ -1152,7 +1152,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1152
1152
sink.reset (new AggSinkOperatorX (pool, next_sink_operator_id (), tnode, descs,
1153
1153
_require_bucket_distribution));
1154
1154
}
1155
- sink->set_followed_by_shuffled_join (followed_by_shuffled_join );
1155
+ sink->set_followed_by_shuffled_operator (followed_by_shuffled_operator );
1156
1156
_require_bucket_distribution =
1157
1157
_require_bucket_distribution || sink->require_data_distribution ();
1158
1158
sink->set_dests_id ({op->operator_id ()});
@@ -1203,8 +1203,8 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1203
1203
1204
1204
_pipeline_parent_map.push (op->node_id (), cur_pipe);
1205
1205
_pipeline_parent_map.push (op->node_id (), build_side_pipe);
1206
- sink->set_followed_by_shuffled_join (sink->is_shuffled_hash_join ());
1207
- op->set_followed_by_shuffled_join (op->is_shuffled_hash_join ());
1206
+ sink->set_followed_by_shuffled_operator (sink->is_shuffled_operator ());
1207
+ op->set_followed_by_shuffled_operator (op->is_shuffled_operator ());
1208
1208
} else {
1209
1209
op.reset (new HashJoinProbeOperatorX (pool, tnode, next_operator_id (), descs));
1210
1210
RETURN_IF_ERROR (cur_pipe->add_operator (op));
@@ -1225,8 +1225,8 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1225
1225
1226
1226
_pipeline_parent_map.push (op->node_id (), cur_pipe);
1227
1227
_pipeline_parent_map.push (op->node_id (), build_side_pipe);
1228
- sink->set_followed_by_shuffled_join (sink->is_shuffled_hash_join ());
1229
- op->set_followed_by_shuffled_join (op->is_shuffled_hash_join ());
1228
+ sink->set_followed_by_shuffled_operator (sink->is_shuffled_operator ());
1229
+ op->set_followed_by_shuffled_operator (op->is_shuffled_operator ());
1230
1230
}
1231
1231
_require_bucket_distribution =
1232
1232
_require_bucket_distribution || op->require_data_distribution ();
@@ -1256,6 +1256,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1256
1256
case TPlanNodeType::UNION_NODE: {
1257
1257
int child_count = tnode.num_children ;
1258
1258
op.reset (new UnionSourceOperatorX (pool, tnode, next_operator_id (), descs));
1259
+ op->set_followed_by_shuffled_operator (_require_bucket_distribution);
1259
1260
RETURN_IF_ERROR (cur_pipe->add_operator (op));
1260
1261
1261
1262
const auto downstream_pipeline_id = cur_pipe->id ();
@@ -1298,7 +1299,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1298
1299
sink.reset (new SortSinkOperatorX (pool, next_sink_operator_id (), tnode, descs,
1299
1300
_require_bucket_distribution));
1300
1301
}
1301
- sink->set_followed_by_shuffled_join (followed_by_shuffled_join );
1302
+ sink->set_followed_by_shuffled_operator (followed_by_shuffled_operator );
1302
1303
_require_bucket_distribution =
1303
1304
_require_bucket_distribution || sink->require_data_distribution ();
1304
1305
sink->set_dests_id ({op->operator_id ()});
@@ -1338,7 +1339,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1338
1339
DataSinkOperatorXPtr sink;
1339
1340
sink.reset (new AnalyticSinkOperatorX (pool, next_sink_operator_id (), tnode, descs,
1340
1341
_require_bucket_distribution));
1341
- sink->set_followed_by_shuffled_join (followed_by_shuffled_join );
1342
+ sink->set_followed_by_shuffled_operator (followed_by_shuffled_operator );
1342
1343
_require_bucket_distribution =
1343
1344
_require_bucket_distribution || sink->require_data_distribution ();
1344
1345
sink->set_dests_id ({op->operator_id ()});
@@ -1349,11 +1350,13 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
1349
1350
case TPlanNodeType::INTERSECT_NODE: {
1350
1351
RETURN_IF_ERROR (_build_operators_for_set_operation_node<true >(
1351
1352
pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
1353
+ op->set_followed_by_shuffled_operator (_require_bucket_distribution);
1352
1354
break ;
1353
1355
}
1354
1356
case TPlanNodeType::EXCEPT_NODE: {
1355
1357
RETURN_IF_ERROR (_build_operators_for_set_operation_node<false >(
1356
1358
pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
1359
+ op->set_followed_by_shuffled_operator (_require_bucket_distribution);
1357
1360
break ;
1358
1361
}
1359
1362
case TPlanNodeType::REPEAT_NODE: {
0 commit comments