Skip to content

Commit e93604e

Browse files
mrhhsgwyxxxcat
authored and committed
[fix](ub) undefined behavior in FixedContainer (apache#39191)
## Proposed changes Undefined behavior occurs if there is a null value in the list. ``` /root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null pointer passed as argument 2, which is declared to never be null /var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull attribute specified here #0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:271:41 #1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:274:60 #2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&) const /root/doris/be/src/exprs/hybrid_set.h:76:36 #3 0x5616d072245d in void doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned long, doris::vectorized::PODArray, 16ul, 15ul> const*, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/exprs/hybrid_set.h:688:63 #4 0x5616d0747857 in doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/in.h:170:21 #5 0x5616c741fa3a in doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/function.h:462:26 #6 0x5616cbb5b650 in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp #7 0x5616cbb4e14e in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12 #8 0x5616cbb4e3c2 in 
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:250:12 #9 0x5616c741cd68 in doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.h:190:19 #10 0x5616c74cf712 in doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5 #11 0x5616c740d5c0 in doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*, std::vector>&) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9 #12 0x5616c740ecf5 in doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12 #13 0x5616c7462e24 in doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vcompound_pred.h:127:38 #14 0x5616c74bccec in doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5 #15 0x5616c74c1dcc in doris::vectorized::VExprContext::execute_conjuncts(std::vector, std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul, 15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray, 16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9 #16 0x5616c74c5108 in doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector, std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5 
#17 0x5616ad8a7f1a in doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*, unsigned short&, doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5 #18 0x5616ad89e86e in doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25 #19 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9 #20 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19 #21 0x5616ad6dd9cc in doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33 #22 0x5616ad269d67 in doris::BetaRowsetReader::next_block(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29 #23 0x5616de6de110 in doris::vectorized::VCollectIterator::Level0Iterator::_refresh() /root/doris/be/src/vec/olap/vcollect_iterator.h #24 0x5616de6c967f in doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24 #25 0x5616de6ca8a6 in doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14 #26 0x5616de6d7008 in doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27 #27 0x5616de6bd200 in doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&) /root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9 #28 0x5616de651b6c in 
doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5 #29 0x5616de65526f in doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:229:19 #30 0x5616e175a0f9 in doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) /root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32 #31 0x5616c736ad34 in doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5 #32 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21 #33 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31 #34 0x5616c736f05e in void std::_invoke_impl, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14 #35 0x5616c736f05e in std::enable_if, std::shared_ptr)::$1::operator()() const::'lambda'()&>, void>::type std::_invoke_r, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2 #36 0x5616c736f05e in std::_Function_handler, std::shared_ptr)::$_1::operator()() 
const::'lambda'()>::_M_invoke(std::_Any_data const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9 #37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:543:24 #38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8 #40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/vec/common/string_ref.h:271:54 in ```
1 parent 2f05c3a commit e93604e

File tree

4 files changed

+61
-1
lines changed

4 files changed

+61
-1
lines changed

be/src/exprs/hybrid_set.h

+35
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717

1818
#pragma once
1919

20+
#include <glog/logging.h>
21+
22+
#include <type_traits>
23+
24+
#include "common/exception.h"
2025
#include "common/object_pool.h"
26+
#include "common/status.h"
2127
#include "exprs/runtime_filter.h"
2228
#include "runtime/decimalv2_value.h"
2329
#include "runtime/define_primitive_type.h"
@@ -60,8 +66,16 @@ class FixedContainer {
6066
}
6167
}
6268

69+
// Sanity check on the container's fill level: throws doris::Exception with
// ErrorCode::INTERNAL_ERROR when the runtime member `_size` is not equal to
// the compile-time template parameter N. Does nothing when they match.
// NOTE(review): presumably this keeps find() from comparing against slots that
// were never populated (e.g. when NULL literals of an IN list are skipped) —
// confirm against the insert path.
void check_size() {
70+
if (N != _size) {
71+
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
72+
"invalid size of FixedContainer<{}>: {}", N, _size);
73+
}
74+
}
75+
6376
// Using '|' instead of '||' gives better performance according to tests.
6477
ALWAYS_INLINE bool find(const T& value) const {
78+
DCHECK_EQ(N, _size);
6579
if constexpr (N == 0) {
6680
return false;
6781
}
@@ -144,6 +158,12 @@ class FixedContainer {
144158
size_t _size {};
145159
};
146160

161+
// Type trait: IsFixedContainer<X>::value is false for an arbitrary type X
// (primary template) and true only when X is a FixedContainer<T, N>
// (partial specialization below). It is used with `if constexpr` so that
// check_size() is only called on sets backed by a FixedContainer.
template <typename T>
162+
struct IsFixedContainer : std::false_type {};
163+
164+
// Partial specialization matching every FixedContainer<T, N>.
template <typename T, size_t N>
165+
struct IsFixedContainer<FixedContainer<T, N>> : std::true_type {};
166+
147167
/**
148168
* Dynamic Container uses phmap::flat_hash_set.
149169
* @tparam T Element Type
@@ -354,6 +374,11 @@ class HybridSet : public HybridSetBase {
354374
if constexpr (is_nullable) {
355375
null_map_data = null_map->data();
356376
}
377+
378+
if constexpr (IsFixedContainer<ContainerType>::value) {
379+
_set.check_size();
380+
}
381+
357382
auto* __restrict result_data = results.data();
358383
for (size_t i = 0; i < rows; ++i) {
359384
if constexpr (!is_nullable && !is_negative) {
@@ -507,6 +532,11 @@ class StringSet : public HybridSetBase {
507532
if constexpr (is_nullable) {
508533
null_map_data = null_map->data();
509534
}
535+
536+
if constexpr (IsFixedContainer<ContainerType>::value) {
537+
_set.check_size();
538+
}
539+
510540
auto* __restrict result_data = results.data();
511541
for (size_t i = 0; i < rows; ++i) {
512542
const auto& string_data = col.get_data_at(i).to_string();
@@ -675,6 +705,11 @@ class StringValueSet : public HybridSetBase {
675705
if constexpr (is_nullable) {
676706
null_map_data = null_map->data();
677707
}
708+
709+
if constexpr (IsFixedContainer<ContainerType>::value) {
710+
_set.check_size();
711+
}
712+
678713
auto* __restrict result_data = results.data();
679714
for (size_t i = 0; i < rows; ++i) {
680715
uint32_t len = offset[i] - offset[i - 1];

be/src/vec/functions/in.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class FunctionIn : public IFunction {
114114
context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR ||
115115
context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) {
116116
// the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117-
state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1)));
117+
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
118118
} else {
119119
state->hybrid_set.reset(
120120
create_set(context->get_arg_type(0)->type, get_size_with_out_null(context)));

regression-test/data/nereids_syntax_p0/inpredicate.out

+9
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,12 @@
3131
29 Supplier#000000029 VVSymB3fbwaN ARGENTINA4 ARGENTINA AMERICA 11-773-203-7342
3232
9 Supplier#000000009 ,gJ6K2MKveYxQT IRAN 6 IRAN MIDDLE EAST 20-338-906-3675
3333

34+
-- !in_predicate_11 --
35+
15 Supplier#000000015 DF35PepL5saAK INDIA 0 INDIA ASIA 18-687-542-7601
36+
37+
-- !in_predicate_12 --
38+
39+
-- !in_predicate_13 --
40+
41+
-- !in_predicate_14 --
42+

regression-test/suites/nereids_syntax_p0/inpredicate.groovy

+16
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,21 @@ suite("inpredicate") {
6161
order_qt_in_predicate_10 """
6262
SELECT * FROM supplier WHERE s_suppkey not in (15);
6363
"""
64+
65+
order_qt_in_predicate_11 """
66+
SELECT * FROM supplier WHERE s_suppkey in (15, null);
67+
"""
68+
69+
order_qt_in_predicate_12 """
70+
SELECT * FROM supplier WHERE s_suppkey not in (15, null);
71+
"""
72+
73+
order_qt_in_predicate_13 """
74+
SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null);
75+
"""
76+
77+
order_qt_in_predicate_14 """
78+
SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null);
79+
"""
6480
}
6581

0 commit comments

Comments
 (0)