Skip to content

Commit cecd214

Browse files
zclllyybb and mrhhsg authored
[branch-2.1](Column) refactor ColumnNullable to provide flags safety (apache#40769) (apache#40848)
pick apache#40769 Co-authored-by: Jerry Hu <[email protected]>
1 parent 963415c commit cecd214

File tree

8 files changed

+426
-100
lines changed

8 files changed

+426
-100
lines changed

be/src/vec/columns/column_nullable.cpp

+37 -48
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,19 @@
3131
namespace doris::vectorized {
3232

3333
ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_)
34-
: nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) {
34+
: NullMapProvider(std::move(null_map_)), nested_column(std::move(nested_column_)) {
3535
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
3636
nested_column = get_nested_column().convert_to_full_column_if_const();
3737

3838
// after convert const column to full column, it may be a nullable column
3939
if (nested_column->is_nullable()) {
40-
assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const ColumnUInt8&)*null_map);
41-
null_map = assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
40+
assert_cast<ColumnNullable&>(*nested_column)
41+
.apply_null_map(static_cast<const ColumnUInt8&>(get_null_map_column()));
42+
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
4243
nested_column = assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
4344
}
4445

45-
if (is_column_const(*null_map)) {
46+
if (is_column_const(get_null_map_column())) [[unlikely]] {
4647
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
4748
"ColumnNullable cannot have constant null map");
4849
__builtin_unreachable();
@@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t
6970
nested_column->update_xxHash_with_value(start, end, hash, nullptr);
7071
} else {
7172
const auto* __restrict real_null_data =
72-
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
73+
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
7374
for (int i = start; i < end; ++i) {
7475
if (real_null_data[i] != 0) {
7576
hash = HashUtil::xxHash64NullWithSeed(hash);
@@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t& h
8586
nested_column->update_crc_with_value(start, end, hash, nullptr);
8687
} else {
8788
const auto* __restrict real_null_data =
88-
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
89+
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
8990
for (int i = start; i < end; ++i) {
9091
if (real_null_data[i] != 0) {
9192
hash = HashUtil::zlib_crc_hash_null(hash);
@@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris::
110111
auto s = rows;
111112
DCHECK(s == size());
112113
const auto* __restrict real_null_data =
113-
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
114+
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
114115
if (!has_null()) {
115116
nested_column->update_crcs_with_value(hashes, type, rows, offset, nullptr);
116117
} else {
@@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes,
128129
DCHECK(null_data == nullptr);
129130
auto s = size();
130131
const auto* __restrict real_null_data =
131-
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
132+
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
132133
if (!has_null()) {
133134
nested_column->update_hashes_with_value(hashes, nullptr);
134135
} else {
@@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
183184
void ColumnNullable::insert_data(const char* pos, size_t length) {
184185
if (pos == nullptr) {
185186
get_nested_column().insert_default();
186-
_get_null_map_data().push_back(1);
187+
get_null_map_data().push_back(1);
187188
_has_null = true;
189+
_need_update_has_null = false;
188190
} else {
189191
get_nested_column().insert_data(pos, length);
190-
_get_null_map_data().push_back(0);
192+
_push_false_to_nullmap(1);
191193
}
192194
}
193195

194196
void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) {
195-
auto& null_map_data = _get_null_map_data();
196197
for (size_t i = 0; i != num; ++i) {
197198
if (strings[i].data == nullptr) {
198199
nested_column->insert_default();
199-
null_map_data.push_back(1);
200+
get_null_map_data().push_back(1);
200201
_has_null = true;
201202
} else {
202203
nested_column->insert_data(strings[i].data, strings[i].size);
203-
null_map_data.push_back(0);
204+
_push_false_to_nullmap(1);
204205
}
205206
}
206207
}
@@ -227,13 +228,14 @@ const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
227228
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
228229
pos += sizeof(val);
229230

230-
_get_null_map_data().push_back(val);
231+
get_null_map_data().push_back(val);
231232

232233
if (val == 0) {
233234
pos = get_nested_column().deserialize_and_insert_from_arena(pos);
234235
} else {
235236
get_nested_column().insert_default();
236237
_has_null = true;
238+
_need_update_has_null = false;
237239
}
238240

239241
return pos;
@@ -265,7 +267,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>& keys, size_t num_rows
265267
}
266268

267269
void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t num_rows) {
268-
auto& arr = _get_null_map_data();
270+
auto& arr = get_null_map_data();
269271
const size_t old_size = arr.size();
270272
arr.resize(old_size + num_rows);
271273

@@ -288,31 +290,24 @@ void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
288290
void ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::IColumn& src,
289291
size_t start, size_t length) {
290292
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
291-
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
293+
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
292294
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column, start,
293295
length);
294-
const auto& src_null_map_data = nullable_col.get_null_map_data();
295-
_has_null = has_null();
296-
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
297296
}
298297

299298
void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) {
300299
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
301-
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
300+
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
302301
get_nested_column().insert_range_from(*nullable_col.nested_column, start, length);
303-
const auto& src_null_map_data = nullable_col.get_null_map_data();
304-
_has_null = has_null();
305-
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
306302
}
307303

308304
void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
309305
const uint32_t* indices_end) {
310306
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
311307
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
312308
indices_end);
313-
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
314-
indices_end);
315-
_need_update_has_null = true;
309+
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
310+
indices_end);
316311
}
317312

318313
void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
@@ -321,37 +316,37 @@ void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
321316
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
322317
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
323318
indices_end);
324-
_get_null_map_column().insert_many_defaults(indices_end - indices_begin);
319+
_push_false_to_nullmap(indices_end - indices_begin);
325320
}
326321

327322
void ColumnNullable::insert(const Field& x) {
328323
if (x.is_null()) {
329324
get_nested_column().insert_default();
330-
_get_null_map_data().push_back(1);
325+
get_null_map_data().push_back(1);
331326
_has_null = true;
327+
_need_update_has_null = false;
332328
} else {
333329
get_nested_column().insert(x);
334-
_get_null_map_data().push_back(0);
330+
_push_false_to_nullmap(1);
335331
}
336332
}
337333

338334
void ColumnNullable::insert_from(const IColumn& src, size_t n) {
339335
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
340336
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
341337
auto is_null = src_concrete.get_null_map_data()[n];
342-
_has_null |= is_null;
343-
_get_null_map_data().push_back(is_null);
338+
get_null_map_data().push_back(is_null);
344339
}
345340

346341
void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
347342
get_nested_column().insert_from(src, n);
348-
_get_null_map_data().push_back(0);
343+
_push_false_to_nullmap(1);
349344
}
350345

351346
void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start,
352347
size_t length) {
353348
get_nested_column().insert_range_from(src, start, length);
354-
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
349+
_push_false_to_nullmap(length);
355350
}
356351

357352
void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position,
@@ -380,15 +375,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
380375
}
381376

382377
Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) {
383-
const auto* nullable_col_ptr = reinterpret_cast<const ColumnNullable*>(col_ptr);
378+
auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
384379
ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
385-
ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
380+
381+
/// `get_null_map_data` will set `_need_update_has_null` to true
382+
auto& res_nullmap = nullable_col_ptr->get_null_map_data();
383+
386384
RETURN_IF_ERROR(get_nested_column().filter_by_selector(
387385
sel, sel_size, const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
388-
//insert cur nullmap into result nullmap which is empty
389-
auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
390-
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
391-
->get_data();
392386
DCHECK(res_nullmap.empty());
393387
res_nullmap.resize(sel_size);
394388
auto& cur_nullmap = get_null_map_column().get_data();
@@ -535,15 +529,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direct
535529
}
536530
}
537531
}
538-
//
539-
//void ColumnNullable::gather(ColumnGathererStream & gatherer)
540-
//{
541-
// gatherer.gather(*this);
542-
//}
543532

544533
void ColumnNullable::reserve(size_t n) {
545534
get_nested_column().reserve(n);
546-
_get_null_map_data().reserve(n);
535+
get_null_map_data(false).reserve(n);
547536
}
548537

549538
void ColumnNullable::resize(size_t n) {
@@ -595,7 +584,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& other) {
595584
}
596585

597586
void ColumnNullable::check_consistency() const {
598-
if (null_map->size() != get_nested_column().size()) {
587+
if (get_null_map_column().size() != get_nested_column().size()) {
599588
throw Exception(ErrorCode::INTERNAL_ERROR,
600589
"Sizes of nested column and null map of Nullable column are not equal");
601590
}
@@ -609,8 +598,8 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
609598
}
610599

611600
void ColumnNullable::_update_has_null() {
612-
const UInt8* null_pos = _get_null_map_data().data();
613-
_has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
601+
const UInt8* null_pos = get_null_map_data().data();
602+
_has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
614603
_need_update_has_null = false;
615604
}
616605

0 commit comments

Comments
 (0)