diff --git a/be/src/olap/row.h b/be/src/olap/row.h index b8919706ecf32f..f24540e13ebff1 100644 --- a/be/src/olap/row.h +++ b/be/src/olap/row.h @@ -187,6 +187,12 @@ void agg_finalize_row(const std::vector& ids, RowType* row, MemPool* m template uint32_t hash_row(const RowType& row, uint32_t seed) { for (uint32_t cid : row.schema()->column_ids()) { + FieldType type = row.schema()->column(cid)->type(); + // The approximation of float/double in a certain precision range, the binary of byte is not + // a fixed value, so these two types are ignored in calculating hash code. + if (type == OLAP_FIELD_TYPE_FLOAT || type == OLAP_FIELD_TYPE_DOUBLE) { + continue; + } seed = row.schema()->column(cid)->hash_code(row.cell(cid), seed); } return seed; diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index c8ca28fbcb138e..d4624ad2008361 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -80,13 +80,7 @@ OLAPStatus EngineChecksumTask::_compute_checksum() { } } - // ignore float and double type considering to precision lose for (size_t i = 0; i < tablet->tablet_schema().num_columns(); ++i) { - FieldType type = tablet->tablet_schema().column(i).type(); - if (type == OLAP_FIELD_TYPE_FLOAT || type == OLAP_FIELD_TYPE_DOUBLE) { - continue; - } - reader_params.return_columns.push_back(i); } @@ -118,8 +112,8 @@ OLAPStatus EngineChecksumTask::_compute_checksum() { OLAP_LOG_WARNING("fail to read in reader. [res=%d]", res); return res; } - - row_checksum = hash_row(row, row_checksum); + // The value of checksum is independent of the sorting of data rows. + row_checksum ^= hash_row(row, 0); // the memory allocate by mem pool has been copied, // so we should release memory immediately mem_pool->clear();