Skip to content

Commit 88fccb8

Browse files
authored
[Chore](pick) pick #60141 #59410 (#61287)
pick #60141 #59410
1 parent ae91849 commit 88fccb8

File tree

14 files changed

+186
-84
lines changed

14 files changed

+186
-84
lines changed

be/src/pipeline/exec/hashjoin_build_sink.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,8 @@ struct ProcessHashTableBuild {
206206
bool* has_null_key) {
207207
if (null_map) {
208208
// first row is mocked and is null
209-
// TODO: Need to test the for loop. break may better
210-
for (uint32_t i = 1; i < _rows; i++) {
211-
if ((*null_map)[i]) {
212-
*has_null_key = true;
213-
}
209+
if (simd::contain_one(null_map->data() + 1, _rows - 1)) {
210+
*has_null_key = true;
214211
}
215212
if (short_circuit_for_null && *has_null_key) {
216213
return Status::OK();
@@ -222,7 +219,7 @@ struct ProcessHashTableBuild {
222219
_rows, _batch_size, *has_null_key, hash_table_ctx.direct_mapping_range());
223220

224221
// To make null keys compare equal when a single null-safe equal (null eq) key is used, all null keys must be set to the default value.
225-
if (_build_raw_ptrs.size() == 1 && null_map) {
222+
if (_build_raw_ptrs.size() == 1 && null_map && *has_null_key) {
226223
_build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data());
227224
}
228225

be/src/pipeline/exec/join/process_hash_table_probe_impl.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ void ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::Mut
114114
_build_column_has_null[i] = false;
115115
if (_right_output_slot_flags[i] && column.is_nullable()) {
116116
const auto& nullable = assert_cast<const vectorized::ColumnNullable&>(column);
117-
_build_column_has_null[i] = !simd::contain_byte(
118-
nullable.get_null_map_data().data() + 1, nullable.size() - 1, 1);
117+
_build_column_has_null[i] = !simd::contain_one(
118+
nullable.get_null_map_data().data() + 1, nullable.size() - 1);
119119
}
120120
}
121121
}
@@ -192,7 +192,9 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType>::_init_probe_sid
192192
hash_table_ctx.arena.clear();
193193
// To make null keys compare equal when a single null-safe equal (null eq) key is used, all null keys must be set to the default value.
194194
if (_parent->_probe_columns.size() == 1 && null_map) {
195-
_parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map);
195+
if (simd::contain_one(null_map, probe_rows)) {
196+
_parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map);
197+
}
196198
}
197199

198200
hash_table_ctx.init_serialized_keys(_parent->_probe_columns, probe_rows, null_map, true,
@@ -382,8 +384,7 @@ Status ProcessHashTableProbe<JoinOpType>::finalize_block_with_filter(
382384
}
383385
const auto& column_filter =
384386
assert_cast<const vectorized::ColumnUInt8*>(filter_ptr.get())->get_data();
385-
bool need_filter =
386-
simd::count_zero_num((int8_t*)column_filter.data(), column_filter.size()) != 0;
387+
bool need_filter = simd::contain_zero(column_filter.data(), column_filter.size());
387388
if (need_filter) {
388389
row_indexs.filter(column_filter);
389390
}

be/src/pipeline/exec/nested_loop_join_probe_operator.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ class NestedLoopJoinProbeLocalState final
9797
}
9898
if (!_cur_probe_row_visited_flags[i]) {
9999
_cur_probe_row_visited_flags[i] =
100-
simd::contain_byte<uint8_t>(filter.data() + offset, end - offset, 1)
101-
? 1
102-
: 0;
100+
simd::contain_one(filter.data() + offset, end - offset);
103101
}
104102
end = offset;
105103
}

be/src/util/simd/bits.h

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,6 @@ static size_t find_byte(const T* data, size_t start, size_t end, T byte) {
261261
return (T*)p - data;
262262
}
263263

264-
template <typename T>
265-
bool contain_byte(const T* __restrict data, const size_t length, const signed char byte) {
266-
if (length == 0) {
267-
return false;
268-
}
269-
return nullptr != std::memchr(reinterpret_cast<const void*>(data), byte, length);
270-
}
271-
272264
// Thin wrapper: forwards `vec` and `start` to find_byte<uint8_t> with the byte value 1.
// NOTE(review): presumably yields the index of the first 1 at or after `start`;
// the vector-taking find_byte overload is not visible here — confirm its
// not-found return value before relying on it.
inline size_t find_one(const std::vector<uint8_t>& vec, size_t start) {
273265
return find_byte<uint8_t>(vec, start, 1);
274266
}
@@ -281,5 +273,58 @@ inline size_t find_zero(const std::vector<uint8_t>& vec, size_t start) {
281273
return find_byte<uint8_t>(vec, start, 0);
282274
}
283275

276+
/// Reports whether the byte range [data, data + size) holds at least one
/// non-zero byte. Despite the name, any non-zero value counts, not only 1.
/// An empty range (size == 0) yields false.
inline bool contain_one(const uint8_t* __restrict data, size_t size) {
    size_t offset = 0;
#if defined(__AVX2__)
    // 32 bytes per step: testz(v, v) is 1 only when every bit of v is clear.
    while (offset + 32 <= size) {
        const __m256i lanes = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(data + offset));
        if (_mm256_testz_si256(lanes, lanes) == 0) {
            return true;
        }
        offset += 32;
    }
#elif defined(__SSE2__)
    // 16 bytes per step: the mask carries one bit per byte equal to zero, so
    // anything other than all 16 bits set means a non-zero byte is present.
    const __m128i all_zero = _mm_setzero_si128();
    while (offset + 16 <= size) {
        const __m128i lanes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + offset));
        const int zero_mask = _mm_movemask_epi8(_mm_cmpeq_epi8(lanes, all_zero));
        if (zero_mask != 0xFFFF) {
            return true;
        }
        offset += 16;
    }
#endif
    // Scalar pass over whatever the vector loop did not consume.
    while (offset < size) {
        if (data[offset] != 0) {
            return true;
        }
        ++offset;
    }
    return false;
}
301+
302+
/// Reports whether the byte range [data, data + size) holds at least one
/// byte equal to zero. An empty range (size == 0) yields false.
inline bool contain_zero(const uint8_t* __restrict data, size_t size) {
    size_t offset = 0;
#if defined(__AVX2__)
    // 32 bytes per step: any set bit in the mask marks a byte equal to zero.
    const __m256i all_zero = _mm256_setzero_si256();
    while (offset + 32 <= size) {
        const __m256i lanes = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(data + offset));
        const int zero_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(lanes, all_zero));
        if (zero_mask != 0) {
            return true;
        }
        offset += 32;
    }
#elif defined(__SSE2__)
    // 16 bytes per step: any set bit in the mask marks a byte equal to zero.
    const __m128i all_zero = _mm_setzero_si128();
    while (offset + 16 <= size) {
        const __m128i lanes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + offset));
        const int zero_mask = _mm_movemask_epi8(_mm_cmpeq_epi8(lanes, all_zero));
        if (zero_mask != 0) {
            return true;
        }
        offset += 16;
    }
#endif
    // Scalar pass over whatever the vector loop did not consume.
    while (offset < size) {
        if (data[offset] == 0) {
            return true;
        }
        ++offset;
    }
    return false;
}
328+
284329
} // namespace doris::simd
285330
#include "common/compile_check_end.h"

be/src/vec/columns/column_decimal.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,6 @@ void ColumnDecimal<T>::compare_internal(size_t rhs_row_id, const IColumn& rhs,
486486
template <PrimitiveType T>
487487
void ColumnDecimal<T>::replace_column_null_data(const uint8_t* __restrict null_map) {
488488
auto s = size();
489-
size_t null_count = s - simd::count_zero_num((const int8_t*)null_map, s);
490-
if (0 == null_count) {
491-
return;
492-
}
493489
for (size_t i = 0; i < s; ++i) {
494490
data[i] = null_map[i] ? value_type() : data[i];
495491
}

be/src/vec/columns/column_nullable.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ size_t ColumnNullable::serialize_impl(char* pos, const size_t row) const {
275275
}
276276

277277
void ColumnNullable::serialize_vec(StringRef* keys, size_t num_rows) const {
278-
const bool has_null = simd::contain_byte(get_null_map_data().data(), num_rows, 1);
278+
const bool has_null = simd::contain_one(get_null_map_data().data(), num_rows);
279279
if (has_null) {
280280
for (size_t i = 0; i < num_rows; ++i) {
281281
keys[i].size += serialize_impl(const_cast<char*>(keys[i].data + keys[i].size), i);
@@ -620,11 +620,11 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
620620
}
621621

622622
bool ColumnNullable::only_null() const {
623-
return !simd::contain_byte(get_null_map_data().data(), size(), 0);
623+
return !simd::contain_zero(get_null_map_data().data(), size());
624624
}
625625

626626
bool ColumnNullable::has_null(size_t begin, size_t end) const {
627-
return simd::contain_byte(get_null_map_data().data() + begin, end - begin, 1);
627+
return simd::contain_one(get_null_map_data().data() + begin, end - begin);
628628
}
629629

630630
bool ColumnNullable::has_null() const {

be/src/vec/columns/column_vector.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -476,12 +476,9 @@ MutableColumnPtr ColumnVector<T>::permute(const IColumn::Permutation& perm, size
476476
template <PrimitiveType T>
477477
void ColumnVector<T>::replace_column_null_data(const uint8_t* __restrict null_map) {
478478
auto s = size();
479-
size_t null_count = s - simd::count_zero_num((const int8_t*)null_map, s);
480-
if (0 == null_count) {
481-
return;
482-
}
479+
auto value = default_value();
483480
for (size_t i = 0; i < s; ++i) {
484-
data[i] = null_map[i] ? default_value() : data[i];
481+
data[i] = null_map[i] ? value : data[i];
485482
}
486483
}
487484

be/src/vec/common/hash_table/hash_key_type.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ inline HashKeyType get_hash_key_type_with_fixed(size_t size) {
8080
}
8181

8282
inline HashKeyType get_hash_key_type_fixed(const std::vector<vectorized::DataTypePtr>& data_types) {
83+
if (data_types.size() >= vectorized::BITSIZE) {
84+
return HashKeyType::serialized;
85+
}
86+
8387
bool has_null = false;
8488
size_t key_byte_size = 0;
8589

@@ -94,8 +98,7 @@ inline HashKeyType get_hash_key_type_fixed(const std::vector<vectorized::DataTyp
9498
}
9599
}
96100

97-
size_t bitmap_size = has_null ? vectorized::get_bitmap_size(data_types.size()) : 0;
98-
return get_hash_key_type_with_fixed(bitmap_size + key_byte_size);
101+
return get_hash_key_type_with_fixed(has_null + key_byte_size);
99102
}
100103

101104
inline HashKeyType get_hash_key_type(const std::vector<vectorized::DataTypePtr>& data_types) {

0 commit comments

Comments
 (0)