Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 61 additions & 30 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,41 +341,72 @@ static inline bool array_container_remove(array_container_t *arr,
/* Check whether x is present. */
inline bool array_container_contains(const array_container_t *arr,
uint16_t pos) {
// return binarySearch(arr->array, arr->cardinality, pos) >= 0;
// binary search with fallback to linear search for short ranges
int32_t low = 0;
const uint16_t *carr = (const uint16_t *)arr->array;
int32_t high = arr->cardinality - 1;
if (high > pos) {
// since elements are unique, x can be located only at index <= x
high = pos;
}
// while (high - low >= 0) {
while (high >= low + 16) {
int32_t middleIndex = (low + high) >> 1;
uint16_t middleValue = carr[middleIndex];
if (middleValue < pos) {
low = middleIndex + 1;
} else if (middleValue > pos) {
high = middleIndex - 1;
} else {
return true;
/**
* SIMD Quad algorithm
* Daniel Lemire, "You can beat the binary search," in Daniel Lemire's blog,
* April 27, 2026,
* https://lemire.me/blog/2026/04/27/you-can-beat-the-binary-search/.
*/
const int32_t gap = 16;
const uint16_t *carr = arr->array;
int32_t cardinality = arr->cardinality;
if (cardinality < gap) {
for (int32_t j = 0; j < cardinality; j++) {
if (carr[j] >= pos) return carr[j] == pos;
}
return false;
}

while (high >= low + 3) {
if (carr[low + 3] >= pos) {
return (carr[low] == pos) || (carr[low + 1] == pos) ||
(carr[low + 2] == pos) || (carr[low + 3] == pos);
int32_t num_blocks = cardinality / gap;
int32_t base = 0;
int32_t n = num_blocks;
while (n > 3) {
int32_t quarter = n >> 2;

int32_t k1 = carr[(base + quarter + 1) * gap - 1];
int32_t k2 = carr[(base + 2 * quarter + 1) * gap - 1];
int32_t k3 = carr[(base + 3 * quarter + 1) * gap - 1];

int32_t c1 = (k1 < pos);
int32_t c2 = (k2 < pos);
int32_t c3 = (k3 < pos);

base += (c1 + c2 + c3) * quarter;
n -= 3 * quarter;
}
while (n > 1) {
int32_t half = n >> 1;
base = (carr[(base + half + 1) * gap - 1] < pos) ? base + half : base;
n -= half;
}
int32_t lo = (carr[(base + 1) * gap - 1] < pos) ? base + 1 : base;

if (lo < num_blocks) {
const uint16_t *blk = carr + lo * gap;
#ifdef CROARING_USENEON
uint16x8_t needle = vdupq_n_u16(pos);
uint16x8_t v0 = vld1q_u16(blk);
uint16x8_t v1 = vld1q_u16(blk + 8);
uint16x8_t hit =
vorrq_u16(vceqq_u16(v0, needle), vceqq_u16(v1, needle));
return vmaxvq_u16(hit) != 0;
#elif defined(CROARING_IS_X64)
__m128i needle = _mm_set1_epi16((short)pos);
__m128i v0 = _mm_loadu_si128((const __m128i *)blk);
__m128i v1 = _mm_loadu_si128((const __m128i *)(blk + 8));
__m128i hit = _mm_or_si128(_mm_cmpeq_epi16(v0, needle),
_mm_cmpeq_epi16(v1, needle));
return _mm_movemask_epi8(hit) != 0;
#else
for (int32_t j = 0; j < gap; j++) {
if (blk[j] >= pos) return blk[j] == pos;
}
low += 4;
return false;
#endif
}

for (int i = low; i <= high; i++) {
uint16_t v = carr[i];
if (v >= pos) {
return (v == pos); // compiles into SETE
}
for (int32_t j = num_blocks * gap; j < cardinality; j++) {
uint16_t v = carr[j];
if (v >= pos) return (v == pos);
}
return false;
}
Expand Down
Loading