Skip to content

Commit 32ab04c

Browse files
authored
add read_ranges and read_prev_ranges iterator functions (#798)
This adds roaring_uint32_iterator_read_ranges / roaring_uint32_iterator_read_prev_ranges to the 32-bit bitmap API, roaring64_iterator_read_ranges / roaring64_iterator_read_prev_ranges to the 64-bit bitmap API, and read_ranges / read_prev_ranges to the C++ RoaringSetBitBiDirectionalIterator. Each function reads up to N maximal consecutive ranges from the iterator in a single call, merging across internal container boundaries.
1 parent 598077e commit 32ab04c

9 files changed

Lines changed: 1221 additions & 0 deletions

File tree

cpp/roaring/roaring.hh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,24 @@ class RoaringSetBitBiDirectionalIterator final {
10021002
api::roaring_uint32_iterator_move_equalorlarger(&i, val);
10031003
}
10041004

1005+
/**
1006+
* Reads up to ${count} ranges into ${buf}. Returns the number of ranges
1007+
* read. See roaring_uint32_iterator_read_ranges for full semantics.
1008+
*/
1009+
size_t read_ranges(api::roaring_uint32_range_closed_t *buf, size_t count) {
1010+
return api::roaring_uint32_iterator_read_ranges(&i, buf, count);
1011+
}
1012+
1013+
/**
1014+
* Reads up to ${count} ranges in reverse into ${buf}. Returns the number
1015+
* of ranges read. See roaring_uint32_iterator_read_prev_ranges for full
1016+
* semantics.
1017+
*/
1018+
size_t read_prev_ranges(api::roaring_uint32_range_closed_t *buf,
1019+
size_t count) {
1020+
return api::roaring_uint32_iterator_read_prev_ranges(&i, buf, count);
1021+
}
1022+
10051023
type_of_iterator &operator--() { // prefix --
10061024
api::roaring_uint32_iterator_previous(&i);
10071025
return *this;

include/roaring/containers/containers.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2627,6 +2627,35 @@ bool container_iterator_skip_backward(const container_t *c, uint8_t typecode,
26272627
uint32_t *consumed_count,
26282628
uint16_t *value_out);
26292629

2630+
/**
2631+
* Finds the end of the consecutive run starting at the current iterator
2632+
* position within a container. Returns the low16 of the last consecutive
2633+
* value. If there are more values in the container after the run,
2634+
* *has_more is set to true, the iterator is positioned at the next value,
2635+
* and *value is updated to that value. Otherwise *has_more is set to false.
2636+
*
2637+
* *value must be the low 16 bits of the current value at the iterator's
2638+
* position on entry.
2639+
*/
2640+
uint16_t container_iterator_find_run_end(const container_t *c, uint8_t typecode,
2641+
roaring_container_iterator_t *it,
2642+
uint16_t *value, bool *has_more);
2643+
2644+
/**
2645+
* Finds the start of the consecutive run ending at the current iterator
2646+
* position within a container. Returns the low16 of the first consecutive
2647+
* value. If there are more values in the container before the run,
2648+
* *has_more is set to true, the iterator is positioned at the previous value,
2649+
* and *value is updated to that value. Otherwise *has_more is set to false.
2650+
*
2651+
* *value must be the low 16 bits of the current value at the iterator's
2652+
* position on entry.
2653+
*/
2654+
uint16_t container_iterator_find_run_start(const container_t *c,
2655+
uint8_t typecode,
2656+
roaring_container_iterator_t *it,
2657+
uint16_t *value, bool *has_more);
2658+
26302659
#ifdef __cplusplus
26312660
}
26322661
}

include/roaring/roaring.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,6 +1261,60 @@ uint32_t roaring_uint32_iterator_skip(roaring_uint32_iterator_t *it,
12611261
uint32_t roaring_uint32_iterator_skip_backward(roaring_uint32_iterator_t *it,
12621262
uint32_t count);
12631263

1264+
/**
 * A closed range of 32-bit values: both endpoints are part of the range.
 * This is the element type of the buffers filled by
 * roaring_uint32_iterator_read_ranges and
 * roaring_uint32_iterator_read_prev_ranges.
 */
typedef struct roaring_uint32_range_closed_s {
1265+
uint32_t min;  // lower endpoint (included)
1266+
uint32_t max;  // upper endpoint (included)
1267+
} roaring_uint32_range_closed_t;
1268+
1269+
/**
1270+
* Reads next ${count} ranges from iterator into user-supplied ${buf}.
1271+
* A range is defined as a maximal interval of consecutive values.
1272+
* For example, the set {1,2,3,5,6} contains two ranges: [1..3] and [5..6].
1273+
* Each range is represented as a struct {min,max}, both endpoints included.
1274+
* Consecutive values that span internal container boundaries are merged into
1275+
* a single range.
1276+
*
1277+
* Returns the number of read ranges.
1278+
* This number can be smaller than ${count}, which means that the iterator is
1279+
* drained.
1280+
*
1281+
* This function satisfies the semantics of iteration and can be used together
1282+
* with other iterator functions.
1283+
* - first range will start with ${it}->current_value
1284+
* - after the function returns, the iterator is positioned at the next element
1285+
* after the end of the last returned range, or ${it}->has_value is false if
1286+
* the bitmap is exhausted.
1287+
*/
1288+
size_t roaring_uint32_iterator_read_ranges(roaring_uint32_iterator_t *it,
1289+
roaring_uint32_range_closed_t *buf,
1290+
size_t count);
1291+
1292+
/**
1293+
* Reads previous ${count} ranges from iterator into user-supplied ${buf}.
1294+
* A range is defined as a maximal interval of consecutive values.
1295+
* For example, the set {1,2,3,5,6} contains two ranges: [1..3] and [5..6].
1296+
* Each range is represented as a struct {min,max}, both endpoints included.
1297+
* Consecutive values that span internal container boundaries are merged into
1298+
* a single range.
1299+
*
1300+
* Returns the number of read ranges.
1301+
* This number can be smaller than ${count}, which means that the iterator is
1302+
* drained.
1303+
*
1304+
* Ranges are returned in reverse order, i.e. the first range returned is the
1305+
* highest range (ending at the current value).
1306+
*
1307+
* This function satisfies the semantics of reverse iteration and can be used
1308+
* together with other iterator functions.
1309+
* - first range will end with ${it}->current_value
1310+
* - after the function returns, the iterator is positioned at the element
1311+
* before the beginning of the last returned range, or ${it}->has_value is
1312+
* false if the bitmap is exhausted.
1313+
*/
1314+
size_t roaring_uint32_iterator_read_prev_ranges(
1315+
roaring_uint32_iterator_t *it, roaring_uint32_range_closed_t *buf,
1316+
size_t count);
1317+
12641318
#ifdef __cplusplus
12651319
}
12661320
}

include/roaring/roaring64.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,58 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it,
804804
uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,
805805
uint64_t count);
806806

807+
/**
 * A closed range of 64-bit values: both endpoints are part of the range.
 * This is the element type of the buffers filled by
 * roaring64_iterator_read_ranges and roaring64_iterator_read_prev_ranges.
 */
typedef struct roaring64_range_closed_s {
808+
uint64_t min;  // lower endpoint (included)
809+
uint64_t max;  // upper endpoint (included)
810+
} roaring64_range_closed_t;
811+
812+
/**
813+
* Reads next ${count} ranges from iterator into user-supplied ${buf}.
814+
* A range is defined as a maximal interval of consecutive values.
815+
* For example, the set {1,2,3,5,6} contains two ranges: [1..3] and [5..6].
816+
* Each range is represented as a struct {min,max}, both endpoints included.
817+
* Consecutive values that span internal container boundaries are merged into
818+
* a single range.
819+
*
820+
* Returns the number of read ranges.
821+
* This number can be smaller than ${count}, which means that the iterator is
822+
* drained.
823+
*
824+
* This function can be used together with other iterator functions.
825+
* - first range will start with the current iterator value
826+
* - after the function returns, the iterator is positioned at the next element
827+
* after the end of the last returned range, or has_value is false if
828+
* the bitmap is exhausted.
829+
*/
830+
size_t roaring64_iterator_read_ranges(roaring64_iterator_t *it,
831+
roaring64_range_closed_t *buf,
832+
size_t count);
833+
834+
/**
835+
* Reads previous ${count} ranges from iterator into user-supplied ${buf}.
836+
* A range is defined as a maximal interval of consecutive values.
837+
* For example, the set {1,2,3,5,6} contains two ranges: [1..3] and [5..6].
838+
* Each range is represented as a struct {min,max}, both endpoints included.
839+
* Consecutive values that span internal container boundaries are merged into
840+
* a single range.
841+
*
842+
* Returns the number of read ranges.
843+
* This number can be smaller than ${count}, which means that the iterator is
844+
* drained.
845+
*
846+
* Ranges are returned in reverse order, i.e. the first range returned is the
847+
* highest range (ending at the current value).
848+
*
849+
* This function can be used together with other iterator functions.
850+
* - first range will end with the current iterator value
851+
* - after the function returns, the iterator is positioned at the element
852+
* before the beginning of the last returned range, or has_value is false if
853+
* the bitmap is exhausted.
854+
*/
855+
size_t roaring64_iterator_read_prev_ranges(roaring64_iterator_t *it,
856+
roaring64_range_closed_t *buf,
857+
size_t count);
858+
807859
#ifdef __cplusplus
808860
} // extern "C"
809861
} // namespace roaring

src/containers/containers.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,171 @@ bool container_iterator_skip_backward(const container_t *c, uint8_t typecode,
794794
return has_value;
795795
}
796796

797+
uint16_t container_iterator_find_run_end(const container_t *c, uint8_t typecode,
798+
roaring_container_iterator_t *it,
799+
uint16_t *value, bool *has_more) {
800+
switch (typecode) {
801+
case RUN_CONTAINER_TYPE: {
802+
const run_container_t *rc = const_CAST_run(c);
803+
uint16_t run_end =
804+
rc->runs[it->index].value + rc->runs[it->index].length;
805+
it->index++;
806+
if (it->index < rc->n_runs) {
807+
*has_more = true;
808+
*value = rc->runs[it->index].value;
809+
} else {
810+
*has_more = false;
811+
}
812+
return run_end;
813+
}
814+
case ARRAY_CONTAINER_TYPE: {
815+
const array_container_t *ac = const_CAST_array(c);
816+
uint16_t v = *value;
817+
while (it->index + 1 < ac->cardinality &&
818+
ac->array[it->index + 1] == (uint16_t)(v + 1)) {
819+
it->index++;
820+
v++;
821+
}
822+
it->index++;
823+
if (it->index < ac->cardinality) {
824+
*has_more = true;
825+
*value = ac->array[it->index];
826+
} else {
827+
*has_more = false;
828+
}
829+
return v;
830+
}
831+
case BITSET_CONTAINER_TYPE: {
832+
const bitset_container_t *bc = const_CAST_bitset(c);
833+
uint32_t pos = (uint32_t)*value + 1;
834+
uint16_t run_end;
835+
if (pos >= (1 << 16)) {
836+
*has_more = false;
837+
return UINT16_MAX;
838+
}
839+
uint32_t wordindex = pos / 64;
840+
uint64_t word = ~bc->words[wordindex] & (UINT64_MAX << (pos % 64));
841+
while (word == 0 &&
842+
wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
843+
wordindex++;
844+
word = ~bc->words[wordindex];
845+
}
846+
if (word != 0) {
847+
run_end = (uint16_t)(wordindex * 64 +
848+
roaring_trailing_zeroes(word) - 1);
849+
} else {
850+
run_end = UINT16_MAX;
851+
}
852+
uint32_t next_pos = (uint32_t)run_end + 1;
853+
if (next_pos >= (1 << 16)) {
854+
*has_more = false;
855+
} else {
856+
wordindex = next_pos / 64;
857+
word = bc->words[wordindex] & (UINT64_MAX << (next_pos % 64));
858+
while (word == 0 &&
859+
wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
860+
wordindex++;
861+
word = bc->words[wordindex];
862+
}
863+
if (word != 0) {
864+
*has_more = true;
865+
it->index = wordindex * 64 + roaring_trailing_zeroes(word);
866+
*value = (uint16_t)it->index;
867+
} else {
868+
*has_more = false;
869+
}
870+
}
871+
return run_end;
872+
}
873+
default:
874+
assert(false);
875+
roaring_unreachable;
876+
return 0;
877+
}
878+
}
879+
880+
uint16_t container_iterator_find_run_start(const container_t *c,
881+
uint8_t typecode,
882+
roaring_container_iterator_t *it,
883+
uint16_t *value, bool *has_more) {
884+
switch (typecode) {
885+
case RUN_CONTAINER_TYPE: {
886+
const run_container_t *rc = const_CAST_run(c);
887+
uint16_t run_start = rc->runs[it->index].value;
888+
it->index--;
889+
if (it->index >= 0) {
890+
*has_more = true;
891+
*value = rc->runs[it->index].value + rc->runs[it->index].length;
892+
} else {
893+
*has_more = false;
894+
}
895+
return run_start;
896+
}
897+
case ARRAY_CONTAINER_TYPE: {
898+
const array_container_t *ac = const_CAST_array(c);
899+
uint16_t v = *value;
900+
while (it->index > 0 &&
901+
ac->array[it->index - 1] == (uint16_t)(v - 1)) {
902+
it->index--;
903+
v--;
904+
}
905+
it->index--;
906+
if (it->index >= 0) {
907+
*has_more = true;
908+
*value = ac->array[it->index];
909+
} else {
910+
*has_more = false;
911+
}
912+
return v;
913+
}
914+
case BITSET_CONTAINER_TYPE: {
915+
const bitset_container_t *bc = const_CAST_bitset(c);
916+
if (*value == 0) {
917+
*has_more = false;
918+
return 0;
919+
}
920+
uint32_t pos = (uint32_t)*value - 1;
921+
int32_t wordindex = (int32_t)(pos / 64);
922+
uint64_t word =
923+
~bc->words[wordindex] & (UINT64_MAX >> (63 - (pos % 64)));
924+
while (word == 0 && --wordindex >= 0) {
925+
word = ~bc->words[wordindex];
926+
}
927+
uint16_t run_start;
928+
if (word != 0) {
929+
run_start = (uint16_t)(wordindex * 64 +
930+
(63 - roaring_leading_zeroes(word)) + 1);
931+
} else {
932+
run_start = 0;
933+
}
934+
if (run_start == 0) {
935+
*has_more = false;
936+
} else {
937+
int32_t prev_pos = (int32_t)run_start - 1;
938+
wordindex = prev_pos / 64;
939+
word = bc->words[wordindex] &
940+
(UINT64_MAX >> (63 - (prev_pos % 64)));
941+
while (word == 0 && --wordindex >= 0) {
942+
word = bc->words[wordindex];
943+
}
944+
if (word != 0) {
945+
*has_more = true;
946+
it->index =
947+
wordindex * 64 + (63 - roaring_leading_zeroes(word));
948+
*value = (uint16_t)it->index;
949+
} else {
950+
*has_more = false;
951+
}
952+
}
953+
return run_start;
954+
}
955+
default:
956+
assert(false);
957+
roaring_unreachable;
958+
return 0;
959+
}
960+
}
961+
797962
#ifdef __cplusplus
798963
}
799964
}

0 commit comments

Comments
 (0)