@@ -2264,6 +2264,26 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
22642264 // reset decimal_scale_params_index;
22652265 _decimal_scale_params_index = 0 ;
22662266 try {
2267+ // Condition cache HIT: skip consecutive false granules before reading
2268+ if (_condition_cache_ctx && _condition_cache_ctx->is_hit ) {
2269+ int64_t current_row = _row_reader->getRowNumber ();
2270+ auto & cache = *_condition_cache_ctx->filter_result ;
2271+ int64_t granule = current_row / ConditionCacheContext::GRANULE_SIZE;
2272+ int64_t max_granule = static_cast <int64_t >(cache.size ());
2273+ while (granule < max_granule && !cache[granule]) {
2274+ granule++;
2275+ }
2276+ if (granule >= max_granule) {
2277+ *eof = true ;
2278+ *read_rows = 0 ;
2279+ return Status::OK ();
2280+ }
2281+ int64_t target_row = granule * ConditionCacheContext::GRANULE_SIZE;
2282+ if (target_row > current_row) {
2283+ _row_reader->seekToRow (target_row);
2284+ }
2285+ }
2286+ _last_batch_row_offset = _row_reader->getRowNumber ();
22672287 rr = _row_reader->nextBatch (*_batch, block);
22682288 if (rr == 0 || _batch->numElements == 0 ) {
22692289 *eof = true ;
@@ -2363,6 +2383,26 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
23632383 // reset decimal_scale_params_index;
23642384 _decimal_scale_params_index = 0 ;
23652385 try {
2386+ // Condition cache HIT: skip consecutive false granules before reading
2387+ if (_condition_cache_ctx && _condition_cache_ctx->is_hit ) {
2388+ int64_t current_row = _row_reader->getRowNumber ();
2389+ auto & cache = *_condition_cache_ctx->filter_result ;
2390+ int64_t granule = current_row / ConditionCacheContext::GRANULE_SIZE;
2391+ int64_t max_granule = static_cast <int64_t >(cache.size ());
2392+ while (granule < max_granule && !cache[granule]) {
2393+ granule++;
2394+ }
2395+ if (granule >= max_granule) {
2396+ *eof = true ;
2397+ *read_rows = 0 ;
2398+ return Status::OK ();
2399+ }
2400+ int64_t target_row = granule * ConditionCacheContext::GRANULE_SIZE;
2401+ if (target_row > current_row) {
2402+ _row_reader->seekToRow (target_row);
2403+ }
2404+ }
2405+ _last_batch_row_offset = _row_reader->getRowNumber ();
23662406 rr = _row_reader->nextBatch (*_batch, block);
23672407 if (rr == 0 || _batch->numElements == 0 ) {
23682408 *eof = true ;
@@ -2480,6 +2520,24 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
24802520 bool can_filter_all = false ;
24812521 RETURN_IF_ERROR_OR_CATCH_EXCEPTION (VExprContext::execute_conjuncts (
24822522 filter_conjuncts, &filters, block, &result_filter, &can_filter_all));
2523+
2524+ // Condition cache MISS: mark granules with surviving rows (non-lazy path)
2525+ if (_condition_cache_ctx && !_condition_cache_ctx->is_hit ) {
2526+ auto & cache = *_condition_cache_ctx->filter_result ;
2527+ auto * filter_data = result_filter.data ();
2528+ size_t num_rows = block->rows ();
2529+ for (size_t i = 0 ; i < num_rows; i++) {
2530+ if (filter_data[i]) {
2531+ size_t granule = (_last_batch_row_offset + i) /
2532+ ConditionCacheContext::GRANULE_SIZE;
2533+ if (granule >= cache.size ()) {
2534+ cache.resize (granule + 1 , false );
2535+ }
2536+ cache[granule] = true ;
2537+ }
2538+ }
2539+ }
2540+
24832541 if (can_filter_all) {
24842542 for (auto & col : columns_to_filter) {
24852543 std::move (*block->get_by_position (col).column ).assume_mutable ()->clear ();
@@ -2697,6 +2755,21 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s
26972755 sel[new_size] = i;
26982756 new_size += result_filter_data[i] ? 1 : 0 ;
26992757 }
2758+
2759+ // Condition cache MISS: mark granules with surviving rows
2760+ if (_condition_cache_ctx && !_condition_cache_ctx->is_hit && new_size > 0 ) {
2761+ auto & cache = *_condition_cache_ctx->filter_result ;
2762+ for (uint16_t i = 0 ; i < size; i++) {
2763+ if (result_filter_data[i]) {
2764+ size_t granule = (_last_batch_row_offset + i) / ConditionCacheContext::GRANULE_SIZE;
2765+ if (granule >= cache.size ()) {
2766+ cache.resize (granule + 1 , false );
2767+ }
2768+ cache[granule] = true ;
2769+ }
2770+ }
2771+ }
2772+
27002773 _statistics.lazy_read_filtered_rows += static_cast <int64_t >(size - new_size);
27012774 data.numElements = new_size;
27022775 return Status::OK ();
0 commit comments