|
85 | 85 | import org.hypertrace.core.documentstore.commons.DocStoreConstants; |
86 | 86 | import org.hypertrace.core.documentstore.expression.impl.AggregateExpression; |
87 | 87 | import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression; |
| 88 | +import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression; |
88 | 89 | import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression; |
| 90 | +import org.hypertrace.core.documentstore.expression.impl.ArrayType; |
89 | 91 | import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; |
90 | 92 | import org.hypertrace.core.documentstore.expression.impl.FunctionExpression; |
91 | 93 | import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; |
@@ -300,6 +302,24 @@ public Stream<Arguments> provideArguments(final ExtensionContext context) { |
300 | 302 | } |
301 | 303 | } |
302 | 304 |
|
| 305 | + /** |
| 306 | + * Provides arguments for testing array operations with different expression types. Returns: |
| 307 | + * (datastoreName, expressionType) - "WITH_TYPE": ArrayIdentifierExpression WITH ArrayType |
| 308 | + * (optimized, type-aware casting) - "WITHOUT_TYPE": ArrayIdentifierExpression WITHOUT ArrayType |
| 309 | + * (fallback, text[] casting) |
| 310 | + */ |
| 311 | + private static class PostgresArrayTypeProvider implements ArgumentsProvider { |
| 312 | + |
| 313 | + @Override |
| 314 | + public Stream<Arguments> provideArguments(final ExtensionContext context) { |
| 315 | + return Stream.of( |
| 316 | + Arguments.of(POSTGRES_STORE, "WITH_TYPE"), // ArrayIdentifierExpression WITH ArrayType |
| 317 | + Arguments.of( |
| 318 | + POSTGRES_STORE, "WITHOUT_TYPE") // ArrayIdentifierExpression WITHOUT ArrayType |
| 319 | + ); |
| 320 | + } |
| 321 | + } |
| 322 | + |
303 | 323 | @ParameterizedTest |
304 | 324 | @ArgumentsSource(AllProvider.class) |
305 | 325 | public void testFindAll(String dataStoreName) throws IOException { |
@@ -3267,6 +3287,228 @@ void testFlatPostgresCollectionCount(String dataStoreName) { |
3267 | 3287 | assertEquals(3, soapCountQuery); |
3268 | 3288 | } |
3269 | 3289 |
|
| 3290 | + /** |
| 3291 | + * Tests IN and NOT_IN operators on primitive (non-JSON) fields in flat collections. These |
| 3292 | + * operators should use simple SQL IN clause instead of array overlap operator for optimal index |
| 3293 | + * usage. |
| 3294 | + */ |
| 3295 | + @ParameterizedTest |
| 3296 | + @ArgumentsSource(PostgresProvider.class) |
| 3297 | + void testFlatPostgresCollectionInAndNotInOperators(String dataStoreName) { |
| 3298 | + Datastore datastore = datastoreMap.get(dataStoreName); |
| 3299 | + Collection flatCollection = |
| 3300 | + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); |
| 3301 | + |
| 3302 | + // Test 1: IN operator on _id field |
| 3303 | + // Expected: 3 documents (IDs 1, 3, 5) |
| 3304 | + Query idInQuery = |
| 3305 | + Query.builder() |
| 3306 | + .setFilter( |
| 3307 | + RelationalExpression.of( |
| 3308 | + IdentifierExpression.of("_id"), |
| 3309 | + IN, |
| 3310 | + ConstantExpression.ofNumbers(List.of(1, 3, 5)))) |
| 3311 | + .build(); |
| 3312 | + |
| 3313 | + long idInCount = flatCollection.count(idInQuery); |
| 3314 | + assertEquals(3, idInCount, "IN operator on _id should find 3 documents"); |
| 3315 | + |
| 3316 | + // Test 2: IN operator on item field (string) |
| 3317 | + // Expected: 5 documents (IDs 1, 3, 4 for Shampoo and 1, 5, 8 for Soap) |
| 3318 | + Query itemInQuery = |
| 3319 | + Query.builder() |
| 3320 | + .setFilter( |
| 3321 | + RelationalExpression.of( |
| 3322 | + IdentifierExpression.of("item"), |
| 3323 | + IN, |
| 3324 | + ConstantExpression.ofStrings(List.of("Soap", "Shampoo")))) |
| 3325 | + .build(); |
| 3326 | + |
| 3327 | + long itemInCount = flatCollection.count(itemInQuery); |
| 3328 | + assertEquals( |
| 3329 | + 5, itemInCount, "IN operator on item should find 5 documents (3 Soap + 2 Shampoo)"); |
| 3330 | + |
| 3331 | + // Test 3: IN operator on price field (numeric) |
| 3332 | + // Expected: 5 documents (IDs 1, 8 for price=10 and 3, 4 for price=5) |
| 3333 | + Query priceInQuery = |
| 3334 | + Query.builder() |
| 3335 | + .setFilter( |
| 3336 | + RelationalExpression.of( |
| 3337 | + IdentifierExpression.of("price"), |
| 3338 | + IN, |
| 3339 | + ConstantExpression.ofNumbers(List.of(5, 10)))) |
| 3340 | + .build(); |
| 3341 | + |
| 3342 | + long priceInCount = flatCollection.count(priceInQuery); |
| 3343 | + assertEquals(4, priceInCount, "IN operator on price should find 4 documents"); |
| 3344 | + |
| 3345 | + // Test 4: NOT_IN operator on _id field |
| 3346 | + // Expected: 7 documents (all except IDs 1, 3, 5) |
| 3347 | + Query idNotInQuery = |
| 3348 | + Query.builder() |
| 3349 | + .setFilter( |
| 3350 | + RelationalExpression.of( |
| 3351 | + IdentifierExpression.of("_id"), |
| 3352 | + NOT_IN, |
| 3353 | + ConstantExpression.ofNumbers(List.of(1, 3, 5)))) |
| 3354 | + .build(); |
| 3355 | + |
| 3356 | + long idNotInCount = flatCollection.count(idNotInQuery); |
| 3357 | + assertEquals(7, idNotInCount, "NOT_IN operator on _id should find 7 documents"); |
| 3358 | + |
| 3359 | + // Test 5: NOT_IN operator on item field |
| 3360 | + // Expected: 5 documents (all except Soap items: IDs 2, 3, 4, 6, 7, 9, 10) |
| 3361 | + Query itemNotInQuery = |
| 3362 | + Query.builder() |
| 3363 | + .setFilter( |
| 3364 | + RelationalExpression.of( |
| 3365 | + IdentifierExpression.of("item"), |
| 3366 | + NOT_IN, |
| 3367 | + ConstantExpression.ofStrings(List.of("Soap")))) |
| 3368 | + .build(); |
| 3369 | + |
| 3370 | + long itemNotInCount = flatCollection.count(itemNotInQuery); |
| 3371 | + assertEquals(7, itemNotInCount, "NOT_IN operator on item should find 7 documents"); |
| 3372 | + |
| 3373 | + // Test 6: Combined IN with other filters (AND) |
| 3374 | + // Filter: _id IN (1, 3, 5, 7) AND price >= 10 |
| 3375 | + // Expected: 2 documents (ID 1 with price=10, ID 5 with price=20) |
| 3376 | + Query combinedQuery = |
| 3377 | + Query.builder() |
| 3378 | + .setFilter( |
| 3379 | + LogicalExpression.builder() |
| 3380 | + .operator(LogicalOperator.AND) |
| 3381 | + .operand( |
| 3382 | + RelationalExpression.of( |
| 3383 | + IdentifierExpression.of("_id"), |
| 3384 | + IN, |
| 3385 | + ConstantExpression.ofNumbers(List.of(1, 3, 5, 7)))) |
| 3386 | + .operand( |
| 3387 | + RelationalExpression.of( |
| 3388 | + IdentifierExpression.of("price"), GTE, ConstantExpression.of(10))) |
| 3389 | + .build()) |
| 3390 | + .build(); |
| 3391 | + |
| 3392 | + long combinedCount = flatCollection.count(combinedQuery); |
| 3393 | + assertEquals(2, combinedCount, "Combined IN with >= filter should find 2 documents"); |
| 3394 | + } |
| 3395 | + |
| 3396 | + /** |
| 3397 | + * Tests IN and NOT_IN operators on array fields in flat collections. Array fields use the |
| 3398 | + * PostgreSQL array overlap operator (&&) for IN operations, which checks if the array contains |
| 3399 | + * ANY of the provided values. |
| 3400 | + * |
| 3401 | + * <p>This test is parameterized to test three scenarios: 1. ArrayIdentifierExpression WITH |
| 3402 | + * ArrayType - optimized queries with type-aware casting 2. ArrayIdentifierExpression WITHOUT |
| 3403 | + * ArrayType - fallback with text[] casting both sides 3. IdentifierExpression - backward |
| 3404 | + * compatibility with text[] casting both sides |
| 3405 | + */ |
| 3406 | + @ParameterizedTest |
| 3407 | + @ArgumentsSource(PostgresArrayTypeProvider.class) |
| 3408 | + void testFlatPostgresCollectionInAndNotInOperatorsForArrays( |
| 3409 | + String dataStoreName, String expressionType) { |
| 3410 | + Datastore datastore = datastoreMap.get(dataStoreName); |
| 3411 | + Collection flatCollection = |
| 3412 | + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); |
| 3413 | + |
| 3414 | + String typeDesc = |
| 3415 | + expressionType.equals("WITH_TYPE") |
| 3416 | + ? "WITH ArrayType (optimized)" |
| 3417 | + : "WITHOUT ArrayType (fallback)"; |
| 3418 | + |
| 3419 | + // Test 1: IN operator on tags array field (string array) |
| 3420 | + // Find documents where tags contains "hygiene" OR "grooming" |
| 3421 | + // Expected: IDs 1, 5, 8 (hygiene) + IDs 6, 7 (grooming) = 5 documents |
| 3422 | + Query tagsInQuery = |
| 3423 | + Query.builder() |
| 3424 | + .setFilter( |
| 3425 | + RelationalExpression.of( |
| 3426 | + expressionType.equals("WITH_TYPE") |
| 3427 | + ? ArrayIdentifierExpression.of("tags", ArrayType.TEXT) |
| 3428 | + : ArrayIdentifierExpression.of("tags"), |
| 3429 | + IN, |
| 3430 | + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) |
| 3431 | + .build(); |
| 3432 | + |
| 3433 | + long tagsInCount = flatCollection.count(tagsInQuery); |
| 3434 | + assertEquals( |
| 3435 | + 5, |
| 3436 | + tagsInCount, |
| 3437 | + String.format( |
| 3438 | + "IN operator on tags array %s should find 5 documents with hygiene or grooming", |
| 3439 | + typeDesc)); |
| 3440 | + |
| 3441 | + // Test 2: IN operator on numbers array field (numeric array) |
| 3442 | + // Find documents where numbers array contains 1 OR 10 |
| 3443 | + // Expected: ID 1 has {1,2,3}, ID 2 has {10,20} = 2 documents |
| 3444 | + Query numbersInQuery = |
| 3445 | + Query.builder() |
| 3446 | + .setFilter( |
| 3447 | + RelationalExpression.of( |
| 3448 | + expressionType.equals("WITH_TYPE") |
| 3449 | + ? ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER) |
| 3450 | + : ArrayIdentifierExpression.of("numbers"), |
| 3451 | + IN, |
| 3452 | + ConstantExpression.ofNumbers(List.of(1, 10)))) |
| 3453 | + .build(); |
| 3454 | + |
| 3455 | + long numbersInCount = flatCollection.count(numbersInQuery); |
| 3456 | + assertEquals( |
| 3457 | + 2, |
| 3458 | + numbersInCount, |
| 3459 | + String.format("IN operator on numbers array %s should find 2 documents", typeDesc)); |
| 3460 | + |
| 3461 | + // Test 3: NOT_IN operator on tags array field |
| 3462 | + // Find documents where tags does NOT contain "hygiene" |
| 3463 | + // Expected: All documents except IDs 1, 5, 8 = 7 documents |
| 3464 | + // Note: This includes NULL tags (ID 9) and empty array (ID 10) |
| 3465 | + Query tagsNotInQuery = |
| 3466 | + Query.builder() |
| 3467 | + .setFilter( |
| 3468 | + RelationalExpression.of( |
| 3469 | + expressionType.equals("WITH_TYPE") |
| 3470 | + ? ArrayIdentifierExpression.of("tags", ArrayType.TEXT) |
| 3471 | + : ArrayIdentifierExpression.of("tags"), |
| 3472 | + NOT_IN, |
| 3473 | + ConstantExpression.ofStrings(List.of("hygiene")))) |
| 3474 | + .build(); |
| 3475 | + |
| 3476 | + long tagsNotInCount = flatCollection.count(tagsNotInQuery); |
| 3477 | + assertEquals( |
| 3478 | + 7, |
| 3479 | + tagsNotInCount, |
| 3480 | + String.format( |
| 3481 | + "NOT_IN operator on tags array %s should find 7 documents without hygiene", |
| 3482 | + typeDesc)); |
| 3483 | + |
| 3484 | + // Test 4: Combined array IN with scalar filter |
| 3485 | + // Find documents where tags contains "premium" AND price >= 5 |
| 3486 | + // Expected: ID 1 (premium, price=10) + ID 3 (premium, price=5) = 2 documents |
| 3487 | + Query combinedArrayQuery = |
| 3488 | + Query.builder() |
| 3489 | + .setFilter( |
| 3490 | + LogicalExpression.builder() |
| 3491 | + .operator(LogicalOperator.AND) |
| 3492 | + .operand( |
| 3493 | + RelationalExpression.of( |
| 3494 | + expressionType.equals("WITH_TYPE") |
| 3495 | + ? ArrayIdentifierExpression.of("tags", ArrayType.TEXT) |
| 3496 | + : ArrayIdentifierExpression.of("tags"), |
| 3497 | + IN, |
| 3498 | + ConstantExpression.ofStrings(List.of("premium")))) |
| 3499 | + .operand( |
| 3500 | + RelationalExpression.of( |
| 3501 | + IdentifierExpression.of("price"), GTE, ConstantExpression.of(5))) |
| 3502 | + .build()) |
| 3503 | + .build(); |
| 3504 | + |
| 3505 | + long combinedArrayCount = flatCollection.count(combinedArrayQuery); |
| 3506 | + assertEquals( |
| 3507 | + 2, |
| 3508 | + combinedArrayCount, |
| 3509 | + String.format("Combined array IN with >= filter %s should find 2 documents", typeDesc)); |
| 3510 | + } |
| 3511 | + |
3270 | 3512 | /** |
3271 | 3513 | * This test is disabled for now because flat collections do not support search on nested |
3272 | 3514 | * queries in JSONB fields (ex. props.brand) |
|
0 commit comments