Skip to content

Commit d411cbc

Browse files
authored
[Postgres] Optimise IN and NOT_IN Queries for Primitive and ARRAY Fields (#251)
1 parent 61e844b commit d411cbc

18 files changed

+1121
-52
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@
8585
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
8686
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
8787
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
88+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
8889
import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression;
90+
import org.hypertrace.core.documentstore.expression.impl.ArrayType;
8991
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
9092
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
9193
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
@@ -300,6 +302,24 @@ public Stream<Arguments> provideArguments(final ExtensionContext context) {
300302
}
301303
}
302304

305+
/**
306+
* Provides arguments for testing array operations with different expression types. Returns:
307+
* (datastoreName, expressionType) - "WITH_TYPE": ArrayIdentifierExpression WITH ArrayType
308+
* (optimized, type-aware casting) - "WITHOUT_TYPE": ArrayIdentifierExpression WITHOUT ArrayType
309+
* (fallback, text[] casting)
310+
*/
311+
private static class PostgresArrayTypeProvider implements ArgumentsProvider {
312+
313+
@Override
314+
public Stream<Arguments> provideArguments(final ExtensionContext context) {
315+
return Stream.of(
316+
Arguments.of(POSTGRES_STORE, "WITH_TYPE"), // ArrayIdentifierExpression WITH ArrayType
317+
Arguments.of(
318+
POSTGRES_STORE, "WITHOUT_TYPE") // ArrayIdentifierExpression WITHOUT ArrayType
319+
);
320+
}
321+
}
322+
303323
@ParameterizedTest
304324
@ArgumentsSource(AllProvider.class)
305325
public void testFindAll(String dataStoreName) throws IOException {
@@ -3267,6 +3287,228 @@ void testFlatPostgresCollectionCount(String dataStoreName) {
32673287
assertEquals(3, soapCountQuery);
32683288
}
32693289

3290+
/**
3291+
* Tests IN and NOT_IN operators on primitive (non-JSON) fields in flat collections. These
3292+
* operators should use simple SQL IN clause instead of array overlap operator for optimal index
3293+
* usage.
3294+
*/
3295+
@ParameterizedTest
3296+
@ArgumentsSource(PostgresProvider.class)
3297+
void testFlatPostgresCollectionInAndNotInOperators(String dataStoreName) {
3298+
Datastore datastore = datastoreMap.get(dataStoreName);
3299+
Collection flatCollection =
3300+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
3301+
3302+
// Test 1: IN operator on _id field
3303+
// Expected: 3 documents (IDs 1, 3, 5)
3304+
Query idInQuery =
3305+
Query.builder()
3306+
.setFilter(
3307+
RelationalExpression.of(
3308+
IdentifierExpression.of("_id"),
3309+
IN,
3310+
ConstantExpression.ofNumbers(List.of(1, 3, 5))))
3311+
.build();
3312+
3313+
long idInCount = flatCollection.count(idInQuery);
3314+
assertEquals(3, idInCount, "IN operator on _id should find 3 documents");
3315+
3316+
// Test 2: IN operator on item field (string)
3317+
// Expected: 5 documents (IDs 3, 4 for Shampoo and 1, 5, 8 for Soap)
3318+
Query itemInQuery =
3319+
Query.builder()
3320+
.setFilter(
3321+
RelationalExpression.of(
3322+
IdentifierExpression.of("item"),
3323+
IN,
3324+
ConstantExpression.ofStrings(List.of("Soap", "Shampoo"))))
3325+
.build();
3326+
3327+
long itemInCount = flatCollection.count(itemInQuery);
3328+
assertEquals(
3329+
5, itemInCount, "IN operator on item should find 5 documents (3 Soap + 2 Shampoo)");
3330+
3331+
// Test 3: IN operator on price field (numeric)
3332+
// Expected: 4 documents (IDs 1, 8 for price=10 and 3, 4 for price=5)
3333+
Query priceInQuery =
3334+
Query.builder()
3335+
.setFilter(
3336+
RelationalExpression.of(
3337+
IdentifierExpression.of("price"),
3338+
IN,
3339+
ConstantExpression.ofNumbers(List.of(5, 10))))
3340+
.build();
3341+
3342+
long priceInCount = flatCollection.count(priceInQuery);
3343+
assertEquals(4, priceInCount, "IN operator on price should find 4 documents");
3344+
3345+
// Test 4: NOT_IN operator on _id field
3346+
// Expected: 7 documents (all except IDs 1, 3, 5)
3347+
Query idNotInQuery =
3348+
Query.builder()
3349+
.setFilter(
3350+
RelationalExpression.of(
3351+
IdentifierExpression.of("_id"),
3352+
NOT_IN,
3353+
ConstantExpression.ofNumbers(List.of(1, 3, 5))))
3354+
.build();
3355+
3356+
long idNotInCount = flatCollection.count(idNotInQuery);
3357+
assertEquals(7, idNotInCount, "NOT_IN operator on _id should find 7 documents");
3358+
3359+
// Test 5: NOT_IN operator on item field
3360+
// Expected: 7 documents (all except Soap items: IDs 2, 3, 4, 6, 7, 9, 10)
3361+
Query itemNotInQuery =
3362+
Query.builder()
3363+
.setFilter(
3364+
RelationalExpression.of(
3365+
IdentifierExpression.of("item"),
3366+
NOT_IN,
3367+
ConstantExpression.ofStrings(List.of("Soap"))))
3368+
.build();
3369+
3370+
long itemNotInCount = flatCollection.count(itemNotInQuery);
3371+
assertEquals(7, itemNotInCount, "NOT_IN operator on item should find 7 documents");
3372+
3373+
// Test 6: Combined IN with other filters (AND)
3374+
// Filter: _id IN (1, 3, 5, 7) AND price >= 10
3375+
// Expected: 2 documents (ID 1 with price=10, ID 5 with price=20)
3376+
Query combinedQuery =
3377+
Query.builder()
3378+
.setFilter(
3379+
LogicalExpression.builder()
3380+
.operator(LogicalOperator.AND)
3381+
.operand(
3382+
RelationalExpression.of(
3383+
IdentifierExpression.of("_id"),
3384+
IN,
3385+
ConstantExpression.ofNumbers(List.of(1, 3, 5, 7))))
3386+
.operand(
3387+
RelationalExpression.of(
3388+
IdentifierExpression.of("price"), GTE, ConstantExpression.of(10)))
3389+
.build())
3390+
.build();
3391+
3392+
long combinedCount = flatCollection.count(combinedQuery);
3393+
assertEquals(2, combinedCount, "Combined IN with >= filter should find 2 documents");
3394+
}
3395+
3396+
/**
3397+
* Tests IN and NOT_IN operators on array fields in flat collections. Array fields use the
3398+
* PostgreSQL array overlap operator (&&) for IN operations, which checks if the array contains
3399+
* ANY of the provided values.
3400+
*
3401+
* <p>This test is parameterized to test two scenarios (see PostgresArrayTypeProvider):
3402+
* 1. ArrayIdentifierExpression WITH ArrayType - optimized queries with type-aware casting
3403+
* 2. ArrayIdentifierExpression WITHOUT ArrayType - fallback with text[] casting both sides
3404+
* Plain IdentifierExpression (backward compatibility) is not exercised by this test.
3405+
*/
3406+
@ParameterizedTest
3407+
@ArgumentsSource(PostgresArrayTypeProvider.class)
3408+
void testFlatPostgresCollectionInAndNotInOperatorsForArrays(
3409+
String dataStoreName, String expressionType) {
3410+
Datastore datastore = datastoreMap.get(dataStoreName);
3411+
Collection flatCollection =
3412+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
3413+
3414+
String typeDesc =
3415+
expressionType.equals("WITH_TYPE")
3416+
? "WITH ArrayType (optimized)"
3417+
: "WITHOUT ArrayType (fallback)";
3418+
3419+
// Test 1: IN operator on tags array field (string array)
3420+
// Find documents where tags contains "hygiene" OR "grooming"
3421+
// Expected: IDs 1, 5, 8 (hygiene) + IDs 6, 7 (grooming) = 5 documents
3422+
Query tagsInQuery =
3423+
Query.builder()
3424+
.setFilter(
3425+
RelationalExpression.of(
3426+
expressionType.equals("WITH_TYPE")
3427+
? ArrayIdentifierExpression.of("tags", ArrayType.TEXT)
3428+
: ArrayIdentifierExpression.of("tags"),
3429+
IN,
3430+
ConstantExpression.ofStrings(List.of("hygiene", "grooming"))))
3431+
.build();
3432+
3433+
long tagsInCount = flatCollection.count(tagsInQuery);
3434+
assertEquals(
3435+
5,
3436+
tagsInCount,
3437+
String.format(
3438+
"IN operator on tags array %s should find 5 documents with hygiene or grooming",
3439+
typeDesc));
3440+
3441+
// Test 2: IN operator on numbers array field (numeric array)
3442+
// Find documents where numbers array contains 1 OR 10
3443+
// Expected: ID 1 has {1,2,3}, ID 2 has {10,20} = 2 documents
3444+
Query numbersInQuery =
3445+
Query.builder()
3446+
.setFilter(
3447+
RelationalExpression.of(
3448+
expressionType.equals("WITH_TYPE")
3449+
? ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER)
3450+
: ArrayIdentifierExpression.of("numbers"),
3451+
IN,
3452+
ConstantExpression.ofNumbers(List.of(1, 10))))
3453+
.build();
3454+
3455+
long numbersInCount = flatCollection.count(numbersInQuery);
3456+
assertEquals(
3457+
2,
3458+
numbersInCount,
3459+
String.format("IN operator on numbers array %s should find 2 documents", typeDesc));
3460+
3461+
// Test 3: NOT_IN operator on tags array field
3462+
// Find documents where tags does NOT contain "hygiene"
3463+
// Expected: All documents except IDs 1, 5, 8 = 7 documents
3464+
// Note: This includes NULL tags (ID 9) and empty array (ID 10)
3465+
Query tagsNotInQuery =
3466+
Query.builder()
3467+
.setFilter(
3468+
RelationalExpression.of(
3469+
expressionType.equals("WITH_TYPE")
3470+
? ArrayIdentifierExpression.of("tags", ArrayType.TEXT)
3471+
: ArrayIdentifierExpression.of("tags"),
3472+
NOT_IN,
3473+
ConstantExpression.ofStrings(List.of("hygiene"))))
3474+
.build();
3475+
3476+
long tagsNotInCount = flatCollection.count(tagsNotInQuery);
3477+
assertEquals(
3478+
7,
3479+
tagsNotInCount,
3480+
String.format(
3481+
"NOT_IN operator on tags array %s should find 7 documents without hygiene",
3482+
typeDesc));
3483+
3484+
// Test 4: Combined array IN with scalar filter
3485+
// Find documents where tags contains "premium" AND price >= 5
3486+
// Expected: ID 1 (premium, price=10) + ID 3 (premium, price=5) = 2 documents
3487+
Query combinedArrayQuery =
3488+
Query.builder()
3489+
.setFilter(
3490+
LogicalExpression.builder()
3491+
.operator(LogicalOperator.AND)
3492+
.operand(
3493+
RelationalExpression.of(
3494+
expressionType.equals("WITH_TYPE")
3495+
? ArrayIdentifierExpression.of("tags", ArrayType.TEXT)
3496+
: ArrayIdentifierExpression.of("tags"),
3497+
IN,
3498+
ConstantExpression.ofStrings(List.of("premium"))))
3499+
.operand(
3500+
RelationalExpression.of(
3501+
IdentifierExpression.of("price"), GTE, ConstantExpression.of(5)))
3502+
.build())
3503+
.build();
3504+
3505+
long combinedArrayCount = flatCollection.count(combinedArrayQuery);
3506+
assertEquals(
3507+
2,
3508+
combinedArrayCount,
3509+
String.format("Combined array IN with >= filter %s should find 2 documents", typeDesc));
3510+
}
3511+
32703512
/**
32713513
* This test is disabled for now because flat collections do not support search on nested
32723514
* queries in JSONB fields (ex. props.brand)

document-store/src/main/java/org/hypertrace/core/documentstore/expression/impl/ArrayIdentifierExpression.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.hypertrace.core.documentstore.expression.impl;
22

3+
import java.util.Optional;
34
import lombok.EqualsAndHashCode;
5+
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
46

57
/**
68
* Represents an identifier expression for array-typed fields. This allows parsers to apply
@@ -12,11 +14,36 @@
1214
@EqualsAndHashCode(callSuper = true)
1315
public class ArrayIdentifierExpression extends IdentifierExpression {
1416

17+
private final ArrayType arrayType;
18+
1519
public ArrayIdentifierExpression(String name) {
20+
this(name, null);
21+
}
22+
23+
public ArrayIdentifierExpression(String name, ArrayType arrayType) {
1624
super(name);
25+
this.arrayType = arrayType;
1726
}
1827

1928
public static ArrayIdentifierExpression of(String name) {
2029
return new ArrayIdentifierExpression(name);
2130
}
31+
32+
public static ArrayIdentifierExpression of(String name, ArrayType arrayType) {
33+
return new ArrayIdentifierExpression(name, arrayType);
34+
}
35+
36+
/** Returns the array type if one was specified, or an empty Optional otherwise. */
37+
public Optional<ArrayType> getArrayType() {
38+
return Optional.ofNullable(arrayType);
39+
}
40+
41+
/**
42+
* Accepts a SelectTypeExpressionVisitor and dispatches to the ArrayIdentifierExpression-specific
43+
* visit method.
44+
*/
45+
@Override
46+
public <T> T accept(final SelectTypeExpressionVisitor visitor) {
47+
return visitor.visit(this);
48+
}
2249
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package org.hypertrace.core.documentstore.expression.impl;
2+
3+
import lombok.Getter;
4+
5+
public enum ArrayType {
6+
TEXT("text[]"),
7+
INTEGER("integer[]"),
8+
BOOLEAN("boolean[]"),
9+
DOUBLE_PRECISION("double precision[]");
10+
11+
@Getter private final String postgresType;
12+
13+
ArrayType(String postgresType) {
14+
this.postgresType = postgresType;
15+
}
16+
}

document-store/src/main/java/org/hypertrace/core/documentstore/expression/impl/JsonIdentifierExpression.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import lombok.EqualsAndHashCode;
55
import lombok.Getter;
66
import org.hypertrace.core.documentstore.parser.FieldTransformationVisitor;
7+
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
78
import org.hypertrace.core.documentstore.postgres.utils.BasicPostgresSecurityValidator;
89

910
/**
@@ -68,6 +69,15 @@ public <T> T accept(final FieldTransformationVisitor<T> visitor) {
6869
return visitor.visit(this);
6970
}
7071

72+
/**
73+
* Accepts a SelectTypeExpressionVisitor and dispatches to the JsonIdentifierExpression-specific
74+
* visit method.
75+
*/
76+
@Override
77+
public <T> T accept(final SelectTypeExpressionVisitor visitor) {
78+
return visitor.visit(this);
79+
}
80+
7181
@Override
7282
public String toString() {
7383
return String.format(

document-store/src/main/java/org/hypertrace/core/documentstore/parser/SelectTypeExpressionVisitor.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
44
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
5+
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
56
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
67
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
78
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
89
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
10+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
911

1012
public interface SelectTypeExpressionVisitor {
1113
<T> T visit(final AggregateExpression expression);
@@ -19,4 +21,20 @@ public interface SelectTypeExpressionVisitor {
1921
<T> T visit(final IdentifierExpression expression);
2022

2123
<T> T visit(final AliasedIdentifierExpression expression);
24+
25+
/**
26+
* Visit an ArrayIdentifierExpression. Default implementation delegates to
27+
* visit(IdentifierExpression) since ArrayIdentifierExpression extends IdentifierExpression.
28+
*/
29+
default <T> T visit(final ArrayIdentifierExpression expression) {
30+
return visit((IdentifierExpression) expression);
31+
}
32+
33+
/**
34+
* Visit a JsonIdentifierExpression. Default implementation delegates to
35+
* visit(IdentifierExpression) since JsonIdentifierExpression extends IdentifierExpression.
36+
*/
37+
default <T> T visit(final JsonIdentifierExpression expression) {
38+
return visit((IdentifierExpression) expression);
39+
}
2240
}

0 commit comments

Comments
 (0)