Skip to content

Commit c7bb6ad

Browse files
authored
Optimise IN queries for json fields in flat collections (#252)
1 parent d411cbc commit c7bb6ad

File tree

11 files changed

+460
-502
lines changed

11 files changed

+460
-502
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 160 additions & 183 deletions
Large diffs are not rendered by default.

document-store/src/main/java/org/hypertrace/core/documentstore/expression/impl/JsonArrayIdentifierExpression.java

Lines changed: 0 additions & 50 deletions
This file was deleted.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package org.hypertrace.core.documentstore.expression.impl;
2+
3+
/**
 * Declares the kind of value stored at a JSON path of a flat collection.
 *
 * <p>The constants are kept in their original declaration order.
 */
public enum JsonFieldType {
  /** Scalar string value. */
  STRING,

  /** Scalar numeric value. */
  NUMBER,

  /** Scalar boolean value. */
  BOOLEAN,

  /** Array whose elements are strings. */
  STRING_ARRAY,

  /** Array whose elements are numbers. */
  NUMBER_ARRAY,

  /** Array whose elements are booleans. */
  BOOLEAN_ARRAY,

  /** Array whose elements are JSON objects. */
  OBJECT_ARRAY,

  /** Nested JSON object. */
  OBJECT
}

document-store/src/main/java/org/hypertrace/core/documentstore/expression/impl/JsonIdentifierExpression.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.hypertrace.core.documentstore.expression.impl;
22

33
import java.util.List;
4+
import java.util.Optional;
45
import lombok.EqualsAndHashCode;
56
import lombok.Getter;
67
import org.hypertrace.core.documentstore.parser.FieldTransformationVisitor;
@@ -20,6 +21,7 @@ public class JsonIdentifierExpression extends IdentifierExpression {
2021

2122
String columnName; // e.g., "customAttr" (the top-level JSONB column)
2223
List<String> jsonPath; // e.g., ["myAttribute", "nestedField"]
24+
JsonFieldType fieldType; // Optional (may be null): declared JSON value type, e.g. STRING or STRING_ARRAY
2325

2426
public static JsonIdentifierExpression of(final String columnName) {
2527
throw new IllegalArgumentException(
@@ -34,7 +36,20 @@ public static JsonIdentifierExpression of(final String columnName, final String.
3436
return of(columnName, List.of(pathElements));
3537
}
3638

39+
public static JsonIdentifierExpression of(
40+
final String columnName, final JsonFieldType fieldType, final String... pathElements) {
41+
if (pathElements == null || pathElements.length == 0) {
42+
throw new IllegalArgumentException("JSON path cannot be null or empty");
43+
}
44+
return of(columnName, fieldType, List.of(pathElements));
45+
}
46+
3747
public static JsonIdentifierExpression of(final String columnName, final List<String> jsonPath) {
48+
return of(columnName, null, jsonPath);
49+
}
50+
51+
public static JsonIdentifierExpression of(
52+
final String columnName, final JsonFieldType fieldType, final List<String> jsonPath) {
3853
BasicPostgresSecurityValidator.getDefault().validateIdentifier(columnName);
3954

4055
if (jsonPath == null || jsonPath.isEmpty()) {
@@ -47,13 +62,20 @@ public static JsonIdentifierExpression of(final String columnName, final List<St
4762

4863
// Construct full name for compatibility: "customAttr.myAttribute"
4964
String fullName = columnName + "." + String.join(".", unmodifiablePath);
50-
return new JsonIdentifierExpression(fullName, columnName, unmodifiablePath);
65+
return new JsonIdentifierExpression(fullName, columnName, unmodifiablePath, fieldType);
5166
}
5267

53-
protected JsonIdentifierExpression(String name, String columnName, List<String> jsonPath) {
68+
/**
 * Stores the parts of a JSON identifier; no validation happens here.
 *
 * @param name the full identifier name handed to the parent expression
 * @param columnName the top-level JSONB column
 * @param jsonPath the path inside the column
 * @param fieldType the declared type of the value at the path; may be null
 */
protected JsonIdentifierExpression(
    String name, String columnName, List<String> jsonPath, JsonFieldType fieldType) {
  super(name);
  this.fieldType = fieldType;
  this.jsonPath = jsonPath;
  this.columnName = columnName;
}
75+
76+
/** Returns the JSON field type if specified, empty otherwise */
77+
public Optional<JsonFieldType> getFieldType() {
78+
return Optional.ofNullable(fieldType);
5779
}
5880

5981
/**

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInParserSelector.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,21 @@
77
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
88
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
99
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
10+
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
1011
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
1112
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
1213
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresInRelationalFilterParserArrayField;
1314
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresInRelationalFilterParserScalarField;
1415

1516
class PostgresInParserSelector implements SelectTypeExpressionVisitor {
1617

18+
// Parsers for different expression types
1719
private static final PostgresInRelationalFilterParserInterface jsonFieldInFilterParser =
18-
new PostgresInRelationalFilterParser();
20+
new PostgresInRelationalFilterParser(); // Untyped JSON parser; not used for JsonIdentifierExpression
21+
private static final PostgresInRelationalFilterParserInterface jsonPrimitiveInFilterParser =
22+
new PostgresInRelationalFilterParserJsonPrimitive(); // Optimized for JSON primitives
23+
private static final PostgresInRelationalFilterParserInterface jsonArrayInFilterParser =
24+
new PostgresInRelationalFilterParserJsonArray(); // Optimized for JSON arrays
1925
private static final PostgresInRelationalFilterParserInterface scalarFieldInFilterParser =
2026
new PostgresInRelationalFilterParserScalarField();
2127
private static final PostgresInRelationalFilterParserInterface arrayFieldInFilterParser =
@@ -29,7 +35,28 @@ class PostgresInParserSelector implements SelectTypeExpressionVisitor {
2935

3036
@Override
3137
public PostgresInRelationalFilterParserInterface visit(JsonIdentifierExpression expression) {
32-
return jsonFieldInFilterParser;
38+
// JsonFieldType is required for optimized SQL generation
39+
JsonFieldType fieldType = getFieldType(expression);
40+
41+
switch (fieldType) {
42+
case STRING:
43+
case NUMBER:
44+
case BOOLEAN:
45+
// Primitives: use ->> (extract as text) with appropriate casting
46+
return jsonPrimitiveInFilterParser;
47+
case STRING_ARRAY:
48+
case NUMBER_ARRAY:
49+
case BOOLEAN_ARRAY:
50+
case OBJECT_ARRAY:
51+
// Typed arrays: use -> with @> and typed jsonb_build_array
52+
return jsonArrayInFilterParser;
53+
case OBJECT:
54+
// Objects: use -> with @> (future: needs separate parser)
55+
throw new UnsupportedOperationException(
56+
"IN operator on OBJECT type is not yet supported. Use primitive or array types.");
57+
default:
58+
throw new IllegalArgumentException("Unsupported JsonFieldType: " + fieldType);
59+
}
3360
}
3461

3562
@Override
@@ -68,4 +95,14 @@ public PostgresInRelationalFilterParserInterface visit(FunctionExpression expres
6895
public PostgresInRelationalFilterParserInterface visit(AliasedIdentifierExpression expression) {
6996
return isFlatCollection ? scalarFieldInFilterParser : jsonFieldInFilterParser;
7097
}
98+
99+
private static JsonFieldType getFieldType(JsonIdentifierExpression expression) {
100+
return expression
101+
.getFieldType()
102+
.orElseThrow(
103+
() ->
104+
new IllegalArgumentException(
105+
"JsonFieldType must be specified for JsonIdentifierExpression in IN operations. "
106+
+ "Use JsonIdentifierExpression.of(column, JsonFieldType.*, path...)"));
107+
}
71108
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
2+
3+
import java.util.stream.Collectors;
4+
import java.util.stream.StreamSupport;
5+
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
6+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
7+
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
8+
import org.hypertrace.core.documentstore.postgres.Params;
9+
10+
/**
11+
* Optimized parser for IN operations on JSON array fields with type-specific casting.
12+
*
13+
* <p>Uses JSONB containment operator (@>) with typed jsonb_build_array for "contains any"
14+
* semantics:
15+
*
16+
* <ul>
17+
* <li><b>STRING_ARRAY:</b> {@code "document" -> 'tags' @> jsonb_build_array(?::text)}
18+
* <li><b>NUMBER_ARRAY:</b> {@code "document" -> 'scores' @> jsonb_build_array(?::numeric)}
19+
* <li><b>BOOLEAN_ARRAY:</b> {@code "document" -> 'flags' @> jsonb_build_array(?::boolean)}
20+
* <li><b>OBJECT_ARRAY:</b> {@code "document" -> 'items' @> jsonb_build_array(?::jsonb)}
21+
* </ul>
22+
*
23+
* <p>This checks if the JSON array contains ANY of the provided values, using efficient JSONB
24+
* containment instead of defensive type checking.
25+
*/
26+
public class PostgresInRelationalFilterParserJsonArray
27+
implements PostgresInRelationalFilterParserInterface {
28+
29+
@Override
30+
public String parse(
31+
final RelationalExpression expression, final PostgresRelationalFilterContext context) {
32+
final String parsedLhs = expression.getLhs().accept(context.lhsParser());
33+
final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());
34+
35+
// Extract field type for typed array handling (guaranteed to be present by selector)
36+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
37+
JsonFieldType fieldType =
38+
jsonExpr
39+
.getFieldType()
40+
.orElseThrow(
41+
() ->
42+
new IllegalStateException(
43+
"JsonFieldType must be present - this should have been caught by the selector"));
44+
45+
return prepareFilterStringForInOperator(
46+
parsedLhs, parsedRhs, fieldType, context.getParamsBuilder());
47+
}
48+
49+
private String prepareFilterStringForInOperator(
50+
final String parsedLhs,
51+
final Iterable<Object> parsedRhs,
52+
final JsonFieldType fieldType,
53+
final Params.Builder paramsBuilder) {
54+
55+
// Determine the appropriate type cast for jsonb_build_array elements
56+
String typeCast = getTypeCastForArray(fieldType);
57+
58+
// For JSON arrays, we use the @> containment operator
59+
// Check if ANY of the RHS values is contained in the LHS array
60+
String orConditions =
61+
StreamSupport.stream(parsedRhs.spliterator(), false)
62+
.map(
63+
value -> {
64+
paramsBuilder.addObjectParam(value);
65+
return String.format("%s @> jsonb_build_array(?%s)", parsedLhs, typeCast);
66+
})
67+
.collect(Collectors.joining(" OR "));
68+
69+
// Wrap in parentheses if multiple conditions
70+
return StreamSupport.stream(parsedRhs.spliterator(), false).count() > 1
71+
? String.format("(%s)", orConditions)
72+
: orConditions;
73+
}
74+
75+
/**
76+
* Returns the PostgreSQL type cast string for jsonb_build_array elements based on array type.
77+
*
78+
* @param fieldType The JSON field type (must not be null)
79+
* @return Type cast string (e.g., "::text", "::numeric")
80+
*/
81+
private String getTypeCastForArray(JsonFieldType fieldType) {
82+
switch (fieldType) {
83+
case STRING_ARRAY:
84+
return "::text";
85+
case NUMBER_ARRAY:
86+
return "::numeric";
87+
case BOOLEAN_ARRAY:
88+
return "::boolean";
89+
case OBJECT_ARRAY:
90+
return "::jsonb";
91+
default:
92+
throw new IllegalArgumentException(
93+
"Unsupported array type: " + fieldType + ". Expected *_ARRAY types.");
94+
}
95+
}
96+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;
2+
3+
import java.util.stream.Collectors;
4+
import java.util.stream.StreamSupport;
5+
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
6+
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
7+
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
8+
import org.hypertrace.core.documentstore.postgres.Params;
9+
10+
/**
11+
* Optimized parser for IN operations on JSON primitive fields (string, number, boolean) with proper
12+
* type casting.
13+
*
14+
* <p>Generates efficient SQL using {@code ->>} operator with appropriate PostgreSQL casting:
15+
*
16+
* <ul>
17+
* <li><b>STRING:</b> {@code "document" ->> 'item' IN ('Soap', 'Shampoo')}
18+
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) IN (10, 20)}
19+
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) IN (true, false)}
20+
* </ul>
21+
*
22+
* <p>This is much more efficient than the defensive approach that checks both array and scalar
23+
* types, and ensures correct type comparisons.
24+
*/
25+
public class PostgresInRelationalFilterParserJsonPrimitive
26+
implements PostgresInRelationalFilterParserInterface {
27+
28+
@Override
29+
public String parse(
30+
final RelationalExpression expression, final PostgresRelationalFilterContext context) {
31+
String parsedLhs = expression.getLhs().accept(context.lhsParser());
32+
final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());
33+
34+
// Extract field type for proper casting (guaranteed to be present by selector)
35+
JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs();
36+
JsonFieldType fieldType =
37+
jsonExpr
38+
.getFieldType()
39+
.orElseThrow(
40+
() ->
41+
new IllegalStateException(
42+
"JsonFieldType must be present - this should have been caught by the selector"));
43+
44+
// For JSON primitives, we need ->> (text extraction) instead of -> (jsonb extraction)
45+
// The LHS parser generates: "props"->'brand' (returns JSONB)
46+
// We need: "props"->>'brand' (returns TEXT)
47+
// Replace the last -> with ->> for primitive type extraction
48+
int lastArrowIndex = parsedLhs.lastIndexOf("->");
49+
if (lastArrowIndex != -1) {
50+
parsedLhs =
51+
parsedLhs.substring(0, lastArrowIndex) + "->>" + parsedLhs.substring(lastArrowIndex + 2);
52+
}
53+
54+
return prepareFilterStringForInOperator(
55+
parsedLhs, parsedRhs, fieldType, context.getParamsBuilder());
56+
}
57+
58+
private String prepareFilterStringForInOperator(
59+
final String parsedLhs,
60+
final Iterable<Object> parsedRhs,
61+
final JsonFieldType fieldType,
62+
final Params.Builder paramsBuilder) {
63+
64+
String placeholders =
65+
StreamSupport.stream(parsedRhs.spliterator(), false)
66+
.map(
67+
value -> {
68+
paramsBuilder.addObjectParam(value);
69+
return "?";
70+
})
71+
.collect(Collectors.joining(", "));
72+
73+
// Apply appropriate casting based on field type
74+
String lhsWithCast = parsedLhs;
75+
if (fieldType == JsonFieldType.NUMBER) {
76+
lhsWithCast = String.format("CAST(%s AS NUMERIC)", parsedLhs);
77+
} else if (fieldType == JsonFieldType.BOOLEAN) {
78+
lhsWithCast = String.format("CAST(%s AS BOOLEAN)", parsedLhs);
79+
}
80+
// STRING or null fieldType: no casting needed
81+
82+
return String.format("%s IN (%s)", lhsWithCast, placeholders);
83+
}
84+
}

0 commit comments

Comments
 (0)