Skip to content

Commit a02a279

Browse files
authored
Support GROUP BY on Nested JSONB Fields in Flat Postgres Collections (#247)
1 parent 4192bde commit a02a279

File tree

5 files changed

+180
-3
lines changed

5 files changed

+180
-3
lines changed

document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3982,6 +3982,56 @@ void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throw
39823982
dataStoreName, flatBrandNoAliasIterator, "query/no_alias_response.json", 8);
39833983
}
39843984

3985+
/**
3986+
* Tests that GROUP BY with UNNEST on JSONB array fields produces consistent results across
3987+
* nested and flat collections. This validates that both collection types properly unnest arrays
3988+
* and group by individual elements (not entire arrays).
*
* <p>Runs against the data stores supplied by {@code PostgresProvider}. The nested collection is
* queried with a dotted {@code IdentifierExpression} ("props.colors"), the flat collection with
* {@code JsonIdentifierExpression.of("props", "colors")}; both result sets are compared against
* the same fixture, {@code query/group_by_colors_comparison_response.json}, expecting 4 documents.
*
* <p>NOTE(review): both queries add an ASC sort, but the assertion helper is named
* {@code assertDocsAndSizeEqualWithoutOrder}, so the ordering is presumably not verified by this
* test - confirm.
3989+
*/
3990+
@ParameterizedTest
3991+
@ArgumentsSource(PostgresProvider.class)
3992+
void testFlatVsNestedCollectionGroupByArrayField(String dataStoreName) throws IOException {
3993+
Datastore datastore = datastoreMap.get(dataStoreName);
3994+
3995+
Collection nestedCollection = datastore.getCollection(COLLECTION_NAME);
3996+
Collection flatCollection =
3997+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
3998+
3999+
// Nested collection: UNNEST the props.colors array, then GROUP BY the unnested element
4000+
// The field is addressed with a single dotted path ("props.colors")
// NOTE(review): the meaning of the `false` argument to UnnestExpression.of is not visible
// here (presumably a preserve-null-and-empty-arrays flag) - confirm
4001+
Query nestedGroupByQuery =
4002+
Query.builder()
4003+
.addSelection(IdentifierExpression.of("props.colors"), "color")
4004+
.addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count")
4005+
.addFromClause(UnnestExpression.of(IdentifierExpression.of("props.colors"), false))
4006+
.addAggregation(IdentifierExpression.of("props.colors"))
4007+
.addSort(IdentifierExpression.of("props.colors"), ASC)
4008+
.build();
4009+
4010+
// Flat collection: same UNNEST + GROUP BY on the props.colors array
4011+
// The field is addressed as JSONB column "props" plus JSON path "colors"
4012+
Query flatGroupByQuery =
4013+
Query.builder()
4014+
.addSelection(JsonIdentifierExpression.of("props", "colors"), "color")
4015+
.addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count")
4016+
.addFromClause(
4017+
UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false))
4018+
.addAggregation(JsonIdentifierExpression.of("props", "colors"))
4019+
.addSort(JsonIdentifierExpression.of("props", "colors"), ASC)
4020+
.build();
4021+
4022+
// Execute both queries through the aggregate API of their respective collections
4023+
Iterator<Document> nestedResultIterator = nestedCollection.aggregate(nestedGroupByQuery);
4024+
Iterator<Document> flatResultIterator = flatCollection.aggregate(flatGroupByQuery);
4025+
4026+
// Both should produce the same results: one group per individual color element
4027+
// Expected: Black (1), Blue (2), Green (1), Orange (1)
4028+
assertDocsAndSizeEqualWithoutOrder(
4029+
dataStoreName, nestedResultIterator, "query/group_by_colors_comparison_response.json", 4);
4030+
4031+
assertDocsAndSizeEqualWithoutOrder(
4032+
dataStoreName, flatResultIterator, "query/group_by_colors_comparison_response.json", 4);
4033+
}
4034+
39854035
/**
39864036
* Tests UNNEST operation on JSONB array fields in flat collections. This validates that
39874037
* jsonb_array_elements() is used for JSONB arrays (props.colors) instead of unnest() which is
@@ -4019,6 +4069,57 @@ void testFlatCollectionUnnestJsonbArray(String dataStoreName) throws IOException
40194069
assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays");
40204070
}
40214071

4072+
/**
* Tests GROUP BY on a scalar field nested inside a JSONB column ({@code props.brand}) of a flat
* collection, without any UNNEST.
*
* <p>Runs against the data stores supplied by {@code PostgresProvider}. The query selects the
* brand (no alias) together with a COUNT aggregate and groups by the brand; the result is compared
* against {@code query/flat_jsonb_group_by_brand_test_response.json}, expecting 4 documents.
*
* <p>NOTE(review): the query adds an ASC sort, but the assertion helper is named
* {@code assertDocsAndSizeEqualWithoutOrder}, so the ordering is presumably not verified by this
* test - confirm.
*/
@ParameterizedTest
4073+
@ArgumentsSource(PostgresProvider.class)
4074+
void testFlatCollectionGroupByJsonbScalarField(String dataStoreName) throws IOException {
4075+
Datastore datastore = datastoreMap.get(dataStoreName);
4076+
Collection flatCollection =
4077+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4078+
4079+
// GROUP BY on a JSONB scalar field: props.brand
4080+
// i.e. grouping by a nested string field stored inside a JSONB column
4081+
// Data: 3 rows have brands (Dettol, Sunsilk, Lifebuoy), 7 rows have NULL/missing brand
4082+
// Rows with a NULL brand fall into one shared group (standard SQL GROUP BY behavior),
// giving 3 brand groups + 1 NULL group = 4 expected documents
4083+
Query groupByBrandQuery =
4084+
Query.builder()
4085+
.addSelection(JsonIdentifierExpression.of("props", "brand"))
4086+
.addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count")
4087+
.addAggregation(JsonIdentifierExpression.of("props", "brand"))
4088+
.addSort(JsonIdentifierExpression.of("props", "brand"), ASC)
4089+
.build();
4090+
4091+
Iterator<Document> resultIterator = flatCollection.aggregate(groupByBrandQuery);
4092+
assertDocsAndSizeEqualWithoutOrder(
4093+
dataStoreName, resultIterator, "query/flat_jsonb_group_by_brand_test_response.json", 4);
4094+
}
4095+
4096+
/**
* Tests GROUP BY on the elements of a JSONB array field ({@code props.colors}) of a flat
* collection, using UNNEST to flatten the array before grouping.
*
* <p>Runs against the data stores supplied by {@code PostgresProvider}. The same
* {@code JsonIdentifierExpression.of("props", "colors")} is used for the selection (aliased as
* "color"), the UNNEST from-clause, the aggregation and the sort. The result is compared against
* {@code query/flat_jsonb_group_by_colors_test_response.json}, expecting 4 documents.
*
* <p>NOTE(review): the query adds an ASC sort, but the assertion helper is named
* {@code assertDocsAndSizeEqualWithoutOrder}, so the ordering is presumably not verified by this
* test - confirm.
*/
@ParameterizedTest
4097+
@ArgumentsSource(PostgresProvider.class)
4098+
void testFlatCollectionGroupByJsonbArrayField(String dataStoreName) throws IOException {
4099+
Datastore datastore = datastoreMap.get(dataStoreName);
4100+
Collection flatCollection =
4101+
datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT);
4102+
4103+
// GROUP BY on a JSONB array field: props.colors with UNNEST
4104+
// Groups by the individual elements (after unnesting) of the JSONB array
4105+
// Behavior should match nested collections: UNNEST flattens array, GROUP BY groups elements
4106+
// Data: Row 1 has ["Blue", "Green"], Row 3 has ["Black"], Row 5 has ["Orange", "Blue"]
4107+
// Expected: Blue (2), Green (1), Black (1), Orange (1) - 4 distinct color groups
// NOTE(review): the meaning of the `false` argument to UnnestExpression.of is not visible
// here (presumably a preserve-null-and-empty-arrays flag) - confirm
4108+
Query groupByColorsQuery =
4109+
Query.builder()
4110+
.addSelection(JsonIdentifierExpression.of("props", "colors"), "color")
4111+
.addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count")
4112+
.addFromClause(
4113+
UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false))
4114+
.addAggregation(JsonIdentifierExpression.of("props", "colors"))
4115+
.addSort(JsonIdentifierExpression.of("props", "colors"), ASC)
4116+
.build();
4117+
4118+
Iterator<Document> resultIterator = flatCollection.aggregate(groupByColorsQuery);
4119+
assertDocsAndSizeEqualWithoutOrder(
4120+
dataStoreName, resultIterator, "query/flat_jsonb_group_by_colors_test_response.json", 4);
4121+
}
4122+
40224123
@ParameterizedTest
40234124
@ArgumentsSource(PostgresProvider.class)
40244125
void testFlatCollectionArrayAnyOnJsonbArray(String dataStoreName) {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[
2+
{
3+
"count": 7
4+
},
5+
{
6+
"props": {
7+
"brand": "Dettol"
8+
},
9+
"count": 1
10+
},
11+
{
12+
"props": {
13+
"brand": "Lifebuoy"
14+
},
15+
"count": 1
16+
},
17+
{
18+
"props": {
19+
"brand": "Sunsilk"
20+
},
21+
"count": 1
22+
}
23+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[
2+
{
3+
"color": "Black",
4+
"count": 1
5+
},
6+
{
7+
"color": "Blue",
8+
"count": 2
9+
},
10+
{
11+
"color": "Green",
12+
"count": 1
13+
},
14+
{
15+
"color": "Orange",
16+
"count": 1
17+
}
18+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[
2+
{
3+
"color": "Black",
4+
"count": 1
5+
},
6+
{
7+
"color": "Blue",
8+
"count": 2
9+
},
10+
{
11+
"color": "Green",
12+
"count": 1
13+
},
14+
{
15+
"color": "Orange",
16+
"count": 1
17+
}
18+
]

document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,26 @@ public FieldToPgColumn visit(JsonIdentifierExpression expression) {
5353
BasicPostgresSecurityValidator.getDefault().validateIdentifier(expression.getColumnName());
5454
BasicPostgresSecurityValidator.getDefault().validateJsonPath(expression.getJsonPath());
5555

56-
String nestedPath = String.join(".", expression.getJsonPath());
57-
return new FieldToPgColumn(
58-
nestedPath, PostgresUtils.wrapFieldNamesWithDoubleQuotes(expression.getColumnName()));
56+
String fieldName = expression.getName();
57+
58+
FieldToPgColumn transformedCol;
59+
60+
// Check if this JSONB field has been unnested (e.g., "props.colors" -> "props_colors_encoded")
61+
// If unnested, return the direct column reference to the unnested alias
62+
if (pgColMapping.containsKey(fieldName)) {
63+
String unnestColumnName = pgColMapping.get(fieldName);
64+
BasicPostgresSecurityValidator.getDefault().validateIdentifier(unnestColumnName);
65+
// Return direct column access (no JSONB path) for unnested fields
66+
transformedCol =
67+
new FieldToPgColumn(null, PostgresUtils.wrapFieldNamesWithDoubleQuotes(unnestColumnName));
68+
} else {
69+
// Not unnested - use normal JSONB accessor
70+
String nestedPath = String.join(".", expression.getJsonPath());
71+
transformedCol =
72+
new FieldToPgColumn(
73+
nestedPath, PostgresUtils.wrapFieldNamesWithDoubleQuotes(expression.getColumnName()));
74+
}
75+
return transformedCol;
5976
}
6077

6178
@Override

0 commit comments

Comments
 (0)