Skip to content

Commit 84bb2e1

Browse files
authored
Merge pull request #1044 from data-integrations/PLUGIN-1257
[PLUGIN-1257]Fixed issue with deduplicate plugin failing when no filter condition is specified
2 parents 2ab3420 + 79cbfde commit 84bb2e1

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/builder/BigQueryBaseSQLBuilder.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ public abstract class BigQueryBaseSQLBuilder {
4848
public static final String ORDER_ASC = "ASC";
4949
public static final String SELECT_DEDUPLICATE_STATEMENT = "SELECT * EXCEPT(`%s`) FROM (%s) WHERE `%s` = 1";
5050
public static final String ROW_NUMBER_PARTITION_COLUMN =
51-
"ROW_NUMBER() OVER ( PARTITION BY %s ORDER BY %s ) AS `%s`";
51+
"ROW_NUMBER() OVER ( %s ) AS `%s`";
52+
public static final String PARTITION_BY = "PARTITION BY ";
53+
public static final String ORDER_BY = "ORDER BY ";
5254
public static final String NULLS_LAST = "NULLS LAST";
5355
public static final String IF_FUNCTION = "IF";
5456
public static final String ZERO = "0";

src/main/java/io/cdap/plugin/gcp/bigquery/sqlengine/builder/BigQueryDeduplicateSQLBuilder.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,15 @@ protected String getSelectedFields(DeduplicateAggregationDefinition def) {
9696
*/
9797
@VisibleForTesting
9898
protected String getRowNumColumn(DeduplicateAggregationDefinition def) {
99-
String partitionByFields = getPartitionByFields(def.getGroupByExpressions());
100-
String orderByFields = getOrderByFields(def.getFilterExpressions());
99+
StringBuilder window = new StringBuilder();
100+
// Add partition by clause for windowing
101+
window.append(PARTITION_BY).append(getPartitionByFields(def.getGroupByExpressions()));
102+
// Add ordering clause if specified
103+
if (def.getFilterExpressions() != null && def.getFilterExpressions().size() > 0) {
104+
window.append(SPACE).append(ORDER_BY).append(getOrderByFields(def.getFilterExpressions()));
105+
}
101106
return String.format(ROW_NUMBER_PARTITION_COLUMN,
102-
partitionByFields,
103-
orderByFields,
107+
window.toString(),
104108
rowNumColumnAlias);
105109
}
106110

src/test/java/io/cdap/plugin/gcp/bigquery/sqlengine/builder/BigQueryDeduplicateSQLBuilderTest.java

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ public class BigQueryDeduplicateSQLBuilderTest {
3838
private Map<String, Expression> selectFields;
3939
private List<Expression> dedupFields;
4040
private List<DeduplicateAggregationDefinition.FilterExpression> filterFields;
41-
private DeduplicateAggregationDefinition def;
41+
private DeduplicateAggregationDefinition fullDefinition;
42+
private DeduplicateAggregationDefinition onlyDedupFieldsDefinition;
4243

4344
@Before
4445
public void setUp() {
4546
factory = new SQLExpressionFactory();
46-
DeduplicateAggregationDefinition.Builder builder = DeduplicateAggregationDefinition.builder();
4747

4848
// Build aggregation definition
4949
selectFields = new LinkedHashMap<>();
@@ -65,10 +65,17 @@ public void setUp() {
6565
filterFields.add(new DeduplicateAggregationDefinition.FilterExpression(
6666
factory.compile("f"), DeduplicateAggregationDefinition.FilterFunction.MIN));
6767

68-
builder.select(selectFields).dedupOn(dedupFields).filterDuplicatesBy(filterFields);
69-
def = builder.build();
70-
71-
helper = new BigQueryDeduplicateSQLBuilder(def, "select * from tbl", "ds", "the_row_number");
68+
fullDefinition = DeduplicateAggregationDefinition.builder()
69+
.select(selectFields)
70+
.dedupOn(dedupFields)
71+
.filterDuplicatesBy(filterFields)
72+
.build();
73+
onlyDedupFieldsDefinition = DeduplicateAggregationDefinition.builder()
74+
.select(selectFields)
75+
.dedupOn(dedupFields)
76+
.build();
77+
78+
helper = new BigQueryDeduplicateSQLBuilder(fullDefinition, "select * from tbl", "ds", "the_row_number");
7279
}
7380

7481
@Test
@@ -114,13 +121,21 @@ public void testGetSelectedFields() {
114121
+ "f AS f , "
115122
+ "ROW_NUMBER() OVER ( PARTITION BY c , d , e ORDER BY e DESC NULLS LAST , f ASC NULLS LAST ) AS" +
116123
" `the_row_number`",
117-
helper.getSelectedFields(def));
124+
helper.getSelectedFields(fullDefinition));
118125
}
119126

120127
@Test
121128
public void testGetRowNumColumn() {
122129
Assert.assertEquals("ROW_NUMBER() OVER ( PARTITION BY c , d , e ORDER BY e DESC NULLS LAST , " +
123-
"f ASC NULLS LAST ) AS `the_row_number`", helper.getRowNumColumn(def));
130+
"f ASC NULLS LAST ) AS `the_row_number`",
131+
helper.getRowNumColumn(fullDefinition));
132+
}
133+
134+
135+
@Test
136+
public void testGetRowNumColumnWithoutOrderFields() {
137+
Assert.assertEquals("ROW_NUMBER() OVER ( PARTITION BY c , d , e ) AS `the_row_number`",
138+
helper.getRowNumColumn(onlyDedupFieldsDefinition));
124139
}
125140

126141
@Test

0 commit comments

Comments
 (0)