Skip to content

Commit e9abbe0

Browse files
committed
Address review feedback for AQUMV join exact-match
Fix three issues in aqumv_query_is_exact_match(): - Add groupDistinct comparison (GROUP BY vs GROUP BY DISTINCT) - Add limitOption comparison (LIMIT vs FETCH FIRST WITH TIES) - Clear qp_extra in-place via aqumv_context->qp_extra instead of allocating a local char array; move standard_qp_extra typedef to planner.h so aqumv.c can reference the proper struct type Add test cases 26-28 to verify the new comparisons: - LIMIT vs FETCH FIRST WITH TIES non-match and exact match - GROUP BY DISTINCT vs GROUP BY non-match
1 parent dba1a32 commit e9abbe0

File tree

5 files changed

+237
-18
lines changed

5 files changed

+237
-18
lines changed

src/backend/optimizer/plan/aqumv.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,8 @@ aqumv_query_is_exact_match(Query *raw_parse, Query *viewQuery)
10451045
/* Compare GROUP BY, HAVING, ORDER BY, DISTINCT, LIMIT */
10461046
if (!equal(raw_parse->groupClause, viewQuery->groupClause))
10471047
return false;
1048+
if (raw_parse->groupDistinct != viewQuery->groupDistinct)
1049+
return false;
10481050
if (!equal(raw_parse->havingQual, viewQuery->havingQual))
10491051
return false;
10501052
if (!equal(raw_parse->sortClause, viewQuery->sortClause))
@@ -1055,6 +1057,8 @@ aqumv_query_is_exact_match(Query *raw_parse, Query *viewQuery)
10551057
return false;
10561058
if (!equal(raw_parse->limitOffset, viewQuery->limitOffset))
10571059
return false;
1060+
if (raw_parse->limitOption != viewQuery->limitOption)
1061+
return false;
10581062

10591063
/* Compare boolean flags */
10601064
if (raw_parse->hasAggs != viewQuery->hasAggs)
@@ -1285,20 +1289,19 @@ answer_query_using_materialized_views_for_join(PlannerInfo *root, AqumvContext a
12851289
/*
12861290
* Plan the MV scan.
12871291
*
1288-
* We need a clean qp_extra with no groupClause or activeWindows,
1289-
* because the rewritten viewQuery is a simple SELECT from the MV
1290-
* with no GROUP BY, windowing, etc. The standard_qp_callback uses
1291-
* qp_extra->groupClause to compute group_pathkeys, which would fail
1292-
* if it still contained the original query's GROUP BY expressions.
1292+
* Clear qp_extra's groupClause and activeWindows because the
1293+
* rewritten viewQuery is a simple SELECT from the MV with no
1294+
* GROUP BY or windowing. standard_qp_callback would otherwise
1295+
* try to compute group_pathkeys from stale expressions.
12931296
*
1294-
* standard_qp_extra is { List *activeWindows; List *groupClause; },
1295-
* so a zeroed struct of that size works correctly (both fields NIL).
1297+
* Safe: grouping_planner() no longer reads qp_extra after AQUMV.
12961298
*/
12971299
{
1298-
char clean_qp_extra[2 * sizeof(List *)];
1299-
memset(clean_qp_extra, 0, sizeof(clean_qp_extra));
1300-
mv_final_rel = query_planner(subroot, qp_callback, clean_qp_extra);
1300+
standard_qp_extra *qp = (standard_qp_extra *) aqumv_context->qp_extra;
1301+
qp->activeWindows = NIL;
1302+
qp->groupClause = NIL;
13011303
}
1304+
mv_final_rel = query_planner(subroot, qp_callback, aqumv_context->qp_extra);
13021305

13031306
/* Cost-based decision: use MV only if cheaper. */
13041307
if (mv_final_rel->cheapest_total_path->total_cost < current_rel->cheapest_total_path->total_cost)

src/backend/optimizer/plan/planner.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,7 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
120120
#define EXPRKIND_TABLEFUNC_LATERAL 12
121121
#define EXPRKIND_WINDOW_BOUND 13
122122

123-
/* Passthrough data for standard_qp_callback */
124-
typedef struct
125-
{
126-
List *activeWindows; /* active windows, if any */
127-
List *groupClause; /* overrides parse->groupClause */
128-
} standard_qp_extra;
123+
/* standard_qp_extra is defined in optimizer/planner.h */
129124

130125
/*
131126
* Data specific to grouping sets

src/include/optimizer/planner.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,11 @@ extern bool optimizer_init;
6666

6767
extern void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
6868

69+
/* Passthrough data for standard_qp_callback */
70+
typedef struct
71+
{
72+
List *activeWindows; /* active windows, if any */
73+
List *groupClause; /* overrides parse->groupClause */
74+
} standard_qp_extra;
75+
6976
#endif /* PLANNER_H */

src/test/regress/expected/matview_data.out

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,7 +2037,158 @@ select c.region, o.status, count(*) as cnt, sum(o.amount) as total
20372037
north | delivered | 16 | 16800.00
20382038
(6 rows)
20392039

2040+
-- 26. Non-match: LIMIT vs FETCH FIRST WITH TIES (limitOption differs)
2041+
create materialized view mv_aqj_limit_test as
2042+
select o.order_id, o.amount
2043+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2044+
where o.status = 'shipped'
2045+
order by o.order_id limit 5;
2046+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table.
2047+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
2048+
analyze mv_aqj_limit_test;
2049+
set enable_answer_query_using_materialized_views = on;
2050+
-- Same tables/WHERE/ORDER BY but FETCH FIRST WITH TIES: should NOT match
2051+
explain(costs off)
2052+
select o.order_id, o.amount
2053+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2054+
where o.status = 'shipped'
2055+
order by o.order_id fetch first 5 rows with ties;
2056+
QUERY PLAN
2057+
---------------------------------------------------------------------------
2058+
Limit
2059+
-> Gather Motion 3:1 (slice1; segments: 3)
2060+
Merge Key: o.order_id
2061+
-> Limit
2062+
-> Sort
2063+
Sort Key: o.order_id
2064+
-> Hash Join
2065+
Hash Cond: (c.customer_id = o.customer_id)
2066+
-> Broadcast Motion 3:3 (slice2; segments: 3)
2067+
-> Seq Scan on aqj_customers c
2068+
-> Hash
2069+
-> Seq Scan on aqj_orders o
2070+
Filter: (status = 'shipped'::text)
2071+
Optimizer: Postgres query optimizer
2072+
(14 rows)
2073+
2074+
-- Identical LIMIT query: should match
2075+
explain(costs off)
2076+
select o.order_id, o.amount
2077+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2078+
where o.status = 'shipped'
2079+
order by o.order_id limit 5;
2080+
QUERY PLAN
2081+
-------------------------------------------------
2082+
Limit
2083+
-> Gather Motion 3:1 (slice1; segments: 3)
2084+
-> Limit
2085+
-> Seq Scan on mv_aqj_limit_test
2086+
Optimizer: Postgres query optimizer
2087+
(5 rows)
2088+
2089+
-- 27. Match: FETCH FIRST WITH TIES exact match
2090+
create materialized view mv_aqj_with_ties as
2091+
select o.order_id, o.amount
2092+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2093+
where o.status = 'pending'
2094+
order by o.order_id fetch first 5 rows with ties;
2095+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table.
2096+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
2097+
analyze mv_aqj_with_ties;
2098+
set enable_answer_query_using_materialized_views = off;
2099+
explain(costs off)
2100+
select o.order_id, o.amount
2101+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2102+
where o.status = 'pending'
2103+
order by o.order_id fetch first 5 rows with ties;
2104+
QUERY PLAN
2105+
---------------------------------------------------------------------------
2106+
Limit
2107+
-> Gather Motion 3:1 (slice1; segments: 3)
2108+
Merge Key: o.order_id
2109+
-> Limit
2110+
-> Sort
2111+
Sort Key: o.order_id
2112+
-> Hash Join
2113+
Hash Cond: (c.customer_id = o.customer_id)
2114+
-> Broadcast Motion 3:3 (slice2; segments: 3)
2115+
-> Seq Scan on aqj_customers c
2116+
-> Hash
2117+
-> Seq Scan on aqj_orders o
2118+
Filter: (status = 'pending'::text)
2119+
Optimizer: Postgres query optimizer
2120+
(14 rows)
2121+
2122+
select o.order_id, o.amount
2123+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2124+
where o.status = 'pending'
2125+
order by o.order_id fetch first 5 rows with ties;
2126+
order_id | amount
2127+
----------+--------
2128+
1 | 10.50
2129+
5 | 52.50
2130+
9 | 94.50
2131+
13 | 136.50
2132+
17 | 178.50
2133+
(5 rows)
2134+
2135+
set enable_answer_query_using_materialized_views = on;
2136+
explain(costs off)
2137+
select o.order_id, o.amount
2138+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2139+
where o.status = 'pending'
2140+
order by o.order_id fetch first 5 rows with ties;
2141+
QUERY PLAN
2142+
------------------------------------------------
2143+
Limit
2144+
-> Gather Motion 3:1 (slice1; segments: 3)
2145+
-> Limit
2146+
-> Seq Scan on mv_aqj_with_ties
2147+
Optimizer: Postgres query optimizer
2148+
(5 rows)
2149+
2150+
select o.order_id, o.amount
2151+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2152+
where o.status = 'pending'
2153+
order by o.order_id fetch first 5 rows with ties;
2154+
order_id | amount
2155+
----------+--------
2156+
1 | 10.50
2157+
5 | 52.50
2158+
9 | 94.50
2159+
13 | 136.50
2160+
17 | 178.50
2161+
(5 rows)
2162+
2163+
-- 28. Non-match: GROUP BY vs GROUP BY DISTINCT (groupDistinct differs)
2164+
-- MV mv_aqj_grp_multi uses GROUP BY (groupDistinct=false, registered in catalog)
2165+
-- Query uses GROUP BY DISTINCT — should NOT match
2166+
set enable_answer_query_using_materialized_views = on;
2167+
explain(costs off)
2168+
select c.region, o.status, count(*) as cnt, sum(o.amount) as total
2169+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
2170+
group by distinct c.region, o.status;
2171+
QUERY PLAN
2172+
---------------------------------------------------------------------------------
2173+
Gather Motion 3:1 (slice1; segments: 3)
2174+
-> Finalize HashAggregate
2175+
Group Key: c.region, o.status
2176+
-> Redistribute Motion 3:3 (slice2; segments: 3)
2177+
Hash Key: c.region, o.status
2178+
-> Streaming Partial HashAggregate
2179+
Group Key: c.region, o.status
2180+
-> Hash Join
2181+
Hash Cond: (o.customer_id = c.customer_id)
2182+
-> Seq Scan on aqj_orders o
2183+
-> Hash
2184+
-> Broadcast Motion 3:3 (slice3; segments: 3)
2185+
-> Seq Scan on aqj_customers c
2186+
Optimizer: Postgres query optimizer
2187+
(14 rows)
2188+
20402189
-- Clean up AQUMV join test objects
2190+
drop materialized view mv_aqj_with_ties;
2191+
drop materialized view mv_aqj_limit_test;
20412192
drop materialized view mv_aqj_implicit3;
20422193
drop materialized view mv_aqj_3way_agg;
20432194
drop materialized view mv_aqj_grp_multi;
@@ -2412,10 +2563,10 @@ select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
24122563
-----------+------------
24132564
mv_par2 | u
24142565
mv_par2_1 | u
2415-
mv_par1_1 | i
2566+
mv_par1_2 | i
24162567
mv_par1 | i
24172568
mv_par | i
2418-
mv_par1_2 | i
2569+
mv_par1_1 | i
24192570
(6 rows)
24202571

24212572
abort;

src/test/regress/sql/matview_data.sql

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,70 @@ select c.region, o.status, count(*) as cnt, sum(o.amount) as total
855855
group by c.region, o.status
856856
order by c.region, o.status limit 6;
857857

858+
-- 26. Non-match: LIMIT vs FETCH FIRST WITH TIES (limitOption differs)
859+
create materialized view mv_aqj_limit_test as
860+
select o.order_id, o.amount
861+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
862+
where o.status = 'shipped'
863+
order by o.order_id limit 5;
864+
analyze mv_aqj_limit_test;
865+
866+
set enable_answer_query_using_materialized_views = on;
867+
-- Same tables/WHERE/ORDER BY but FETCH FIRST WITH TIES: should NOT match
868+
explain(costs off)
869+
select o.order_id, o.amount
870+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
871+
where o.status = 'shipped'
872+
order by o.order_id fetch first 5 rows with ties;
873+
-- Identical LIMIT query: should match
874+
explain(costs off)
875+
select o.order_id, o.amount
876+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
877+
where o.status = 'shipped'
878+
order by o.order_id limit 5;
879+
880+
-- 27. Match: FETCH FIRST WITH TIES exact match
881+
create materialized view mv_aqj_with_ties as
882+
select o.order_id, o.amount
883+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
884+
where o.status = 'pending'
885+
order by o.order_id fetch first 5 rows with ties;
886+
analyze mv_aqj_with_ties;
887+
888+
set enable_answer_query_using_materialized_views = off;
889+
explain(costs off)
890+
select o.order_id, o.amount
891+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
892+
where o.status = 'pending'
893+
order by o.order_id fetch first 5 rows with ties;
894+
select o.order_id, o.amount
895+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
896+
where o.status = 'pending'
897+
order by o.order_id fetch first 5 rows with ties;
898+
899+
set enable_answer_query_using_materialized_views = on;
900+
explain(costs off)
901+
select o.order_id, o.amount
902+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
903+
where o.status = 'pending'
904+
order by o.order_id fetch first 5 rows with ties;
905+
select o.order_id, o.amount
906+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
907+
where o.status = 'pending'
908+
order by o.order_id fetch first 5 rows with ties;
909+
910+
-- 28. Non-match: GROUP BY vs GROUP BY DISTINCT (groupDistinct differs)
911+
-- MV mv_aqj_grp_multi uses GROUP BY (groupDistinct=false, registered in catalog)
912+
-- Query uses GROUP BY DISTINCT — should NOT match
913+
set enable_answer_query_using_materialized_views = on;
914+
explain(costs off)
915+
select c.region, o.status, count(*) as cnt, sum(o.amount) as total
916+
from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
917+
group by distinct c.region, o.status;
918+
858919
-- Clean up AQUMV join test objects
920+
drop materialized view mv_aqj_with_ties;
921+
drop materialized view mv_aqj_limit_test;
859922
drop materialized view mv_aqj_implicit3;
860923
drop materialized view mv_aqj_3way_agg;
861924
drop materialized view mv_aqj_grp_multi;

0 commit comments

Comments
 (0)