Skip to content

Commit 9bae068

Browse files
def- and claude committed
sql-parser: shrink Expr<Raw> from 240 to 72 bytes via boxing large variants
Two complementary optimizations that shrink the core AST `Expr` enum:

1. `Expr::Function(Function<T>)` → `Expr::Function(Box<Function<T>>)`. `Function<Raw>` is 240 bytes and was the single largest variant, inflating the entire enum. Boxing reduces it to 8 bytes inline.
2. `Expr::Cast { data_type: T::DataType }` → `Expr::Cast { data_type: Box<T::DataType> }`. `RawDataType` can be up to ~48 bytes; boxing reduces it to 8 bytes inline.

`Expr<Raw>` is the most numerous AST node type — every expression in every SQL query is represented as one. The savings compound because `Expr` is stored recursively: for `Raw`, every `Vec<Expr<T>>` element and every `Box<Expr<T>>` allocation is 168 bytes smaller (240 − 72).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b1e200b commit 9bae068

File tree

6 files changed

+48
-46
lines changed

6 files changed

+48
-46
lines changed

src/sql-parser/src/ast/defs/expr.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ pub enum Expr<T: AstInfo> {
108108
/// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
109109
Cast {
110110
expr: Box<Expr<T>>,
111-
data_type: T::DataType,
111+
data_type: Box<T::DataType>,
112112
},
113113
/// `expr COLLATE collation`
114114
Collate {
@@ -141,7 +141,7 @@ pub enum Expr<T: AstInfo> {
141141
/// A literal value, such as string, number, date or NULL
142142
Value(Value),
143143
/// Scalar function call e.g. `LEFT(foo, 5)`
144-
Function(Function<T>),
144+
Function(Box<Function<T>>),
145145
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
146146
///
147147
/// Note we only recognize a complete single expression as `<condition>`,
@@ -559,18 +559,18 @@ impl<T: AstInfo> Expr<T> {
559559
pub fn cast(self, data_type: T::DataType) -> Expr<T> {
560560
Expr::Cast {
561561
expr: Box::new(self),
562-
data_type,
562+
data_type: Box::new(data_type),
563563
}
564564
}
565565

566566
pub fn call(name: T::ItemName, args: Vec<Expr<T>>) -> Expr<T> {
567-
Expr::Function(Function {
567+
Expr::Function(Box::new(Function {
568568
name,
569569
args: FunctionArgs::args(args),
570570
filter: None,
571571
over: None,
572572
distinct: false,
573-
})
573+
}))
574574
}
575575

576576
pub fn call_nullary(name: T::ItemName) -> Expr<T> {

src/sql-parser/src/parser.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ impl<'a> Parser<'a> {
552552
} else {
553553
Ok(Expr::Cast {
554554
expr: Box::new(Expr::Value(Value::String(parser.parse_literal_string()?))),
555-
data_type,
555+
data_type: Box::new(data_type),
556556
})
557557
}
558558
}));
@@ -564,7 +564,7 @@ impl<'a> Parser<'a> {
564564
(Token::LBracket, _) => {
565565
self.prev_token();
566566
let function = self.parse_named_function()?;
567-
Ok(Expr::Function(function))
567+
Ok(Expr::Function(Box::new(function)))
568568
}
569569
(Token::Keyword(TRUE) | Token::Keyword(FALSE) | Token::Keyword(NULL), _) => {
570570
self.prev_token();
@@ -920,12 +920,12 @@ impl<'a> Parser<'a> {
920920
) {
921921
Ok(Expr::Cast {
922922
expr: Box::new(Expr::Nested(Box::new(expr))),
923-
data_type,
923+
data_type: Box::new(data_type),
924924
})
925925
} else {
926926
Ok(Expr::Cast {
927927
expr: Box::new(expr),
928-
data_type,
928+
data_type: Box::new(data_type),
929929
})
930930
}
931931
}
@@ -970,13 +970,13 @@ impl<'a> Parser<'a> {
970970
self.expect_keyword(FROM)?;
971971
let expr = self.parse_expr()?;
972972
self.expect_token(&Token::RParen)?;
973-
Ok(Expr::Function(Function {
973+
Ok(Expr::Function(Box::new(Function {
974974
name: RawItemName::Name(UnresolvedItemName::unqualified(ident!("extract"))),
975975
args: FunctionArgs::args(vec![Expr::Value(Value::String(field)), expr]),
976976
filter: None,
977977
over: None,
978978
distinct: false,
979-
}))
979+
})))
980980
}
981981

982982
fn parse_row_expr(&mut self) -> Result<Expr<Raw>, ParserError> {
@@ -1033,13 +1033,13 @@ impl<'a> Parser<'a> {
10331033
}
10341034
}
10351035
self.expect_token(&Token::RParen)?;
1036-
Ok(Expr::Function(Function {
1036+
Ok(Expr::Function(Box::new(Function {
10371037
name: RawItemName::Name(UnresolvedItemName::unqualified(name)),
10381038
args: FunctionArgs::args(exprs),
10391039
filter: None,
10401040
over: None,
10411041
distinct: false,
1042-
}))
1042+
})))
10431043
}
10441044

10451045
// Parse calls to position(), which has the special form position('string' in 'string').
@@ -1051,13 +1051,13 @@ impl<'a> Parser<'a> {
10511051
self.expect_token(&Token::Keyword(IN))?;
10521052
let haystack = self.parse_expr()?;
10531053
self.expect_token(&Token::RParen)?;
1054-
Ok(Expr::Function(Function {
1054+
Ok(Expr::Function(Box::new(Function {
10551055
name: RawItemName::Name(UnresolvedItemName::unqualified(ident!("position"))),
10561056
args: FunctionArgs::args(vec![needle, haystack]),
10571057
filter: None,
10581058
over: None,
10591059
distinct: false,
1060-
}))
1060+
})))
10611061
}
10621062

10631063
/// Parse calls to normalize(), which can take the form:
@@ -1080,13 +1080,13 @@ impl<'a> Parser<'a> {
10801080
};
10811081

10821082
self.expect_token(&Token::RParen)?;
1083-
Ok(Expr::Function(Function {
1083+
Ok(Expr::Function(Box::new(Function {
10841084
name: RawItemName::Name(UnresolvedItemName::unqualified(ident!("normalize"))),
10851085
args: FunctionArgs::args(args),
10861086
filter: None,
10871087
over: None,
10881088
distinct: false,
1089-
}))
1089+
})))
10901090
}
10911091

10921092
/// Parse an INTERVAL literal.
@@ -1273,15 +1273,15 @@ impl<'a> Parser<'a> {
12731273
}),
12741274
AT => {
12751275
self.expect_keywords(&[TIME, ZONE])?;
1276-
Ok(Expr::Function(Function {
1276+
Ok(Expr::Function(Box::new(Function {
12771277
name: RawItemName::Name(UnresolvedItemName::unqualified(ident!(
12781278
"timezone"
12791279
))),
12801280
args: FunctionArgs::args(vec![self.parse_subexpr(precedence)?, expr]),
12811281
filter: None,
12821282
over: None,
12831283
distinct: false,
1284-
}))
1284+
})))
12851285
}
12861286
COLLATE => Ok(Expr::Collate {
12871287
expr: Box::new(expr),
@@ -1410,13 +1410,13 @@ impl<'a> Parser<'a> {
14101410
}
14111411

14121412
self.expect_token(&Token::RParen)?;
1413-
Ok(Expr::Function(Function {
1413+
Ok(Expr::Function(Box::new(Function {
14141414
name: RawItemName::Name(UnresolvedItemName::unqualified(ident!("substring"))),
14151415
args: FunctionArgs::args(exprs),
14161416
filter: None,
14171417
over: None,
14181418
distinct: false,
1419-
}))
1419+
})))
14201420
}
14211421

14221422
/// Parse an operator reference.
@@ -1568,7 +1568,7 @@ impl<'a> Parser<'a> {
15681568
fn parse_pg_cast(&mut self, expr: Expr<Raw>) -> Result<Expr<Raw>, ParserError> {
15691569
Ok(Expr::Cast {
15701570
expr: Box::new(expr),
1571-
data_type: self.parse_data_type()?,
1571+
data_type: Box::new(self.parse_data_type()?),
15721572
})
15731573
}
15741574

@@ -7407,7 +7407,7 @@ impl<'a> Parser<'a> {
74077407
} else if self.peek_token() == Some(Token::LParen) {
74087408
let function =
74097409
self.parse_function(RawItemName::Name(UnresolvedItemName(id_parts)))?;
7410-
Ok(Expr::Function(function))
7410+
Ok(Expr::Function(Box::new(function)))
74117411
} else {
74127412
Ok(Expr::Identifier(id_parts))
74137413
}

src/sql/src/plan/query.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,7 +2493,7 @@ fn plan_select_from_where(
24932493
agg_exprs.push(plan_aggregate_common(ecx, &sql_function)?);
24942494
group_scope
24952495
.items
2496-
.push(ScopeItem::from_expr(Expr::Function(sql_function.clone())));
2496+
.push(ScopeItem::from_expr(Expr::Function(Box::new(sql_function.clone()))));
24972497
}
24982498
if !agg_exprs.is_empty() || !group_key.is_empty() || s.having.is_some() {
24992499
// apply GROUP BY / aggregates
@@ -6361,7 +6361,7 @@ impl<'a> VisitMut<'_, Aug> for AggregateTableFuncVisitor<'a> {
63616361
));
63626362
return;
63636363
}
6364-
table_func = Some(func.clone());
6364+
table_func = Some((**func).clone());
63656365
}
63666366
}
63676367
// Since we will descend into the table func below, don't add its own disallow

src/sql/src/plan/side_effecting_func.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,20 +160,23 @@ fn extract_sef_call(
160160
}
161161
let [
162162
SelectItem::Expr {
163-
expr:
164-
Expr::Function(Function {
165-
name,
166-
args: FunctionArgs::Args { args, order_by },
167-
filter: None,
168-
over: None,
169-
distinct: false,
170-
}),
163+
expr: Expr::Function(func),
171164
alias: None,
172165
},
173166
] = &projection[..]
174167
else {
175168
return Ok(None);
176169
};
170+
let Function {
171+
name,
172+
args: FunctionArgs::Args { args, order_by },
173+
filter: None,
174+
over: None,
175+
distinct: false,
176+
} = &**func
177+
else {
178+
return Ok(None);
179+
};
177180
if !order_by.is_empty() {
178181
return Ok(None);
179182
}

src/sql/src/plan/transform_ast.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ impl<'a> FuncRewriter<'a> {
125125
"aggregate functions are not supported in functions in FROM".to_string(),
126126
))
127127
}
128-
Expr::Function(Function {
128+
Expr::Function(Box::new(Function {
129129
name,
130130
args: FunctionArgs::Args {
131131
args: vec![expr],
@@ -134,7 +134,7 @@ impl<'a> FuncRewriter<'a> {
134134
filter,
135135
over,
136136
distinct,
137-
})
137+
}))
138138
}
139139

140140
fn plan_avg(

src/sql/src/pure.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,7 +2644,7 @@ pub fn purify_create_materialized_view_options(
26442644
.expect("we should be able to resolve mz_now");
26452645
(
26462646
item.id(),
2647-
Expr::Function(Function {
2647+
Expr::Function(Box::new(Function {
26482648
name: ResolvedItemName::Item {
26492649
id: item.id(),
26502650
qualifiers: item.name().qualifiers.clone(),
@@ -2659,7 +2659,7 @@ pub fn purify_create_materialized_view_options(
26592659
filter: None,
26602660
over: None,
26612661
distinct: false,
2662-
}),
2662+
})),
26632663
)
26642664
};
26652665
// Prepare the `mz_timestamp` type.
@@ -2834,22 +2834,21 @@ impl MzNowPurifierVisitor {
28342834
impl VisitMut<'_, Aug> for MzNowPurifierVisitor {
28352835
fn visit_expr_mut(&mut self, expr: &'_ mut Expr<Aug>) {
28362836
match expr {
2837-
Expr::Function(Function {
2838-
name:
2839-
ResolvedItemName::Item {
2840-
full_name: FullItemName { item, .. },
2841-
..
2842-
},
2843-
..
2844-
}) if item == &MZ_NOW_NAME.to_string() => {
2837+
Expr::Function(func)
2838+
if matches!(
2839+
&func.name,
2840+
ResolvedItemName::Item { full_name: FullItemName { item, .. }, .. }
2841+
if item.as_str() == MZ_NOW_NAME
2842+
) =>
2843+
{
28452844
let mz_now = self.mz_now.expect(
28462845
"we should have chosen a timestamp if the expression contains mz_now()",
28472846
);
28482847
// We substitute `mz_now()` with number + a cast to `mz_timestamp`. The cast is to
28492848
// not alter the type of the expression.
28502849
*expr = Expr::Cast {
28512850
expr: Box::new(Expr::Value(Value::Number(mz_now.to_string()))),
2852-
data_type: self.mz_timestamp_type.clone(),
2851+
data_type: Box::new(self.mz_timestamp_type.clone()),
28532852
};
28542853
self.introduced_mz_timestamp = true;
28552854
}

0 commit comments

Comments
 (0)