Skip to content

Commit 46f2234

Browse files
authored
GenericDialect: support colon operator for JsonAccess (apache#2124)
1 parent c4d9e39 commit 46f2234

File tree

6 files changed

+174
-116
lines changed

6 files changed

+174
-116
lines changed

src/dialect/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
759759
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
760760
Ok(p!(DoubleColon))
761761
}
762+
Token::Colon => match parser.peek_nth_token(1).token {
763+
// When a colon is followed by a string or a number, it is usually part of MAP syntax.
764+
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
765+
// In other cases, it's used in semi-structured data traversal like in variant or JSON
766+
// string columns. See `JsonAccess`.
767+
_ => Ok(p!(Colon)),
768+
},
762769
Token::Arrow
763770
| Token::LongArrow
764771
| Token::HashArrow
@@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
812819
Precedence::Ampersand => 23,
813820
Precedence::Caret => 22,
814821
Precedence::Pipe => 21,
822+
Precedence::Colon => 21,
815823
Precedence::Between => 20,
816824
Precedence::Eq => 20,
817825
Precedence::Like => 19,
@@ -1269,6 +1277,8 @@ pub enum Precedence {
12691277
Caret,
12701278
/// Bitwise `OR` / pipe operator (`|`).
12711279
Pipe,
1280+
/// `:` operator for JSON/variant access.
1281+
Colon,
12721282
/// `BETWEEN` operator.
12731283
Between,
12741284
/// Equality operator (`=`).

src/dialect/mssql.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
160160
None
161161
}
162162
}
163+
164+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
165+
let token = parser.peek_token();
166+
match token.token {
167+
// Use the lowest precedence to prevent `:` from being parsed as a binary operator.
168+
Token::Colon => Some(Ok(self.prec_unknown())),
169+
_ => None,
170+
}
171+
}
163172
}
164173

165174
impl MsSqlDialect {

src/dialect/postgresql.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
136136
| Token::ShiftRight
137137
| Token::ShiftLeft
138138
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
139+
// Use the lowest precedence to prevent `:` from being parsed as a binary operator.
140+
Token::Colon => Some(Ok(self.prec_unknown())),
139141
_ => None,
140142
}
141143
}
@@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
159161
Precedence::Ampersand => PG_OTHER_PREC,
160162
Precedence::Caret => CARET_PREC,
161163
Precedence::Pipe => PG_OTHER_PREC,
164+
Precedence::Colon => PG_OTHER_PREC,
162165
Precedence::Between => BETWEEN_LIKE_PREC,
163166
Precedence::Eq => EQ_PREC,
164167
Precedence::Like => BETWEEN_LIKE_PREC,

src/parser/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3918,7 +3918,7 @@ impl<'a> Parser<'a> {
39183918
expr: Box::new(expr),
39193919
})
39203920
} else if Token::LBracket == *tok && self.dialect.supports_partiql()
3921-
|| (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
3921+
|| (Token::Colon == *tok)
39223922
{
39233923
self.prev_token();
39243924
self.parse_json_access(expr)
@@ -3954,7 +3954,8 @@ impl<'a> Parser<'a> {
39543954
let lower_bound = if self.consume_token(&Token::Colon) {
39553955
None
39563956
} else {
3957-
Some(self.parse_expr()?)
3957+
// parse expr until we hit a colon (or any token with lower precedence)
3958+
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
39583959
};
39593960

39603961
// check for end
@@ -3982,7 +3983,8 @@ impl<'a> Parser<'a> {
39823983
stride: None,
39833984
});
39843985
} else {
3985-
Some(self.parse_expr()?)
3986+
// parse expr until we hit a colon (or any token with lower precedence)
3987+
Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
39863988
};
39873989

39883990
// check for end

tests/sqlparser_common.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18067,3 +18067,148 @@ fn test_binary_kw_as_cast() {
1806718067
all_dialects_where(|d| d.supports_binary_kw_as_cast())
1806818068
.one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)");
1806918069
}
18070+
18071+
#[test]
18072+
fn parse_semi_structured_data_traversal() {
18073+
let dialects = TestedDialects::new(vec![
18074+
Box::new(GenericDialect {}),
18075+
Box::new(SnowflakeDialect {}),
18076+
Box::new(DatabricksDialect {}),
18077+
]);
18078+
18079+
// most basic case
18080+
let sql = "SELECT a:b FROM t";
18081+
let select = dialects.verified_only_select(sql);
18082+
assert_eq!(
18083+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18084+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18085+
path: JsonPath {
18086+
path: vec![JsonPathElem::Dot {
18087+
key: "b".to_owned(),
18088+
quoted: false
18089+
}]
18090+
},
18091+
}),
18092+
select.projection[0]
18093+
);
18094+
18095+
// identifier can be quoted
18096+
let sql = r#"SELECT a:"my long object key name" FROM t"#;
18097+
let select = dialects.verified_only_select(sql);
18098+
assert_eq!(
18099+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18100+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18101+
path: JsonPath {
18102+
path: vec![JsonPathElem::Dot {
18103+
key: "my long object key name".to_owned(),
18104+
quoted: true
18105+
}]
18106+
},
18107+
}),
18108+
select.projection[0]
18109+
);
18110+
18111+
dialects.verified_stmt("SELECT a:b::INT FROM t");
18112+
18113+
// unquoted keywords are permitted in the object key
18114+
let sql = "SELECT a:select, a:from FROM t";
18115+
let select = dialects.verified_only_select(sql);
18116+
assert_eq!(
18117+
vec![
18118+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18119+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18120+
path: JsonPath {
18121+
path: vec![JsonPathElem::Dot {
18122+
key: "select".to_owned(),
18123+
quoted: false
18124+
}]
18125+
},
18126+
}),
18127+
SelectItem::UnnamedExpr(Expr::JsonAccess {
18128+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18129+
path: JsonPath {
18130+
path: vec![JsonPathElem::Dot {
18131+
key: "from".to_owned(),
18132+
quoted: false
18133+
}]
18134+
},
18135+
})
18136+
],
18137+
select.projection
18138+
);
18139+
18140+
// multiple levels can be traversed
18141+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
18142+
let sql = r#"SELECT a:foo."bar".baz"#;
18143+
let select = dialects.verified_only_select(sql);
18144+
assert_eq!(
18145+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
18146+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18147+
path: JsonPath {
18148+
path: vec![
18149+
JsonPathElem::Dot {
18150+
key: "foo".to_owned(),
18151+
quoted: false,
18152+
},
18153+
JsonPathElem::Dot {
18154+
key: "bar".to_owned(),
18155+
quoted: true,
18156+
},
18157+
JsonPathElem::Dot {
18158+
key: "baz".to_owned(),
18159+
quoted: false,
18160+
}
18161+
]
18162+
},
18163+
})],
18164+
select.projection
18165+
);
18166+
18167+
// dot and bracket notation can be mixed (starting with : case)
18168+
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
18169+
let sql = r#"SELECT a:foo[0].bar"#;
18170+
let select = dialects.verified_only_select(sql);
18171+
assert_eq!(
18172+
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
18173+
value: Box::new(Expr::Identifier(Ident::new("a"))),
18174+
path: JsonPath {
18175+
path: vec![
18176+
JsonPathElem::Dot {
18177+
key: "foo".to_owned(),
18178+
quoted: false,
18179+
},
18180+
JsonPathElem::Bracket {
18181+
key: Expr::value(number("0")),
18182+
},
18183+
JsonPathElem::Dot {
18184+
key: "bar".to_owned(),
18185+
quoted: false,
18186+
}
18187+
]
18188+
},
18189+
})],
18190+
select.projection
18191+
);
18192+
}
18193+
18194+
#[test]
18195+
fn parse_array_subscript() {
18196+
let dialects = all_dialects_except(|d| {
18197+
d.is::<MsSqlDialect>()
18198+
|| d.is::<SnowflakeDialect>()
18199+
|| d.is::<SQLiteDialect>()
18200+
|| d.is::<RedshiftSqlDialect>()
18201+
});
18202+
18203+
dialects.verified_stmt("SELECT arr[1]");
18204+
dialects.verified_stmt("SELECT arr[:]");
18205+
dialects.verified_stmt("SELECT arr[1:2]");
18206+
dialects.verified_stmt("SELECT arr[1:2:4]");
18207+
dialects.verified_stmt("SELECT arr[1:array_length(arr)]");
18208+
dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]");
18209+
dialects
18210+
.verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]");
18211+
18212+
dialects.verified_stmt("SELECT arr[1][2]");
18213+
dialects.verified_stmt("SELECT arr[:][:]");
18214+
}

tests/sqlparser_snowflake.rs

Lines changed: 2 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
12651265
// https://docs.snowflake.com/en/user-guide/querying-semistructured
12661266
#[test]
12671267
fn parse_semi_structured_data_traversal() {
1268-
// most basic case
1269-
let sql = "SELECT a:b FROM t";
1270-
let select = snowflake().verified_only_select(sql);
1271-
assert_eq!(
1272-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1273-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1274-
path: JsonPath {
1275-
path: vec![JsonPathElem::Dot {
1276-
key: "b".to_owned(),
1277-
quoted: false
1278-
}]
1279-
},
1280-
}),
1281-
select.projection[0]
1282-
);
1283-
1284-
// identifier can be quoted
1285-
let sql = r#"SELECT a:"my long object key name" FROM t"#;
1286-
let select = snowflake().verified_only_select(sql);
1287-
assert_eq!(
1288-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1289-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1290-
path: JsonPath {
1291-
path: vec![JsonPathElem::Dot {
1292-
key: "my long object key name".to_owned(),
1293-
quoted: true
1294-
}]
1295-
},
1296-
}),
1297-
select.projection[0]
1298-
);
1268+
// see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
1269+
// cases. This test only has Snowflake-specific syntax like array access.
12991270

13001271
// expressions are allowed in bracket notation
13011272
let sql = r#"SELECT a[2 + 2] FROM t"#;
@@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
13161287
select.projection[0]
13171288
);
13181289

1319-
snowflake().verified_stmt("SELECT a:b::INT FROM t");
1320-
1321-
// unquoted keywords are permitted in the object key
1322-
let sql = "SELECT a:select, a:from FROM t";
1323-
let select = snowflake().verified_only_select(sql);
1324-
assert_eq!(
1325-
vec![
1326-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1327-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1328-
path: JsonPath {
1329-
path: vec![JsonPathElem::Dot {
1330-
key: "select".to_owned(),
1331-
quoted: false
1332-
}]
1333-
},
1334-
}),
1335-
SelectItem::UnnamedExpr(Expr::JsonAccess {
1336-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1337-
path: JsonPath {
1338-
path: vec![JsonPathElem::Dot {
1339-
key: "from".to_owned(),
1340-
quoted: false
1341-
}]
1342-
},
1343-
})
1344-
],
1345-
select.projection
1346-
);
1347-
1348-
// multiple levels can be traversed
1349-
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
1350-
let sql = r#"SELECT a:foo."bar".baz"#;
1351-
let select = snowflake().verified_only_select(sql);
1352-
assert_eq!(
1353-
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
1354-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1355-
path: JsonPath {
1356-
path: vec![
1357-
JsonPathElem::Dot {
1358-
key: "foo".to_owned(),
1359-
quoted: false,
1360-
},
1361-
JsonPathElem::Dot {
1362-
key: "bar".to_owned(),
1363-
quoted: true,
1364-
},
1365-
JsonPathElem::Dot {
1366-
key: "baz".to_owned(),
1367-
quoted: false,
1368-
}
1369-
]
1370-
},
1371-
})],
1372-
select.projection
1373-
);
1374-
1375-
// dot and bracket notation can be mixed (starting with : case)
1376-
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
1377-
let sql = r#"SELECT a:foo[0].bar"#;
1378-
let select = snowflake().verified_only_select(sql);
1379-
assert_eq!(
1380-
vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
1381-
value: Box::new(Expr::Identifier(Ident::new("a"))),
1382-
path: JsonPath {
1383-
path: vec![
1384-
JsonPathElem::Dot {
1385-
key: "foo".to_owned(),
1386-
quoted: false,
1387-
},
1388-
JsonPathElem::Bracket {
1389-
key: Expr::value(number("0")),
1390-
},
1391-
JsonPathElem::Dot {
1392-
key: "bar".to_owned(),
1393-
quoted: false,
1394-
}
1395-
]
1396-
},
1397-
})],
1398-
select.projection
1399-
);
1400-
14011290
// dot and bracket notation can be mixed (starting with bracket case)
14021291
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
14031292
let sql = r#"SELECT a[0].foo.bar"#;

0 commit comments

Comments
 (0)