Skip to content

Commit 35ed36c

Browse files
tbvjaos510 and peter authored
fix(traverse): fix panic when truncating non-ASCII variable names (#16265)
## Summary

Fix a panic when `get_var_name_from_node` processes non-ASCII variable names (Korean, CJK, Greek, etc.).

## Problem

When transpiling code with non-ASCII variable names, such as Korean:

```typescript
const [서비스이용약관_query, 개인정보수집동의_query] = useQueries({...});
```

the following panic occurs:

`thread 'tokio-runtime-worker' panicked at oxc_traverse/src/ast_operations/gather_node_parts.rs:26:14: assertion failed: self.is_char_boundary(new_len)`

## Cause

`get_var_name_from_node` was using a byte-based `truncate(20)`:

```rust
name.truncate(20); // Cuts at byte 20, not character 20
```

Korean characters are 3 bytes in UTF-8, so "서비스이용약관" (7 chars = 21 bytes) gets cut in the middle of '관', causing the panic.

## Solution

Changed to character-based truncation to match https://github.com/babel/babel/blob/419644f27c5c59deb19e71aaabd417a3bc5483ca/packages/babel-traverse/src/scope/index.ts#L210:

```rust
if name.len() > 20 { name = name.chars().take(20).collect(); }
```

The `len() > 20` check avoids an unnecessary allocation when the string is already short enough (20 bytes guarantees ≤20 chars).

Note: the diff shown for this commit takes a different approach from the snippet above. It truncates to at most 20 bytes, backing up to a UTF-8 character boundary when byte 19 is not ASCII, and its code comment acknowledges that this diverges slightly from Babel's 20-character limit.

## Test Plan

Added tests for:

- 2-byte UTF-8 (Greek letters)
- 3-byte UTF-8 (Korean characters)
- 4-byte UTF-8
- Mixed ASCII + multi-byte

Co-authored-by: peter <[email protected]>
1 parent 02bdf90 commit 35ed36c

File tree

1 file changed

+80
-2
lines changed

1 file changed

+80
-2
lines changed

crates/oxc_traverse/src/ast_operations/gather_node_parts.rs

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,23 @@ pub fn get_var_name_from_node<'a, N: GatherNodeParts<'a>>(node: &N) -> String {
2222

2323
if name.is_empty() {
2424
name = "ref".to_string();
25-
} else {
26-
name.truncate(20);
25+
} else if name.len() > 20 {
26+
// Truncate to ~20 bytes, respecting UTF-8 char boundaries.
27+
// This diverges slightly from Babel (which limits to 20 chars),
28+
// but the goal is just to avoid overly long variable names.
29+
let bytes = name.as_bytes();
30+
if bytes[19] < 0x80 {
31+
// 20th byte is ASCII, safe to truncate here
32+
name.truncate(20);
33+
} else {
34+
// Byte 19 may be in the middle of a multi-byte char.
35+
// Walk back to find the start byte (continuation bytes are 10xxxxxx).
36+
let mut truncate_at = 19;
37+
while truncate_at > 0 && (bytes[truncate_at] & 0xC0) == 0x80 {
38+
truncate_at -= 1;
39+
}
40+
name.truncate(truncate_at);
41+
}
2742
}
2843

2944
to_identifier(name)
@@ -563,3 +578,66 @@ impl<'a> GatherNodeParts<'a> for JSXIdentifier<'a> {
563578
f(self.name.as_str());
564579
}
565580
}
581+
582+
#[cfg(test)]
583+
mod tests {
584+
use super::*;
585+
586+
/// Test wrapper that implements GatherNodeParts for testing purposes
587+
struct TestNode<'a>(&'a str);
588+
589+
impl<'a> GatherNodeParts<'a> for TestNode<'a> {
590+
fn gather<F: FnMut(&str)>(&self, f: &mut F) {
591+
f(self.0);
592+
}
593+
}
594+
595+
#[test]
596+
fn test_get_var_name_truncation_limits_to_approximately_20_bytes() {
597+
// ASCII: 20 bytes - no truncation
598+
let node = TestNode("abcdefghijklmnopqrst");
599+
assert_eq!(get_var_name_from_node(&node), "abcdefghijklmnopqrst");
600+
601+
// ASCII: 21 bytes -> 20 bytes
602+
let node = TestNode("abcdefghijklmnopqrstu");
603+
assert_eq!(get_var_name_from_node(&node), "abcdefghijklmnopqrst");
604+
605+
// 2-byte UTF-8 (Greek): 21 chars (42 bytes) -> 9 chars (18 bytes)
606+
let node = TestNode("αβγδεζηθικλμνξοπρστυφ");
607+
assert_eq!(get_var_name_from_node(&node), "αβγδεζηθι");
608+
609+
// 3-byte UTF-8 (Korean): 10 chars (30 bytes) -> 6 chars (18 bytes)
610+
let node = TestNode("가나다라마바사아자차");
611+
assert_eq!(get_var_name_from_node(&node), "가나다라마바");
612+
613+
// 4-byte UTF-8 (CJK Ext B): 6 chars (24 bytes) -> 4 chars (16 bytes)
614+
let node = TestNode("𠀀𠀁𠀂𠀃𠀄𠀅");
615+
assert_eq!(get_var_name_from_node(&node), "𠀀𠀁𠀂𠀃");
616+
617+
// Mixed ASCII + Greek: 21 bytes -> 19 bytes
618+
let node = TestNode("test_αβγδεζηθ");
619+
assert_eq!(get_var_name_from_node(&node), "test_αβγδεζη");
620+
621+
// Short string - no truncation
622+
let node = TestNode("short");
623+
assert_eq!(get_var_name_from_node(&node), "short");
624+
625+
// Exactly 20 bytes - no truncation
626+
let node = TestNode("αβγδεζηθικ");
627+
assert_eq!(get_var_name_from_node(&node), "αβγδεζηθικ");
628+
}
629+
630+
#[test]
631+
fn test_get_var_name_empty_returns_ref() {
632+
let node = TestNode("");
633+
let result = get_var_name_from_node(&node);
634+
assert_eq!(result, "ref");
635+
}
636+
637+
#[test]
638+
fn test_get_var_name_strips_leading_underscores() {
639+
let node = TestNode("___foo");
640+
let result = get_var_name_from_node(&node);
641+
assert_eq!(result, "foo");
642+
}
643+
}

0 commit comments

Comments
 (0)