From 7001a268c0042ffbe478114b7f90bd5b633092b3 Mon Sep 17 00:00:00 2001 From: TwoFinger <> Date: Sun, 5 Oct 2025 10:35:54 +0800 Subject: [PATCH 1/5] add SuccessorML line comments --- README.md | 3 +++ src/Main.smlnj.sml | 1 + src/base/AstAllows.sml | 4 ++++ src/lex-mlb/MLBLexer.sml | 1 + src/lex/Lexer.sml | 20 ++++++++++++---- src/parse-mlb/ParseAnnotations.sml | 19 +++++++++++++++ src/smlfmt.sml | 8 +++++++ src/syntax-highlighting/SyntaxHighlighter.sml | 1 + test/succeed/line-comments.mlb | 5 ++++ test/succeed/successor-ml/line-comments.sml | 23 +++++++++++++++++++ 10 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 test/succeed/line-comments.mlb create mode 100644 test/succeed/successor-ml/line-comments.sml diff --git a/README.md b/README.md index d23c29a..dfaf41d 100644 --- a/README.md +++ b/README.md @@ -143,3 +143,6 @@ for UTF-8 characters within strings. `-allow-sig-withtype [true|false]` (default `false`) controls whether or not SuccessorML `withtype` in signatures syntax is allowed. + +`-allow-line-comments [true|false]` (default `false`) controls whether or not +SuccessorML line comments syntax is allowed. diff --git a/src/Main.smlnj.sml b/src/Main.smlnj.sml index 5bd99f8..220560a 100644 --- a/src/Main.smlnj.sml +++ b/src/Main.smlnj.sml @@ -66,6 +66,7 @@ struct , recordPun = false , orPat = false , extendedText = false + , lineComment = false } diff --git a/src/base/AstAllows.sml b/src/base/AstAllows.sml index 1f5b297..ac66859 100644 --- a/src/base/AstAllows.sml +++ b/src/base/AstAllows.sml @@ -14,6 +14,7 @@ sig , orPat: bool , extendedText: bool , sigWithtype: bool + , lineComment: bool } -> t @@ -23,6 +24,7 @@ sig val orPat: t -> bool val extendedText: t -> bool val sigWithtype: t -> bool + val lineComment: t -> bool end = struct datatype t = @@ -33,6 +35,7 @@ struct , orPat: bool , extendedText: bool , sigWithtype: bool + , lineComment: bool } fun make x = T x @@ -42,4 +45,5 @@ struct fun orPat (T x) = #orPat x fun extendedText (T x) = #extendedText x fun sigWithtype (T x) = #sigWithtype x + fun lineComment (T x) = #lineComment x end diff --git a/src/lex-mlb/MLBLexer.sml b/src/lex-mlb/MLBLexer.sml index ca43aa9..c538e75 100644 --- a/src/lex-mlb/MLBLexer.sml +++ b/src/lex-mlb/MLBLexer.sml @@ -34,6 +34,7 @@ struct , orPat = false , extendedText = false , sigWithtype = false + , lineComment = false } in case Lexer.next smlLexerAllows src of diff --git a/src/lex/Lexer.sml b/src/lex/Lexer.sml index f5ac51f..8d6a8da 100644 --- a/src/lex/Lexer.sml +++ b/src/lex/Lexer.sml @@ -620,7 +620,7 @@ struct *) and loop_afterOpenParen s = if is #"*" at s then - loop_inComment (s + 1) {commentStart = s - 1, nesting = 1} + loop_inComment (s + 1) {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} else success (mkr Token.OpenParen (s - 1, s)) @@ -628,15 +628,18 @@ struct (** Inside a comment that started at `commentStart` * `nesting` is always >= 0 and indicates how many open-comments we've seen. *) - and loop_inComment s {commentStart, nesting} = + and loop_inComment s {commentStart, nesting, lastCommentStart} = if nesting = 0 then success (mk Token.Comment (commentStart, s)) else if is #"(" at s andalso is #"*" at s + 1 then loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting + 1} + {commentStart = commentStart, nesting = nesting + 1, lastCommentStart = s} else if is #"*" at s andalso is #")" at s + 1 then loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting - 1} + {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} + else if (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 andalso is #")" at s then + loop_inLineComment (s + 1) + {commentStart = commentStart, nesting = nesting, lastCommentStart = s} else if isEndOfFileAt s then error { pos = slice (commentStart, commentStart + 2) @@ -645,8 +648,15 @@ struct } else loop_inComment (s + 1) - {commentStart = commentStart, nesting = nesting} + {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} + and loop_inLineComment s {commentStart, nesting, lastCommentStart} = + if is #"\n" at s then + loop_inComment (s + 1) + {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} + else + loop_inLineComment (s + 1) + {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} in loop_topLevel startOffset diff --git a/src/parse-mlb/ParseAnnotations.sml b/src/parse-mlb/ParseAnnotations.sml index 01f4074..242068b 100644 --- a/src/parse-mlb/ParseAnnotations.sml +++ b/src/parse-mlb/ParseAnnotations.sml @@ -22,6 +22,7 @@ struct , orPat = b , extendedText = b , sigWithtype = b + , lineComment = b } @@ -33,6 +34,7 @@ struct , orPat = AstAllows.orPat a , extendedText = AstAllows.extendedText a , sigWithtype = AstAllows.sigWithtype a + , lineComment = AstAllows.lineComment a } @@ -44,6 +46,7 @@ struct , orPat = AstAllows.orPat a , extendedText = AstAllows.extendedText a , sigWithtype = AstAllows.sigWithtype a + , lineComment = AstAllows.lineComment a } @@ -55,6 +58,7 @@ struct , orPat = b , extendedText = AstAllows.extendedText a , sigWithtype = AstAllows.sigWithtype a + , lineComment = AstAllows.lineComment a } @@ -66,6 +70,7 @@ struct , orPat = AstAllows.orPat a , extendedText = b , sigWithtype = AstAllows.sigWithtype a + , lineComment = AstAllows.lineComment a } fun allowSigWithtype a b = @@ -76,6 +81,18 @@ struct , orPat = AstAllows.orPat a , extendedText = AstAllows.extendedText a , sigWithtype = b + , lineComment = AstAllows.lineComment a + } + + fun allowLineComments a b = + AstAllows.make + { optBar = AstAllows.optBar a + , topExp = AstAllows.topExp a + , recordPun = AstAllows.recordPun a + , orPat = AstAllows.orPat a + , extendedText = AstAllows.extendedText a + , sigWithtype = AstAllows.sigWithtype a + , lineComment = b } @@ -111,6 +128,8 @@ struct allowExtendedTextConsts allows false | ["allowSigWithtype", "true"] => allowSigWithtype allows true | ["allowSigWithtype", "false"] => allowSigWithtype allows false + | ["allowLineComments", "true"] => allowLineComments allows true + | ["allowLineComments", "false"] => allowLineComments allows false | _ => allows diff --git a/src/smlfmt.sml b/src/smlfmt.sml index 91eac66..bb84f12 100644 --- a/src/smlfmt.sml +++ b/src/smlfmt.sml @@ -79,6 +79,11 @@ val optionalArgDesc = \ Valid options are: true, false\n\ \ (default 'false')\n\ \\n\ + \ [-allow-line-comments B] Enable/disable SuccessorML line comments\n\ + \ syntax.\n\ + \ Valid options are: true, false\n\ + \ (default 'false')\n\ + \\n\ \ [--help] print this message\n" @@ -105,6 +110,8 @@ val allowExtendedText = CommandLineArgs.parseBool "allow-extended-text-consts" allowSuccessorML val allowSigWithtype = CommandLineArgs.parseBool "allow-sig-withtype" allowSuccessorML +val allowLineComments = + CommandLineArgs.parseBool "allow-line-comments" allowSuccessorML val doDebug = CommandLineArgs.parseFlag "debug-engine" val doForce = CommandLineArgs.parseFlag "force" @@ -126,6 +133,7 @@ val allows = AstAllows.make , orPat = allowOrPat , extendedText = allowExtendedText , sigWithtype = allowSigWithtype + , lineComment = allowLineComments } val _ = diff --git a/src/syntax-highlighting/SyntaxHighlighter.sml b/src/syntax-highlighting/SyntaxHighlighter.sml index 77fa07d..a519854 100644 --- a/src/syntax-highlighting/SyntaxHighlighter.sml +++ b/src/syntax-highlighting/SyntaxHighlighter.sml @@ -112,6 +112,7 @@ struct , orPat = true , extendedText = true , sigWithtype = true + , lineComment = true } val startOffset = Source.absoluteStartOffset src diff --git a/test/succeed/line-comments.mlb b/test/succeed/line-comments.mlb new file mode 100644 index 0000000..94e346d --- /dev/null +++ b/test/succeed/line-comments.mlb @@ -0,0 +1,5 @@ +$(SML_LIB)/basis/basis.mlb +ann "allowLineComments true" in + successor-ml/line-comments.sml + star-close-paren-but-not-comment.sml +end diff --git a/test/succeed/successor-ml/line-comments.sml b/test/succeed/successor-ml/line-comments.sml new file mode 100644 index 0000000..8c61bd3 --- /dev/null +++ b/test/succeed/successor-ml/line-comments.sml @@ -0,0 +1,23 @@ +val x = + (*) whole line + case foobar of (*) end of line + + (* + (*) nested + *) + + (* + (*) nested with closing marker *) + *) + + (* with closing ) paren + *) + + (* after two (* nested block comments *) + (*) line comment + *) + + (* in two (* nested block comments + (*) line comment + *) *) + 3 => 2 From 0cee20db2fe5337833b5883ae43fe31d74003080 Mon Sep 17 00:00:00 2001 From: TwoFinger <> Date: Tue, 7 Oct 2025 00:39:17 +0800 Subject: [PATCH 2/5] reformat Lexer.sml --- src/lex/Lexer.sml | 54 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/src/lex/Lexer.sml b/src/lex/Lexer.sml index 8d6a8da..16ab00c 100644 --- a/src/lex/Lexer.sml +++ b/src/lex/Lexer.sml @@ -620,7 +620,8 @@ struct *) and loop_afterOpenParen s = if is #"*" at s then - loop_inComment (s + 1) {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} + loop_inComment (s + 1) + {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} else success (mkr Token.OpenParen (s - 1, s)) @@ -629,18 +630,38 @@ struct * `nesting` is always >= 0 and indicates how many open-comments we've seen. *) and loop_inComment s {commentStart, nesting, lastCommentStart} = - if nesting = 0 then + if + nesting = 0 + then success (mk Token.Comment (commentStart, s)) - else if is #"(" at s andalso is #"*" at s + 1 then + else if + is #"(" at s andalso is #"*" at s + 1 + then loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting + 1, lastCommentStart = s} - else if is #"*" at s andalso is #")" at s + 1 then + { commentStart = commentStart + , nesting = nesting + 1 + , lastCommentStart = s + } + else if + is #"*" at s andalso is #")" at s + 1 + then loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} - else if (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 andalso is #")" at s then + { commentStart = commentStart + , nesting = nesting - 1 + , lastCommentStart = lastCommentStart + } + else if + (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 + andalso is #")" at s + then loop_inLineComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = s} - else if isEndOfFileAt s then + { commentStart = commentStart + , nesting = nesting + , lastCommentStart = s + } + else if + isEndOfFileAt s + then error { pos = slice (commentStart, commentStart + 2) , what = "Unclosed comment." @@ -648,15 +669,24 @@ struct } else loop_inComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} + { commentStart = commentStart + , nesting = nesting + , lastCommentStart = lastCommentStart + } and loop_inLineComment s {commentStart, nesting, lastCommentStart} = if is #"\n" at s then loop_inComment (s + 1) - {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} + { commentStart = commentStart + , nesting = nesting - 1 + , lastCommentStart = lastCommentStart + } else loop_inLineComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} + { commentStart = commentStart + , nesting = nesting + , lastCommentStart = lastCommentStart + } in loop_topLevel startOffset From b58fc1ea02e20703ff6a380e00da43c5c2dd0148 Mon Sep 17 00:00:00 2001 From: TwoFinger <> Date: Tue, 7 Oct 2025 14:56:52 +0800 Subject: [PATCH 3/5] Revert "reformat Lexer.sml" This reverts commit 0cee20db2fe5337833b5883ae43fe31d74003080. --- src/lex/Lexer.sml | 54 +++++++++++------------------------------------ 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/src/lex/Lexer.sml b/src/lex/Lexer.sml index 16ab00c..8d6a8da 100644 --- a/src/lex/Lexer.sml +++ b/src/lex/Lexer.sml @@ -620,8 +620,7 @@ struct *) and loop_afterOpenParen s = if is #"*" at s then - loop_inComment (s + 1) - {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} + loop_inComment (s + 1) {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} else success (mkr Token.OpenParen (s - 1, s)) @@ -630,38 +629,18 @@ struct * `nesting` is always >= 0 and indicates how many open-comments we've seen. *) and loop_inComment s {commentStart, nesting, lastCommentStart} = - if - nesting = 0 - then + if nesting = 0 then success (mk Token.Comment (commentStart, s)) - else if - is #"(" at s andalso is #"*" at s + 1 - then + else if is #"(" at s andalso is #"*" at s + 1 then loop_inComment (s + 2) - { commentStart = commentStart - , nesting = nesting + 1 - , lastCommentStart = s - } - else if - is #"*" at s andalso is #")" at s + 1 - then + {commentStart = commentStart, nesting = nesting + 1, lastCommentStart = s} + else if is #"*" at s andalso is #")" at s + 1 then loop_inComment (s + 2) - { commentStart = commentStart - , nesting = nesting - 1 - , lastCommentStart = lastCommentStart - } - else if - (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 - andalso is #")" at s - then + {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} + else if (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 andalso is #")" at s then loop_inLineComment (s + 1) - { commentStart = commentStart - , nesting = nesting - , lastCommentStart = s - } - else if - isEndOfFileAt s - then + {commentStart = commentStart, nesting = nesting, lastCommentStart = s} + else if isEndOfFileAt s then error { pos = slice (commentStart, commentStart + 2) , what = "Unclosed comment." @@ -669,24 +648,15 @@ struct } else loop_inComment (s + 1) - { commentStart = commentStart - , nesting = nesting - , lastCommentStart = lastCommentStart - } + {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} and loop_inLineComment s {commentStart, nesting, lastCommentStart} = if is #"\n" at s then loop_inComment (s + 1) - { commentStart = commentStart - , nesting = nesting - 1 - , lastCommentStart = lastCommentStart - } + {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} else loop_inLineComment (s + 1) - { commentStart = commentStart - , nesting = nesting - , lastCommentStart = lastCommentStart - } + {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} in loop_topLevel startOffset From ecd88cbe5d5f5174a53797712fe6e911899a72c0 Mon Sep 17 00:00:00 2001 From: TwoFinger <> Date: Tue, 7 Oct 2025 15:09:40 +0800 Subject: [PATCH 4/5] rewrite without lastCommentStart --- src/lex/Lexer.sml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/lex/Lexer.sml b/src/lex/Lexer.sml index 8d6a8da..0f51be0 100644 --- a/src/lex/Lexer.sml +++ b/src/lex/Lexer.sml @@ -620,7 +620,7 @@ struct *) and loop_afterOpenParen s = if is #"*" at s then - loop_inComment (s + 1) {commentStart = s - 1, nesting = 1, lastCommentStart = s - 1} + loop_inComment (s + 1) {commentStart = s - 1, nesting = 1} else success (mkr Token.OpenParen (s - 1, s)) @@ -628,18 +628,24 @@ struct (** Inside a comment that started at `commentStart` * `nesting` is always >= 0 and indicates how many open-comments we've seen. *) - and loop_inComment s {commentStart, nesting, lastCommentStart} = + and loop_inComment s {commentStart, nesting} = if nesting = 0 then success (mk Token.Comment (commentStart, s)) else if is #"(" at s andalso is #"*" at s + 1 then - loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting + 1, lastCommentStart = s} + if (AstAllows.lineComment allows) andalso is #")" at s + 2 then + (* Nested line comment *) + loop_inLineComment (s + 3) + {commentStart = commentStart, nesting = nesting + 1} + else + loop_inComment (s + 2) + {commentStart = commentStart, nesting = nesting + 1} else if is #"*" at s andalso is #")" at s + 1 then loop_inComment (s + 2) - {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} - else if (AstAllows.lineComment allows) andalso s = lastCommentStart + 2 andalso is #")" at s then + {commentStart = commentStart, nesting = nesting - 1} + else if (AstAllows.lineComment allows) andalso is #")" at commentStart + 2 then + (* Top-level line comment *) loop_inLineComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = s} + {commentStart = commentStart, nesting = nesting} else if isEndOfFileAt s then error { pos = slice (commentStart, commentStart + 2) @@ -648,15 +654,15 @@ struct } else loop_inComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} + {commentStart = commentStart, nesting = nesting} - and loop_inLineComment s {commentStart, nesting, lastCommentStart} = + and loop_inLineComment s {commentStart, nesting} = if is #"\n" at s then loop_inComment (s + 1) - {commentStart = commentStart, nesting = nesting - 1, lastCommentStart = lastCommentStart} + {commentStart = commentStart, nesting = nesting - 1} else loop_inLineComment (s + 1) - {commentStart = commentStart, nesting = nesting, lastCommentStart = lastCommentStart} + {commentStart = commentStart, nesting = nesting} in loop_topLevel startOffset From d0cc5e03a63785fc2148c4bcbf644c9e08da1719 Mon Sep 17 00:00:00 2001 From: TwoFinger <> Date: Tue, 7 Oct 2025 21:59:54 +0800 Subject: [PATCH 5/5] fix the excessive indent after a line comment --- src/lex/Lexer.sml | 2 +- test/succeed/successor-ml/line-comments.sml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lex/Lexer.sml b/src/lex/Lexer.sml index 0f51be0..b7111e1 100644 --- a/src/lex/Lexer.sml +++ b/src/lex/Lexer.sml @@ -658,7 +658,7 @@ struct and loop_inLineComment s {commentStart, nesting} = if is #"\n" at s then - loop_inComment (s + 1) + loop_inComment (s) {commentStart = commentStart, nesting = nesting - 1} else loop_inLineComment (s + 1) diff --git a/test/succeed/successor-ml/line-comments.sml b/test/succeed/successor-ml/line-comments.sml index 8c61bd3..594d50e 100644 --- a/test/succeed/successor-ml/line-comments.sml +++ b/test/succeed/successor-ml/line-comments.sml @@ -1,7 +1,7 @@ val x = (*) whole line - case foobar of (*) end of line - + case foobar of (*) end of line + (* (*) nested *)