Skip to content

Commit 2274f1b

Browse files
allcre, KaanOzkan, amomchilov
authored
Add RBS Prism Infrastructure Stubs and Parser API Improvements (sorbet#9587)
* Add RBS with Prism infrastructure stubs Co-authored-by: Kaan Ozkan <[email protected]> * Improve RBS rewrite infrastructure * Continue supporting RBS and Prism * Add `Prism::Parser.prettyPrint()` Co-authored-by: Alexander Momchilov <[email protected]> * Enable printing Prism RBS rewrite tree Co-authored-by: Alexander Momchilov <[email protected]> * Stop prism parser after RBS rewrite * Check whether RBS is enabled within runRBSRewrite * Move [[maybe_unused]] into cc files * Move runPrismRBSRewrite into anonymous namespace * Remove RBS_REWRITER phase * No-op runPrismRBSRewrite within runPrismParser --------- Co-authored-by: Kaan Ozkan <[email protected]> Co-authored-by: Alexander Momchilov <[email protected]>
1 parent 9659de9 commit 2274f1b

File tree

11 files changed

+259
-40
lines changed

11 files changed

+259
-40
lines changed

main/options/options.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ struct Printers {
9898
enum class Phase {
9999
INIT,
100100
PARSER,
101-
RBS_REWRITER,
102101
DESUGARER,
103102
REWRITER,
104103
LOCAL_VARS,

main/pipeline/pipeline.cc

Lines changed: 88 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
#include "rbs/AssertionsRewriter.h"
4848
#include "rbs/CommentsAssociator.h"
4949
#include "rbs/SigsRewriter.h"
50+
#include "rbs/prism/AssertionsRewriterPrism.h"
51+
#include "rbs/prism/CommentsAssociatorPrism.h"
52+
#include "rbs/prism/SigsRewriterPrism.h"
5053
#include "resolver/resolver.h"
5154
#include "rewriter/rewriter.h"
5255

@@ -203,6 +206,10 @@ core::StrictLevel decideStrictLevel(const core::GlobalState &gs, const core::Fil
203206

204207
namespace {
205208

209+
pm_node_t *runPrismRBSRewrite(core::GlobalState &gs, core::FileRef file, pm_node_t *node,
210+
const vector<core::LocOffsets> &commentLocations, const options::Printers &print,
211+
core::MutableContext &ctx, const parser::Prism::Parser &parser);
212+
206213
ast::ExpressionPtr fetchTreeFromCache(core::GlobalState &gs, core::FileRef fref, core::File &file,
207214
const unique_ptr<const OwnedKeyValueStore> &kvstore) {
208215
if (kvstore == nullptr) {
@@ -252,6 +259,59 @@ parser::ParseResult runParser(core::GlobalState &gs, core::FileRef file, const o
252259
return result;
253260
}
254261

262+
// Parses `file` with Prism and translates the raw Prism tree into a `parser::ParseResult`.
//
// Parameters:
//   gs                     - global state; names from the source are entered into its name table.
//   file                   - the file to parse.
//   print                  - printer configuration; enabled parse-tree printers are emitted after translation.
//   opts                   - only `stopAfterPhase` is consulted here.
//   preserveConcreteSyntax - forwarded unchanged to `parser::Prism::Translator`.
//
// Returns:
//   A `ParseResult` holding the translated tree and the collected comment locations. When
//   `opts.stopAfterPhase` is `Phase::PARSER`, translation is skipped and the returned `tree` is null, so
//   callers must check `tree` before using it.
parser::ParseResult runPrismParser(core::GlobalState &gs, core::FileRef file, const options::Printers &print,
                                   const options::Options &opts, bool preserveConcreteSyntax = false) {
    Timer timeit(gs.tracer(), "runParser", {{"file", string(file.data(gs).path())}});

    parser::ParseResult parseResult;
    {
        core::MutableContext ctx(gs, core::Symbols::root(), file);
        core::UnfreezeNameTable nameTableAccess(gs); // enters strings from source code as names

        const bool rbsEnabled = gs.cacheSensitiveOptions.rbsEnabled;

        auto source = file.data(ctx).source();
        // Declared before `prismResult` so that it is destroyed after it: the raw nodes held by
        // `prismResult` are released through this parser, and the translator below reads from it too.
        parser::Prism::Parser prismParser{source};

        // Comment locations are only collected when RBS is enabled.
        parser::Prism::ParseResult prismResult = prismParser.parseWithoutTranslation(rbsEnabled);

        if (opts.stopAfterPhase == options::Phase::PARSER) {
            // Stop before translation: no tree is produced, only the comment locations are handed back.
            return parser::ParseResult{nullptr, prismResult.getCommentLocations()};
        }

        auto node = prismResult.getRawNodePointer();

        // TODO: Remove `&& false` once RBS rewriter with Prism AST migration is complete
        // https://github.com/sorbet/sorbet/issues/9065
        if (rbsEnabled && false) {
            node = runPrismRBSRewrite(gs, file, node, prismResult.getCommentLocations(), print, ctx, prismParser);
        }

        // The RBS rewriter produces plain Whitequark nodes and not `NodeWithExpr` which causes errors in
        // `PrismDesugar.cc`. For now, disable direct desugaring whenever RBS is enabled, and fall back to
        // `Desugar.cc`.
        const bool directlyDesugar = !rbsEnabled;
        auto translatedTree = parser::Prism::Translator(prismParser, ctx, prismResult.getParseErrors(),
                                                        directlyDesugar, preserveConcreteSyntax)
                                  .translate(node);

        parseResult = parser::ParseResult{move(translatedTree), prismResult.getCommentLocations()};
    }

    if (parseResult.tree) {
        if (print.ParseTree.enabled) {
            print.ParseTree.fmt("{}\n", parseResult.tree->toStringWithTabs(gs, 0));
        }
        if (print.ParseTreeJson.enabled) {
            print.ParseTreeJson.fmt("{}\n", parseResult.tree->toJSON(gs, 0));
        }
        if (print.ParseTreeJsonWithLocs.enabled) {
            // Emit through the printer that was actually requested, so the output lands in that
            // printer's destination rather than in ParseTreeJson's.
            print.ParseTreeJsonWithLocs.fmt("{}\n", parseResult.tree->toJSONWithLocs(gs, file, 0));
        }
        if (print.ParseTreeWhitequark.enabled) {
            print.ParseTreeWhitequark.fmt("{}\n", parseResult.tree->toWhitequark(gs, 0));
        }
    }

    return parseResult;
}
314+
255315
unique_ptr<parser::Node> runRBSRewrite(core::GlobalState &gs, core::FileRef file, parser::ParseResult &&parseResult,
256316
const options::Printers &print) {
257317
auto node = move(parseResult.tree);
@@ -278,36 +338,6 @@ unique_ptr<parser::Node> runRBSRewrite(core::GlobalState &gs, core::FileRef file
278338
return node;
279339
}
280340

281-
parser::ParseResult runPrismParser(core::GlobalState &gs, core::FileRef file, const options::Printers &print,
282-
bool preserveConcreteSyntax = false) {
283-
Timer timeit(gs.tracer(), "runParser", {{"file", string(file.data(gs).path())}});
284-
285-
parser::ParseResult parseResult;
286-
{
287-
core::MutableContext ctx(gs, core::Symbols::root(), file);
288-
core::UnfreezeNameTable nameTableAccess(gs); // enters strings from source code as names
289-
// The RBS rewriter produces plain Whitequark nodes and not `NodeWithExpr` which causes errors in
290-
// `PrismDesugar.cc`. For now, disable all direct translation, and fallback to `Desugar.cc`.
291-
auto directlyTranslate = !gs.cacheSensitiveOptions.rbsEnabled;
292-
parseResult = parser::Prism::Parser::run(ctx, directlyTranslate);
293-
}
294-
295-
if (print.ParseTree.enabled) {
296-
print.ParseTree.fmt("{}\n", parseResult.tree->toStringWithTabs(gs, 0));
297-
}
298-
if (print.ParseTreeJson.enabled) {
299-
print.ParseTreeJson.fmt("{}\n", parseResult.tree->toJSON(gs, 0));
300-
}
301-
if (print.ParseTreeJsonWithLocs.enabled) {
302-
print.ParseTreeJson.fmt("{}\n", parseResult.tree->toJSONWithLocs(gs, file, 0));
303-
}
304-
if (print.ParseTreeWhitequark.enabled) {
305-
print.ParseTreeWhitequark.fmt("{}\n", parseResult.tree->toWhitequark(gs, 0));
306-
}
307-
308-
return parseResult;
309-
}
310-
311341
ast::ExpressionPtr runDesugar(core::GlobalState &gs, core::FileRef file, unique_ptr<parser::Node> parseTree,
312342
const options::Printers &print, bool preserveConcreteSyntax = false) {
313343
Timer timeit(gs.tracer(), "runDesugar", {{"file", string(file.data(gs).path())}});
@@ -351,6 +381,27 @@ ast::ParsedFile emptyParsedFile(core::FileRef file) {
351381
return {ast::MK::EmptyTree(), file};
352382
}
353383

384+
// Runs the Prism RBS rewrite passes over `node` and returns the root produced by the last pass.
//
// The passes run in this order:
//   1. rbs::CommentsAssociatorPrism - builds a comment map from `commentLocations`.
//   2. rbs::SigsRewriterPrism       - receives the map's `signaturesForNode`.
//   3. rbs::AssertionsRewriterPrism - receives the map's `assertionsForNode`.
//
// If the RBSRewriteTree printer is enabled, the final tree is pretty-printed through `parser`.
pm_node_t *runPrismRBSRewrite(core::GlobalState &gs, core::FileRef file, pm_node_t *node,
                              const vector<core::LocOffsets> &commentLocations, const options::Printers &print,
                              core::MutableContext &ctx, const parser::Prism::Parser &parser) {
    Timer timeit(gs.tracer(), "runPrismRBSRewrite", {{"file", string(file.data(gs).path())}});

    rbs::CommentsAssociatorPrism commentsPass(ctx, parser, commentLocations);
    auto commentMap = commentsPass.run(node);

    rbs::SigsRewriterPrism sigsPass(ctx, parser, commentMap.signaturesForNode);
    pm_node_t *rewritten = sigsPass.run(node);

    rbs::AssertionsRewriterPrism assertionsPass(ctx, commentMap.assertionsForNode);
    rewritten = assertionsPass.run(rewritten);

    if (print.RBSRewriteTree.enabled) {
        print.RBSRewriteTree.fmt("{}\n", parser.prettyPrint(rewritten));
    }

    return rewritten;
}
404+
354405
} // namespace
355406

356407
ast::ExpressionPtr desugarOne(const options::Options &opts, core::GlobalState &gs, core::FileRef file,
@@ -401,20 +452,21 @@ ast::ParsedFile indexOne(const options::Options &opts, core::GlobalState &lgs, c
401452
}
402453

403454
parseTree = runRBSRewrite(lgs, file, move(parseResult), print);
404-
if (opts.stopAfterPhase == options::Phase::RBS_REWRITER) {
405-
return emptyParsedFile(file);
406-
}
407455

408456
break;
409457
}
410458
case options::Parser::PRISM: {
411-
auto parseResult = runPrismParser(lgs, file, print);
412-
parseTree = runRBSRewrite(lgs, file, move(parseResult), print);
459+
auto parseResult = runPrismParser(lgs, file, print, opts);
413460

414-
if (opts.stopAfterPhase == options::Phase::PARSER) {
461+
// parseResult is null if runPrismParser stopped after an intermediate phase
462+
if (parseResult.tree == nullptr) {
415463
return emptyParsedFile(file);
416464
}
417465

466+
// TODO: Remove this check once runPrismRBSRewrite is no longer no-oped inside of runPrismParser
467+
// https://github.com/sorbet/sorbet/issues/9065
468+
parseTree = runRBSRewrite(lgs, file, move(parseResult), print);
469+
418470
break;
419471
}
420472
}

parser/prism/Parser.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ parser::ParseResult Parser::run(core::MutableContext ctx, bool directlyDesugar,
1212
auto source = file.data(ctx).source();
1313
Prism::Parser parser{source};
1414
bool collectComments = ctx.state.cacheSensitiveOptions.rbsEnabled;
15-
Prism::ParseResult parseResult = parser.parse(collectComments);
15+
Prism::ParseResult parseResult = parser.parseWithoutTranslation(collectComments);
1616

1717
auto translatedTree =
1818
Prism::Translator(parser, ctx, parseResult.parseErrors, directlyDesugar, preserveConcreteSyntax)
@@ -24,7 +24,9 @@ pm_parser_t *Parser::getRawParserPointer() {
2424
return &parser;
2525
}
2626

27-
ParseResult Parser::parse(bool collectComments) {
27+
// Parses without translating and returns raw Prism nodes for intermediate processing (e.g., RBS rewriting)
28+
// Caller must keep Parser alive for later translation, unlike run() which parses + translates in one step
29+
ParseResult Parser::parseWithoutTranslation(bool collectComments) {
2830
pm_node_t *root = pm_parse(&parser);
2931
auto comments = collectComments ? collectCommentLocations() : vector<core::LocOffsets>{};
3032
return ParseResult{*this, root, collectErrors(), move(comments)};
@@ -91,4 +93,12 @@ vector<core::LocOffsets> Parser::collectCommentLocations() {
9193

9294
return commentLocations;
9395
}
96+
97+
// Returns the text that Prism's `pm_prettyprint` produces for `node`.
string Parser::prettyPrint(pm_node_t *node) const {
    // This method is const, but `pm_prettyprint` is handed a non-const parser pointer
    // (presumably its C signature requires one), hence the const_cast.
    auto *rawParser = const_cast<pm_parser_t *>(&parser);

    pm_buffer_t out{};
    pm_prettyprint(&out, rawParser, node);

    // Copy the bytes into an owned string before the buffer's storage is released.
    string rendered(out.value, out.length);
    pm_buffer_free(&out);

    return rendered;
}
94104
}; // namespace sorbet::parser::Prism

parser/prism/Parser.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,12 @@ class Parser final {
6060
static parser::ParseResult run(core::MutableContext ctx, bool directlyDesugar = true,
6161
bool preserveConcreteSyntax = false);
6262

63-
ParseResult parse(bool collectComments = false);
63+
ParseResult parseWithoutTranslation(bool collectComments = false);
6464
core::LocOffsets translateLocation(pm_location_t location) const;
6565
core::LocOffsets translateLocation(const uint8_t *start, const uint8_t *end) const;
6666
std::string_view resolveConstant(pm_constant_id_t constantId) const;
6767
std::string_view extractString(pm_string_t *string) const;
68+
std::string prettyPrint(pm_node_t *node) const;
6869

6970
pm_location_t getZeroWidthLocation() const;
7071
pm_location_t convertLocOffsets(core::LocOffsets loc) const;
@@ -92,6 +93,7 @@ class ParseResult final {
9293
const std::vector<ParseError> parseErrors;
9394
std::vector<core::LocOffsets> commentLocations;
9495

96+
public:
9597
ParseResult(Parser &parser, pm_node_t *node, std::vector<ParseError> parseErrors,
9698
std::vector<core::LocOffsets> commentLocations)
9799
: parser{parser}, node{node, NodeDeleter{parser}}, parseErrors{parseErrors}, commentLocations{
@@ -105,6 +107,14 @@ class ParseResult final {
105107
pm_node_t *getRawNodePointer() const {
106108
return node.get();
107109
}
110+
111+
const std::vector<core::LocOffsets> &getCommentLocations() const {
112+
return commentLocations;
113+
}
114+
115+
const std::vector<ParseError> &getParseErrors() const {
116+
return parseErrors;
117+
}
108118
};
109119

110120
} // namespace sorbet::parser::Prism

rbs/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ cc_library(
33
srcs = glob([
44
"*.cc",
55
"*.h",
6+
"prism/*.cc",
7+
"prism/*.h",
68
]),
79
linkstatic = select({
810
"//tools/config:linkshared": 0,
@@ -14,6 +16,7 @@ cc_library(
1416
"//common",
1517
"//core",
1618
"//parser",
19+
"//parser/prism",
1720
"//rewriter/util",
1821
"@rbs_parser",
1922
],
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include "rbs/prism/AssertionsRewriterPrism.h"
2+
3+
using namespace std;
4+
5+
namespace sorbet::rbs {
6+
7+
AssertionsRewriterPrism::AssertionsRewriterPrism(core::MutableContext ctx,
8+
std::map<pm_node_t *, std::vector<CommentNodePrism>> &commentsByNode)
9+
: ctx(ctx), commentsByNode(&commentsByNode) {}
10+
11+
pm_node_t *AssertionsRewriterPrism::run(pm_node_t *node) {
12+
[[maybe_unused]] auto *_commentsByNode = this->commentsByNode;
13+
return node;
14+
}
15+
16+
} // namespace sorbet::rbs
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef SORBET_RBS_ASSERTIONS_REWRITER_PRISM_H
2+
#define SORBET_RBS_ASSERTIONS_REWRITER_PRISM_H
3+
4+
#include "parser/parser.h"
5+
#include "rbs/prism/CommentsAssociatorPrism.h"
6+
7+
extern "C" {
8+
#include "prism.h"
9+
}
10+
11+
namespace sorbet::rbs {
12+
13+
class AssertionsRewriterPrism {
14+
public:
15+
AssertionsRewriterPrism(core::MutableContext ctx,
16+
std::map<pm_node_t *, std::vector<CommentNodePrism>> &commentsByNode);
17+
18+
pm_node_t *run(pm_node_t *node);
19+
20+
private:
21+
core::MutableContext ctx;
22+
std::map<pm_node_t *, std::vector<CommentNodePrism>> *commentsByNode;
23+
};
24+
25+
} // namespace sorbet::rbs
26+
27+
#endif // SORBET_RBS_ASSERTIONS_REWRITER_PRISM_H
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "rbs/prism/CommentsAssociatorPrism.h"
2+
3+
using namespace std;
4+
5+
namespace sorbet::rbs {
6+
7+
CommentsAssociatorPrism::CommentsAssociatorPrism(core::MutableContext ctx, const parser::Prism::Parser &parser,
8+
const std::vector<core::LocOffsets> &commentLocations)
9+
: ctx(ctx), parser(parser), commentLocations(commentLocations) {}
10+
11+
CommentMapPrism CommentsAssociatorPrism::run(pm_node_t *node) {
12+
[[maybe_unused]] auto &_parser = this->parser;
13+
[[maybe_unused]] auto &_commentLocations = this->commentLocations;
14+
return CommentMapPrism{};
15+
}
16+
17+
} // namespace sorbet::rbs
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#ifndef SORBET_RBS_COMMENTS_ASSOCIATOR_PRISM_H
2+
#define SORBET_RBS_COMMENTS_ASSOCIATOR_PRISM_H
3+
4+
#include "common/common.h"
5+
#include "parser/parser.h"
6+
#include "parser/prism/Parser.h"
7+
8+
extern "C" {
9+
#include "prism.h"
10+
}
11+
12+
namespace sorbet::rbs {
13+
14+
struct CommentNodePrism {
15+
core::LocOffsets loc;
16+
std::string string;
17+
};
18+
19+
struct CommentMapPrism {
20+
std::map<pm_node_t *, std::vector<CommentNodePrism>> signaturesForNode;
21+
std::map<pm_node_t *, std::vector<CommentNodePrism>> assertionsForNode;
22+
};
23+
24+
class CommentsAssociatorPrism {
25+
public:
26+
CommentsAssociatorPrism(core::MutableContext ctx, const parser::Prism::Parser &parser,
27+
const std::vector<core::LocOffsets> &commentLocations);
28+
29+
CommentMapPrism run(pm_node_t *node);
30+
31+
private:
32+
core::MutableContext ctx;
33+
const parser::Prism::Parser &parser;
34+
const std::vector<core::LocOffsets> &commentLocations;
35+
};
36+
37+
} // namespace sorbet::rbs
38+
39+
#endif // SORBET_RBS_COMMENTS_ASSOCIATOR_PRISM_H

rbs/prism/SigsRewriterPrism.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "rbs/prism/SigsRewriterPrism.h"
2+
3+
using namespace std;
4+
5+
namespace sorbet::rbs {
6+
7+
SigsRewriterPrism::SigsRewriterPrism(core::MutableContext ctx, const parser::Prism::Parser &parser,
8+
std::map<pm_node_t *, std::vector<CommentNodePrism>> &commentsByNode)
9+
: ctx(ctx), parser(parser), commentsByNode(&commentsByNode) {}
10+
11+
pm_node_t *SigsRewriterPrism::run(pm_node_t *node) {
12+
[[maybe_unused]] auto &_parser = this->parser;
13+
[[maybe_unused]] auto *_commentsByNode = this->commentsByNode;
14+
return node;
15+
}
16+
17+
} // namespace sorbet::rbs

0 commit comments

Comments
 (0)