[PATCH] Pass pratt parsed token to expr parser functions to fix expr locus
The pratt parser skips the first token of an expression before invoking the actual expression parsing function. This makes getting the correct starting location of a pratt parsed expression hard. The "correction" of the location by subtracting an integer is often wrong (since there may be arbitrary whitespace or comments between tokens). Fix this by passing the token to the expression parsing functions (instead of just providing a pratt_parse boolean). Use this token to set the start of the expression. Before gccrs would generate the following error message: return.rs:3:22: error: cannot ‘break’ outside of a loop 3 | let x = 5 - break return (16 + 2); | ^ Now we get: return.rs:3:17: error: cannot ‘break’ outside of a loop 3 | let x = 5 - break return (16 + 2); | ^ --- gcc/rust/parse/rust-parse-impl.h | 180 +-- gcc/rust/parse/rust-parse.h | 58 +- 2 files changed, 103 insertions(+), 135 deletions(-) diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index 340fea70201..6241a972ff7 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -6580,7 +6580,7 @@ Parser::parse_path_expr_segment () template AST::QualifiedPathInExpression Parser::parse_qualified_path_in_expression ( - bool pratt_parse) + const_TokenPtr pratt_parsed_token) { /* Note: the Rust grammar is defined in such a way that it is impossible to * determine whether a prospective qualified path is a @@ -6595,7 +6595,7 @@ Parser::parse_qualified_path_in_expression ( // parse the qualified path type (required) AST::QualifiedPathType qual_path_type -= parse_qualified_path_type (pratt_parse); += parse_qualified_path_type (pratt_parsed_token); if (qual_path_type.is_error ()) { // TODO: should this create a parse error? @@ -6661,12 +6661,13 @@ Parser::parse_qualified_path_in_expression ( // Parses the type syntactical construction at the start of a qualified path. template AST::QualifiedPathType -Parser::parse_qualified_path_type (bool pratt_parse) +Parser::parse_qualified_path_type ( + const_TokenPtr pratt_parsed_token) { - Location locus = Linemap::unknown_location (); + Location locus; /* TODO: should this actually be error? is there anywhere where this could be * valid? */ - if (!pratt_parse) + if (pratt_parsed_token == nullptr) { locus = lexer.peek_token ()->get_locus (); if (!skip_token (LEFT_ANGLE)) @@ -6676,10 +6677,7 @@ Parser::parse_qualified_path_type (bool pratt_parse) } } else -{ - // move back by 1 if pratt parsing due to skipping '<' - locus = lexer.peek_token ()->get_locus () - 1; -} +locus = pratt_parsed_token->get_locus (); // parse type (required) std::unique_ptr type = parse_type (); @@ -7305,10 +7303,10 @@ Parser::parse_expr_without_block (AST::AttrVec outer_attrs) template std::unique_ptr Parser::parse_block_expr (AST::AttrVec outer_attrs, - bool pratt_parse) + const_TokenPtr pratt_parsed_token) { - Location locus = Linemap::unknown_location (); - if (!pratt_parse) + Location locus; + if (pratt_parsed_token == nullptr) { locus = lexer.peek_token ()->get_locus (); if (!skip_token (LEFT_CURLY)) @@ -7318,9 +7316,7 @@ Parser::parse_block_expr (AST::AttrVec outer_attrs, } } else -{ - locus = lexer.peek_token ()->get_locus () - 1; -} +locus = pratt_parsed_token->get_locus (); AST::AttrVec inner_attrs = parse_inner_attributes (); @@ -7611,22 +7607,17 @@ Parser::parse_literal_expr (AST::AttrVec outer_attrs) // Parses a return expression (including any expression to return). template std::unique_ptr -Parser::parse_return_expr (AST::AttrVec outer_attrs, - bool pratt_parse) +Parser::parse_return_expr ( + AST::AttrVec outer_attrs, const_TokenPtr pratt_parsed_token) { - Location locus = Linemap::unknown_location (); - if (!pratt_parse) + Location locus; + if (pratt_parsed_token == nullptr) { locus = lexer.peek_token ()->get_locus (); - skip_token (RETURN_TOK); } else -{ - // minus 7 chars for 6 in return and a space - // or just TODO: pass in location data - locus = lexer.peek_token ()->get_locus () - 7; -} +locus = pratt_parsed_token->get_locus (); // parse expression to return, if it exists ParseRestrictions restrictions; @@ -7644,21 +7635,16 @@ Parser::parse_return_expr (AST::AttrVec outer_attrs, template std::unique_ptr Parser::parse_break_expr (AST::AttrVec outer_attrs, - bool pratt_parse) + const_TokenPtr pratt_parsed_token) { - Location locus = Linemap::unknown
Re: [PATCH] Pass pratt parsed token to expr parser functions to fix expr locus
I think the core idea of this patch (fixing locations) is very important and useful. But isn’t it overkill to pass the token in instead of just the location? You can avoid a fairly expensive shared_ptr copy by doing so. > On 29 Jul 2021, at 6:13 am, Mark Wielaard wrote: > > The pratt parser skips the first token of an expression before > invoking the actual expression parsing function. This makes getting > the correct starting location of a pratt parsed expression hard. The > "correction" of the location by subtracting an integer is often wrong > (since there may be arbitrary whitespace or comments between > tokens). Fix this by passing the token to the expression parsing > functions (instead of just providing a pratt_parse boolean). Use this > token to set the start of the expression. > > Before gccrs would generate the following error message: > > return.rs:3:22: error: cannot ‘break’ outside of a loop >3 | let x = 5 - break return (16 + 2); > | ^ > > Now we get: > > return.rs:3:17: error: cannot ‘break’ outside of a loop >3 | let x = 5 - break return (16 + 2); > | ^ > --- > gcc/rust/parse/rust-parse-impl.h | 180 +-- > gcc/rust/parse/rust-parse.h | 58 +- > 2 files changed, 103 insertions(+), 135 deletions(-) > > diff --git a/gcc/rust/parse/rust-parse-impl.h > b/gcc/rust/parse/rust-parse-impl.h > index 340fea70201..6241a972ff7 100644 > --- a/gcc/rust/parse/rust-parse-impl.h > +++ b/gcc/rust/parse/rust-parse-impl.h > @@ -6580,7 +6580,7 @@ Parser::parse_path_expr_segment () > template > AST::QualifiedPathInExpression > Parser::parse_qualified_path_in_expression ( > - bool pratt_parse) > + const_TokenPtr pratt_parsed_token) > { > /* Note: the Rust grammar is defined in such a way that it is impossible to >* determine whether a prospective qualified path is a > @@ -6595,7 +6595,7 @@ > Parser::parse_qualified_path_in_expression ( > > // parse the qualified path type (required) > AST::QualifiedPathType qual_path_type > -= parse_qualified_path_type (pratt_parse); > += parse_qualified_path_type (pratt_parsed_token); > if (qual_path_type.is_error ()) > { > // TODO: should this create a parse error? > @@ -6661,12 +6661,13 @@ > Parser::parse_qualified_path_in_expression ( > // Parses the type syntactical construction at the start of a qualified path. > template > AST::QualifiedPathType > -Parser::parse_qualified_path_type (bool pratt_parse) > +Parser::parse_qualified_path_type ( > + const_TokenPtr pratt_parsed_token) > { > - Location locus = Linemap::unknown_location (); > + Location locus; > /* TODO: should this actually be error? is there anywhere where this could > be >* valid? */ > - if (!pratt_parse) > + if (pratt_parsed_token == nullptr) > { > locus = lexer.peek_token ()->get_locus (); > if (!skip_token (LEFT_ANGLE)) > @@ -6676,10 +6677,7 @@ Parser::parse_qualified_path_type > (bool pratt_parse) >} > } > else > -{ > - // move back by 1 if pratt parsing due to skipping '<' > - locus = lexer.peek_token ()->get_locus () - 1; > -} > +locus = pratt_parsed_token->get_locus (); > > // parse type (required) > std::unique_ptr type = parse_type (); > @@ -7305,10 +7303,10 @@ Parser::parse_expr_without_block > (AST::AttrVec outer_attrs) > template > std::unique_ptr > Parser::parse_block_expr (AST::AttrVec outer_attrs, > - bool pratt_parse) > + const_TokenPtr pratt_parsed_token) > { > - Location locus = Linemap::unknown_location (); > - if (!pratt_parse) > + Location locus; > + if (pratt_parsed_token == nullptr) > { > locus = lexer.peek_token ()->get_locus (); > if (!skip_token (LEFT_CURLY)) > @@ -7318,9 +7316,7 @@ Parser::parse_block_expr > (AST::AttrVec outer_attrs, >} > } > else > -{ > - locus = lexer.peek_token ()->get_locus () - 1; > -} > +locus = pratt_parsed_token->get_locus (); > > AST::AttrVec inner_attrs = parse_inner_attributes (); > > @@ -7611,22 +7607,17 @@ Parser::parse_literal_expr > (AST::AttrVec outer_attrs) > // Parses a return expression (including any expression to return). > template > std::unique_ptr > -Parser::parse_return_expr (AST::AttrVec outer_attrs, > - bool pratt_parse) > +Parser::parse_return_expr ( > + AST::AttrVec outer_attrs, const_TokenPtr pratt_parsed_token) > { > - Location locus = Linemap::unknown_location (); > - if (!pratt_parse) > + Location locus; > + if (pratt_parsed_token == nullptr) > { > locus = lexer.peek_token ()->get_locus (); > - > skip_token (RETURN_TOK); > } > else > -{ > - // minus 7 chars for 6 in return and a space > - // or just TODO: pass in location data > - locus = lexer.peek_token ()->get_locus () - 7; > -} > +locus = pratt_parsed_token->get_locus (