[PATCH] Pass pratt parsed token to expr parser functions to fix expr locus

2021-07-28 Thread Mark Wielaard
The pratt parser skips the first token of an expression before
invoking the actual expression parsing function. This makes getting
the correct starting location of a pratt parsed expression hard. The
"correction" of the location by subtracting an integer is often wrong
(since there may be arbitrary whitespace or comments between
tokens). Fix this by passing the token to the expression parsing
functions (instead of just providing a pratt_parse boolean). Use this
token to set the start of the expression.

Before gccrs would generate the following error message:

return.rs:3:22: error: cannot ‘break’ outside of a loop
3 | let x = 5 - break return (16 + 2);
  |  ^

Now we get:

return.rs:3:17: error: cannot ‘break’ outside of a loop
3 | let x = 5 - break return (16 + 2);
  | ^
---
 gcc/rust/parse/rust-parse-impl.h | 180 +--
 gcc/rust/parse/rust-parse.h  |  58 +-
 2 files changed, 103 insertions(+), 135 deletions(-)

diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 340fea70201..6241a972ff7 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -6580,7 +6580,7 @@ Parser::parse_path_expr_segment ()
 template 
 AST::QualifiedPathInExpression
 Parser::parse_qualified_path_in_expression (
-  bool pratt_parse)
+  const_TokenPtr pratt_parsed_token)
 {
   /* Note: the Rust grammar is defined in such a way that it is impossible to
* determine whether a prospective qualified path is a
@@ -6595,7 +6595,7 @@ 
Parser::parse_qualified_path_in_expression (
 
   // parse the qualified path type (required)
   AST::QualifiedPathType qual_path_type
-= parse_qualified_path_type (pratt_parse);
+= parse_qualified_path_type (pratt_parsed_token);
   if (qual_path_type.is_error ())
 {
   // TODO: should this create a parse error?
@@ -6661,12 +6661,13 @@ 
Parser::parse_qualified_path_in_expression (
 // Parses the type syntactical construction at the start of a qualified path.
 template 
 AST::QualifiedPathType
-Parser::parse_qualified_path_type (bool pratt_parse)
+Parser::parse_qualified_path_type (
+  const_TokenPtr pratt_parsed_token)
 {
-  Location locus = Linemap::unknown_location ();
+  Location locus;
   /* TODO: should this actually be error? is there anywhere where this could be
* valid? */
-  if (!pratt_parse)
+  if (pratt_parsed_token == nullptr)
 {
   locus = lexer.peek_token ()->get_locus ();
   if (!skip_token (LEFT_ANGLE))
@@ -6676,10 +6677,7 @@ Parser::parse_qualified_path_type 
(bool pratt_parse)
}
 }
   else
-{
-  // move back by 1 if pratt parsing due to skipping '<'
-  locus = lexer.peek_token ()->get_locus () - 1;
-}
+locus = pratt_parsed_token->get_locus ();
 
   // parse type (required)
   std::unique_ptr type = parse_type ();
@@ -7305,10 +7303,10 @@ Parser::parse_expr_without_block 
(AST::AttrVec outer_attrs)
 template 
 std::unique_ptr
 Parser::parse_block_expr (AST::AttrVec outer_attrs,
- bool pratt_parse)
+ const_TokenPtr pratt_parsed_token)
 {
-  Location locus = Linemap::unknown_location ();
-  if (!pratt_parse)
+  Location locus;
+  if (pratt_parsed_token == nullptr)
 {
   locus = lexer.peek_token ()->get_locus ();
   if (!skip_token (LEFT_CURLY))
@@ -7318,9 +7316,7 @@ Parser::parse_block_expr 
(AST::AttrVec outer_attrs,
}
 }
   else
-{
-  locus = lexer.peek_token ()->get_locus () - 1;
-}
+locus = pratt_parsed_token->get_locus ();
 
   AST::AttrVec inner_attrs = parse_inner_attributes ();
 
@@ -7611,22 +7607,17 @@ Parser::parse_literal_expr 
(AST::AttrVec outer_attrs)
 // Parses a return expression (including any expression to return).
 template 
 std::unique_ptr
-Parser::parse_return_expr (AST::AttrVec outer_attrs,
-  bool pratt_parse)
+Parser::parse_return_expr (
+  AST::AttrVec outer_attrs, const_TokenPtr pratt_parsed_token)
 {
-  Location locus = Linemap::unknown_location ();
-  if (!pratt_parse)
+  Location locus;
+  if (pratt_parsed_token == nullptr)
 {
   locus = lexer.peek_token ()->get_locus ();
-
   skip_token (RETURN_TOK);
 }
   else
-{
-  // minus 7 chars for 6 in return and a space
-  // or just TODO: pass in location data
-  locus = lexer.peek_token ()->get_locus () - 7;
-}
+locus = pratt_parsed_token->get_locus ();
 
   // parse expression to return, if it exists
   ParseRestrictions restrictions;
@@ -7644,21 +7635,16 @@ Parser::parse_return_expr 
(AST::AttrVec outer_attrs,
 template 
 std::unique_ptr
 Parser::parse_break_expr (AST::AttrVec outer_attrs,
- bool pratt_parse)
+ const_TokenPtr pratt_parsed_token)
 {
-  Location locus = Linemap::unknown

Re: [PATCH] Pass pratt parsed token to expr parser functions to fix expr locus

2021-07-28 Thread The Other via Gcc-rust
I think the core idea of this patch (fixing locations) is very important and 
useful.

But isn’t it overkill to pass the token in instead of just the location? You 
can avoid a fairly expensive shared_ptr copy by doing so. 

> On 29 Jul 2021, at 6:13 am, Mark Wielaard  wrote:
> 
> The pratt parser skips the first token of an expression before
> invoking the actual expression parsing function. This makes getting
> the correct starting location of a pratt parsed expression hard. The
> "correction" of the location by subtracting an integer is often wrong
> (since there may be arbitrary whitespace or comments between
> tokens). Fix this by passing the token to the expression parsing
> functions (instead of just providing a pratt_parse boolean). Use this
> token to set the start of the expression.
> 
> Before gccrs would generate the following error message:
> 
> return.rs:3:22: error: cannot ‘break’ outside of a loop
>3 | let x = 5 - break return (16 + 2);
>  |  ^
> 
> Now we get:
> 
> return.rs:3:17: error: cannot ‘break’ outside of a loop
>3 | let x = 5 - break return (16 + 2);
>  | ^
> ---
> gcc/rust/parse/rust-parse-impl.h | 180 +--
> gcc/rust/parse/rust-parse.h  |  58 +-
> 2 files changed, 103 insertions(+), 135 deletions(-)
> 
> diff --git a/gcc/rust/parse/rust-parse-impl.h 
> b/gcc/rust/parse/rust-parse-impl.h
> index 340fea70201..6241a972ff7 100644
> --- a/gcc/rust/parse/rust-parse-impl.h
> +++ b/gcc/rust/parse/rust-parse-impl.h
> @@ -6580,7 +6580,7 @@ Parser::parse_path_expr_segment ()
> template 
> AST::QualifiedPathInExpression
> Parser::parse_qualified_path_in_expression (
> -  bool pratt_parse)
> +  const_TokenPtr pratt_parsed_token)
> {
>   /* Note: the Rust grammar is defined in such a way that it is impossible to
>* determine whether a prospective qualified path is a
> @@ -6595,7 +6595,7 @@ 
> Parser::parse_qualified_path_in_expression (
> 
>   // parse the qualified path type (required)
>   AST::QualifiedPathType qual_path_type
> -= parse_qualified_path_type (pratt_parse);
> += parse_qualified_path_type (pratt_parsed_token);
>   if (qual_path_type.is_error ())
> {
>   // TODO: should this create a parse error?
> @@ -6661,12 +6661,13 @@ 
> Parser::parse_qualified_path_in_expression (
> // Parses the type syntactical construction at the start of a qualified path.
> template 
> AST::QualifiedPathType
> -Parser::parse_qualified_path_type (bool pratt_parse)
> +Parser::parse_qualified_path_type (
> +  const_TokenPtr pratt_parsed_token)
> {
> -  Location locus = Linemap::unknown_location ();
> +  Location locus;
>   /* TODO: should this actually be error? is there anywhere where this could 
> be
>* valid? */
> -  if (!pratt_parse)
> +  if (pratt_parsed_token == nullptr)
> {
>   locus = lexer.peek_token ()->get_locus ();
>   if (!skip_token (LEFT_ANGLE))
> @@ -6676,10 +6677,7 @@ Parser::parse_qualified_path_type 
> (bool pratt_parse)
>}
> }
>   else
> -{
> -  // move back by 1 if pratt parsing due to skipping '<'
> -  locus = lexer.peek_token ()->get_locus () - 1;
> -}
> +locus = pratt_parsed_token->get_locus ();
> 
>   // parse type (required)
>   std::unique_ptr type = parse_type ();
> @@ -7305,10 +7303,10 @@ Parser::parse_expr_without_block 
> (AST::AttrVec outer_attrs)
> template 
> std::unique_ptr
> Parser::parse_block_expr (AST::AttrVec outer_attrs,
> -  bool pratt_parse)
> +  const_TokenPtr pratt_parsed_token)
> {
> -  Location locus = Linemap::unknown_location ();
> -  if (!pratt_parse)
> +  Location locus;
> +  if (pratt_parsed_token == nullptr)
> {
>   locus = lexer.peek_token ()->get_locus ();
>   if (!skip_token (LEFT_CURLY))
> @@ -7318,9 +7316,7 @@ Parser::parse_block_expr 
> (AST::AttrVec outer_attrs,
>}
> }
>   else
> -{
> -  locus = lexer.peek_token ()->get_locus () - 1;
> -}
> +locus = pratt_parsed_token->get_locus ();
> 
>   AST::AttrVec inner_attrs = parse_inner_attributes ();
> 
> @@ -7611,22 +7607,17 @@ Parser::parse_literal_expr 
> (AST::AttrVec outer_attrs)
> // Parses a return expression (including any expression to return).
> template 
> std::unique_ptr
> -Parser::parse_return_expr (AST::AttrVec outer_attrs,
> -   bool pratt_parse)
> +Parser::parse_return_expr (
> +  AST::AttrVec outer_attrs, const_TokenPtr pratt_parsed_token)
> {
> -  Location locus = Linemap::unknown_location ();
> -  if (!pratt_parse)
> +  Location locus;
> +  if (pratt_parsed_token == nullptr)
> {
>   locus = lexer.peek_token ()->get_locus ();
> -
>   skip_token (RETURN_TOK);
> }
>   else
> -{
> -  // minus 7 chars for 6 in return and a space
> -  // or just TODO: pass in location data
> -  locus = lexer.peek_token ()->get_locus () - 7;
> -}
> +locus = pratt_parsed_token->get_locus (