The lexer tried to handle the shebang line but used loc directly,
instead of the current_column. And it assumed a '/' should immediately
follow the "#!". But if the "#!" is followed by whitespace and/or
comments and a '[' character, then the first line isn't seen as a
shebang line (even if the kernel or shell would treat it as one) but
as the start of an inner attribute.

Add various tests for when the first line starting with "#!" is seen
as a shebang line (and should be skipped). And add some tests where
there is a '[' character following some whitespace and/or comments, so
the "#!" is seen as part of an inner attribute.
---
 gcc/rust/lex/rust-lex.cc                      | 79 ++++++++++++++-----
 .../rust/compile/torture/not_shebang.rs       |  3 +
 .../torture/not_shebang_block_comment.rs      |  1 +
 .../compile/torture/not_shebang_comment.rs    |  3 +
 .../torture/not_shebang_multiline_comment.rs  |  7 ++
 .../compile/torture/not_shebang_spaces.rs     |  6 ++
 gcc/testsuite/rust/compile/torture/shebang.rs |  3 +
 .../rust/compile/torture/shebang_plus_attr.rs |  3 +
 .../compile/torture/shebang_plus_attr2.rs     |  3 +
 9 files changed, 89 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
 create mode 100755 gcc/testsuite/rust/compile/torture/shebang.rs
 create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
 create mode 100755 gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index d1384168731..ebd69de0fd1 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -237,28 +237,63 @@ Lexer::build_token ()
       current_char = peek_input ();
       skip_input ();
 
-      // return end of file token if end of file
-      if (current_char == EOF)
-       return Token::make (END_OF_FILE, loc);
-
       // detect shebang
-      if (loc == 1 && current_line == 1 && current_char == '#')
+      // Must be the first thing on the first line, starting with #!
+      // But since an attribute can also start with an #! we don't count it as a
+      // shebang line when after any whitespace or comments there is a [. If it
+      // is a shebang line we simply drop the line. Otherwise we don't consume
+      // any characters and fall through to the real tokenizer.
+      if (current_line == 1 && current_column == 1 && current_char == '#'
+         && peek_input () == '!')
        {
-         current_char = peek_input ();
-
-         if (current_char == '!')
+         int n = 1;
+         while (true)
            {
-             skip_input ();
-             current_char = peek_input ();
-
-             if (current_char == '/')
+             int next_char = peek_input (n);
+             if (is_whitespace (next_char))
+               n++;
+             else if (next_char == '/' && peek_input (n + 1) == '/')
                {
-                 // definitely shebang
-
-                 skip_input ();
-
-                 // ignore rest of line
-                 while (current_char != '\n')
+                 // A single line comment
+                 n += 2;
+                 next_char = peek_input (n);
+                 while (next_char != '\n' && next_char != EOF)
+                   {
+                     n++;
+                     next_char = peek_input (n);
+                   }
+                 if (next_char == '\n')
+                   n++;
+               }
+             else if (next_char == '/' && peek_input (n + 1) == '*')
+               {
+                 // Start of a block comment
+                 n += 2;
+                 int level = 1;
+                 while (level > 0)
+                   {
+                     if (peek_input (n) == EOF)
+                       break;
+                     else if (peek_input (n) == '/'
+                              && peek_input (n + 1) == '*')
+                       {
+                         n += 2;
+                         level += 1;
+                       }
+                     else if (peek_input (n) == '*'
+                              && peek_input (n + 1) == '/')
+                       {
+                         n += 2;
+                         level -= 1;
+                       }
+                     else
+                       n++;
+                   }
+               }
+             else if (next_char != '[')
+               {
+                 // definitely shebang, ignore the first line
+                 while (current_char != '\n' && current_char != EOF)
                    {
                      current_char = peek_input ();
                      skip_input ();
@@ -269,11 +304,17 @@ Lexer::build_token ()
                  current_column = 1;
                  // tell line_table that new line starts
                  line_map->start_line (current_line, max_column_hint);
-                 continue;
+                 break;
                }
+             else
+               break; /* Definitely not a shebang line. */
            }
        }
 
+      // return end of file token if end of file
+      if (current_char == EOF)
+       return Token::make (END_OF_FILE, loc);
+
       // if not end of file, start tokenising
       switch (current_char)
        {
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang.rs b/gcc/testsuite/rust/compile/torture/not_shebang.rs
new file mode 100644
index 00000000000..37e01b65940
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang.rs
@@ -0,0 +1,3 @@
+#!
+[allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
new file mode 100644
index 00000000000..662f6506749
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
@@ -0,0 +1 @@
+#!/*/this/is/a/comment*/[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
new file mode 100644
index 00000000000..273ae4e8e2a
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
@@ -0,0 +1,3 @@
+#!//this/is/a/comment
+[allow(unused)]   
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
new file mode 100644
index 00000000000..86800b14cb3
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
@@ -0,0 +1,7 @@
+#!//this/is/a/comment
+
+/* Also a /* nested */
+   multiline // comment
+   with some more whitespace after, but then finally a [, so not a real #! line.  */
+
+[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
new file mode 100644
index 00000000000..6b94a69111a
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
@@ -0,0 +1,6 @@
+#!   
+
+    [allow(unused)]   
+
+        fn main () { }
+    
diff --git a/gcc/testsuite/rust/compile/torture/shebang.rs b/gcc/testsuite/rust/compile/torture/shebang.rs
new file mode 100755
index 00000000000..1c8b9c9a955
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat 
+
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
new file mode 100755
index 00000000000..075bc6cf594
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat 
+#![allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
new file mode 100755
index 00000000000..ece8a52381c
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
@@ -0,0 +1,3 @@
+#!//usr/bin/env cat 
+#![allow(unused)]
+fn main () { }
-- 
2.32.0

-- 
Gcc-rust mailing list
Gcc-rust@gcc.gnu.org
https://gcc.gnu.org/mailman/listinfo/gcc-rust

Reply via email to