The very first thing in a Rust source file might be the optional UTF-8
BOM.  This is the 3 bytes 0xEF 0xBB 0xBF.  They can simply be skipped;
they just mark the file as UTF-8.  Add some test cases to show we now
handle such files.
---
 gcc/rust/lex/rust-lex.cc                            | 13 +++++++++++++
 gcc/testsuite/rust/compile/torture/bom.rs           |  1 +
 gcc/testsuite/rust/compile/torture/bom_comment.rs   |  2 ++
 gcc/testsuite/rust/compile/torture/bom_shebang.rs   |  2 ++
 .../rust/compile/torture/bom_whitespace.rs          |  2 ++
 5 files changed, 20 insertions(+)
 create mode 100644 gcc/testsuite/rust/compile/torture/bom.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/bom_comment.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/bom_shebang.rs
 create mode 100644 gcc/testsuite/rust/compile/torture/bom_whitespace.rs

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index ebd69de0fd1..617dd69a080 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -237,6 +237,19 @@ Lexer::build_token ()
       current_char = peek_input ();
       skip_input ();
 
+      // detect UTF8 bom
+      //
+      // Must be the first thing on the first line.
+      // There might be an optional BOM (Byte Order Mark), which for UTF-8 is
+      // the three bytes 0xEF, 0xBB and 0xBF. These can simply be skipped.
+      if (current_line == 1 && current_column == 1 && current_char == 0xef
+         && peek_input () == 0xbb && peek_input (1) == 0xbf)
+       {
+         skip_input (1);
+         current_char = peek_input ();
+         skip_input ();
+       }
+
       // detect shebang
       // Must be the first thing on the first line, starting with #!
       // But since an attribute can also start with an #! we don't count it as a
diff --git a/gcc/testsuite/rust/compile/torture/bom.rs b/gcc/testsuite/rust/compile/torture/bom.rs
new file mode 100644
index 00000000000..5edcab227ee
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/bom.rs
@@ -0,0 +1 @@
+pub fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/bom_comment.rs b/gcc/testsuite/rust/compile/torture/bom_comment.rs
new file mode 100644
index 00000000000..020e1707b55
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/bom_comment.rs
@@ -0,0 +1,2 @@
+// UTF8 BOM
+pub fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/bom_shebang.rs b/gcc/testsuite/rust/compile/torture/bom_shebang.rs
new file mode 100644
index 00000000000..4c552e8d71d
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/bom_shebang.rs
@@ -0,0 +1,2 @@
+#!/usr/bin/cat
+pub fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/bom_whitespace.rs b/gcc/testsuite/rust/compile/torture/bom_whitespace.rs
new file mode 100644
index 00000000000..b10d5654473
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/bom_whitespace.rs
@@ -0,0 +1,2 @@
+
+pub fn main () { }
-- 
2.32.0

-- 
Gcc-rust mailing list
Gcc-rust@gcc.gnu.org
https://gcc.gnu.org/mailman/listinfo/gcc-rust

Reply via email to