From: Arthur Cohen <[email protected]>

gcc/rust/ChangeLog:

        * ast/rust-ast-fragment.cc (Fragment::is_pattern_fragment): New
        function.
        (Fragment::take_pattern_fragment): Likewise.
        (Fragment::assert_single_fragment): Handle patterns.
        * ast/rust-ast-fragment.h: Declare them.
        * ast/rust-ast.cc (SingleASTNode::SingleASTNode): Add new constructor
        for pattern single AST nodes.
        (SingleASTNode::operator=): Handle patterns.
        (SingleASTNode::accept_vis): Likewise.
        (SingleASTNode::is_error): Likewise.
        (SingleASTNode::as_string): Likewise.
        * ast/rust-ast.h (SingleASTNode): Add Pattern kind, constructor and
        take_pattern().
        * ast/rust-expr.h: Add get_pattern_ptr() functions.
        * ast/rust-item.h: Likewise.
        * ast/rust-pattern.h: Likewise.
        * ast/rust-stmt.h: Likewise.
        * expand/rust-expand-visitor.cc (derive_item): Use new API enum values.
        (expand_item_attribute): Likewise.
        (expand_stmt_attribute): Likewise.
        (ExpandVisitor::maybe_expand_pattern): New function.
        (ExpandVisitor::expand_closure_params): Handle patterns.
        (ExpandVisitor::visit): Add new visitors for patterns and missed exprs.
        * expand/rust-expand-visitor.h: Declare them.
        * expand/rust-macro-expand.cc (transcribe_pattern): New function.
        (transcribe_context): Call it.
        * expand/rust-macro-expand.h (struct MacroExpander): New Context type.

gcc/testsuite/ChangeLog:

        * rust/compile/issue-3726.rs: New test.
        * rust/compile/issue-3898.rs: New test.
---
 gcc/rust/ast/rust-ast-fragment.cc        |  14 +++
 gcc/rust/ast/rust-ast-fragment.h         |   2 +
 gcc/rust/ast/rust-ast.cc                 |  16 ++++
 gcc/rust/ast/rust-ast.h                  |  12 +++
 gcc/rust/ast/rust-expr.h                 |   6 ++
 gcc/rust/ast/rust-item.h                 |  12 +++
 gcc/rust/ast/rust-pattern.h              |   6 ++
 gcc/rust/ast/rust-stmt.h                 |   6 ++
 gcc/rust/expand/rust-expand-visitor.cc   | 104 +++++++++++++++++++--
 gcc/rust/expand/rust-expand-visitor.h    |  29 +++---
 gcc/rust/expand/rust-macro-expand.cc     |  24 +++++
 gcc/rust/expand/rust-macro-expand.h      |   1 +
 gcc/testsuite/rust/compile/issue-3726.rs |  17 ++++
 gcc/testsuite/rust/compile/issue-3898.rs | 112 +++++++++++++++++++++++
 14 files changed, 344 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/issue-3726.rs
 create mode 100644 gcc/testsuite/rust/compile/issue-3898.rs

diff --git a/gcc/rust/ast/rust-ast-fragment.cc b/gcc/rust/ast/rust-ast-fragment.cc
index 056fcac55b0..8a547b4bcb5 100644
--- a/gcc/rust/ast/rust-ast-fragment.cc
+++ b/gcc/rust/ast/rust-ast-fragment.cc
@@ -116,6 +116,12 @@ Fragment::is_type_fragment () const
   return is_single_fragment_of_kind (SingleASTNode::Kind::Type);
 }
 
+bool
+Fragment::is_pattern_fragment () const
+{
+  return is_single_fragment_of_kind (SingleASTNode::Kind::Pattern);
+}
+
 std::unique_ptr<Expr>
 Fragment::take_expression_fragment ()
 {
@@ -130,6 +136,13 @@ Fragment::take_type_fragment ()
   return nodes[0].take_type ();
 }
 
+std::unique_ptr<Pattern>
+Fragment::take_pattern_fragment ()
+{
+  assert_single_fragment (SingleASTNode::Kind::Pattern);
+  return nodes[0].take_pattern ();
+}
+
 void
 Fragment::accept_vis (ASTVisitor &vis)
 {
@@ -159,6 +172,7 @@ Fragment::assert_single_fragment (SingleASTNode::Kind expected) const
     {SingleASTNode::Kind::Expr, "expr"},
     {SingleASTNode::Kind::Stmt, "stmt"},
     {SingleASTNode::Kind::Extern, "extern"},
+    {SingleASTNode::Kind::Pattern, "pattern"},
   };
 
   auto actual = nodes[0].get_kind ();
diff --git a/gcc/rust/ast/rust-ast-fragment.h b/gcc/rust/ast/rust-ast-fragment.h
index d7584d0697c..23f26d3e616 100644
--- a/gcc/rust/ast/rust-ast-fragment.h
+++ b/gcc/rust/ast/rust-ast-fragment.h
@@ -86,9 +86,11 @@ public:
 
   bool is_expression_fragment () const;
   bool is_type_fragment () const;
+  bool is_pattern_fragment () const;
 
   std::unique_ptr<Expr> take_expression_fragment ();
   std::unique_ptr<Type> take_type_fragment ();
+  std::unique_ptr<Pattern> take_pattern_fragment ();
 
   void accept_vis (ASTVisitor &vis);
 
diff --git a/gcc/rust/ast/rust-ast.cc b/gcc/rust/ast/rust-ast.cc
index c9b6fcef9d6..7feb7a688f7 100644
--- a/gcc/rust/ast/rust-ast.cc
+++ b/gcc/rust/ast/rust-ast.cc
@@ -71,6 +71,10 @@ SingleASTNode::SingleASTNode (SingleASTNode const &other)
     case Kind::Type:
       type = other.type->clone_type ();
       break;
+
+    case Kind::Pattern:
+      pattern = other.pattern->clone_pattern ();
+      break;
     }
 }
 
@@ -103,6 +107,10 @@ SingleASTNode::operator= (SingleASTNode const &other)
     case Kind::Type:
       type = other.type->clone_type ();
       break;
+
+    case Kind::Pattern:
+      pattern = other.pattern->clone_pattern ();
+      break;
     }
   return *this;
 }
@@ -135,6 +143,10 @@ SingleASTNode::accept_vis (ASTVisitor &vis)
     case Kind::Type:
       type->accept_vis (vis);
       break;
+
+    case Kind::Pattern:
+      pattern->accept_vis (vis);
+      break;
     }
 }
 
@@ -155,6 +167,8 @@ SingleASTNode::is_error ()
       return assoc_item == nullptr;
     case Kind::Type:
       return type == nullptr;
+    case Kind::Pattern:
+      return pattern == nullptr;
     }
 
   rust_unreachable ();
@@ -178,6 +192,8 @@ SingleASTNode::as_string () const
       return "Associated Item: " + assoc_item->as_string ();
     case Kind::Type:
       return "Type: " + type->as_string ();
+    case Kind::Pattern:
+      return "Pattern: " + pattern->as_string ();
     }
 
   rust_unreachable ();
diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h
index 148e297c543..a7e62965201 100644
--- a/gcc/rust/ast/rust-ast.h
+++ b/gcc/rust/ast/rust-ast.h
@@ -1970,6 +1970,7 @@ public:
     Extern,
     Assoc,
     Type,
+    Pattern,
   };
 
 private:
@@ -1982,6 +1983,7 @@ private:
   std::unique_ptr<ExternalItem> external_item;
   std::unique_ptr<AssociatedItem> assoc_item;
   std::unique_ptr<Type> type;
+  std::unique_ptr<Pattern> pattern;
 
 public:
   SingleASTNode (std::unique_ptr<Expr> expr)
@@ -2008,6 +2010,10 @@ public:
     : kind (Kind::Type), type (std::move (type))
   {}
 
+  SingleASTNode (std::unique_ptr<Pattern> pattern)
+    : kind (Kind::Pattern), pattern (std::move (pattern))
+  {}
+
   SingleASTNode (SingleASTNode const &other);
 
   SingleASTNode operator= (SingleASTNode const &other);
@@ -2076,6 +2082,12 @@ public:
     return std::move (type);
   }
 
+  std::unique_ptr<Pattern> take_pattern ()
+  {
+    rust_assert (!is_error ());
+    return std::move (pattern);
+  }
+
   void accept_vis (ASTVisitor &vis) override;
 
   bool is_error ();
diff --git a/gcc/rust/ast/rust-expr.h b/gcc/rust/ast/rust-expr.h
index ee3919a9a91..94d9ba13f1d 100644
--- a/gcc/rust/ast/rust-expr.h
+++ b/gcc/rust/ast/rust-expr.h
@@ -2492,6 +2492,12 @@ public:
     return *pattern;
   }
 
+  std::unique_ptr<Pattern> &get_pattern_ptr ()
+  {
+    rust_assert (pattern != nullptr);
+    return pattern;
+  }
+
   Type &get_type ()
   {
     rust_assert (has_type_given ());
diff --git a/gcc/rust/ast/rust-item.h b/gcc/rust/ast/rust-item.h
index 375f66ddcc1..3fd49f4b199 100644
--- a/gcc/rust/ast/rust-item.h
+++ b/gcc/rust/ast/rust-item.h
@@ -631,6 +631,12 @@ public:
     return *param_name;
   }
 
+  std::unique_ptr<Pattern> &get_pattern_ptr ()
+  {
+    rust_assert (param_name != nullptr);
+    return param_name;
+  }
+
   const Pattern &get_pattern () const
   {
     rust_assert (param_name != nullptr);
@@ -714,6 +720,12 @@ public:
     return *param_name;
   }
 
+  std::unique_ptr<Pattern> &get_pattern_ptr ()
+  {
+    rust_assert (param_name != nullptr);
+    return param_name;
+  }
+
   bool has_name () const { return param_name != nullptr; }
 
   // TODO: is this better? Or is a "vis_block" better?
diff --git a/gcc/rust/ast/rust-pattern.h b/gcc/rust/ast/rust-pattern.h
index 4948159f537..029a5b36c6a 100644
--- a/gcc/rust/ast/rust-pattern.h
+++ b/gcc/rust/ast/rust-pattern.h
@@ -1509,6 +1509,12 @@ public:
     return *pattern_in_parens;
   }
 
+  std::unique_ptr<Pattern> &get_pattern_in_parens_ptr ()
+  {
+    rust_assert (pattern_in_parens != nullptr);
+    return pattern_in_parens;
+  }
+
   NodeId get_node_id () const override { return node_id; }
 
   Pattern::Kind get_pattern_kind () override { return Pattern::Kind::Grouped; }
diff --git a/gcc/rust/ast/rust-stmt.h b/gcc/rust/ast/rust-stmt.h
index f843a79b3f9..5fb00ef1805 100644
--- a/gcc/rust/ast/rust-stmt.h
+++ b/gcc/rust/ast/rust-stmt.h
@@ -201,6 +201,12 @@ public:
     return *variables_pattern;
   }
 
+  std::unique_ptr<Pattern> &get_pattern_ptr ()
+  {
+    rust_assert (variables_pattern != nullptr);
+    return variables_pattern;
+  }
+
   Type &get_type ()
   {
     rust_assert (has_type ());
diff --git a/gcc/rust/expand/rust-expand-visitor.cc b/gcc/rust/expand/rust-expand-visitor.cc
index a53f0640109..c1833c276c5 100644
--- a/gcc/rust/expand/rust-expand-visitor.cc
+++ b/gcc/rust/expand/rust-expand-visitor.cc
@@ -18,6 +18,7 @@
 
 #include "rust-expand-visitor.h"
 #include "rust-ast-fragment.h"
+#include "rust-item.h"
 #include "rust-proc-macro.h"
 #include "rust-attributes.h"
 #include "rust-ast.h"
@@ -62,7 +63,7 @@ derive_item (AST::Item &item, AST::SimplePath &to_derive,
        {
          switch (node.get_kind ())
            {
-           case AST::SingleASTNode::ITEM:
+           case AST::SingleASTNode::Kind::Item:
              result.push_back (node.take_item ());
              break;
            default:
@@ -85,7 +86,7 @@ expand_item_attribute (AST::Item &item, AST::SimplePath &name,
        {
          switch (node.get_kind ())
            {
-           case AST::SingleASTNode::ITEM:
+           case AST::SingleASTNode::Kind::Item:
              result.push_back (node.take_item ());
              break;
            default:
@@ -114,7 +115,7 @@ expand_stmt_attribute (T &statement, AST::SimplePath &attribute,
        {
          switch (node.get_kind ())
            {
-           case AST::SingleASTNode::STMT:
+           case AST::SingleASTNode::Kind::Stmt:
              result.push_back (node.take_stmt ());
              break;
            default:
@@ -380,6 +381,23 @@ ExpandVisitor::maybe_expand_type (std::unique_ptr<AST::TypeNoBounds> &type)
       final_fragment.take_type_fragment (), BUILTINS_LOCATION);
 }
 
+void
+ExpandVisitor::maybe_expand_pattern (std::unique_ptr<AST::Pattern> &pattern)
+{
+  NodeId old_expect = pattern->get_node_id ();
+  std::swap (macro_invoc_expect_id, old_expect);
+
+  expander.push_context (MacroExpander::ContextType::PATTERN);
+  pattern->accept_vis (*this);
+  expander.pop_context ();
+
+  std::swap (macro_invoc_expect_id, old_expect);
+
+  auto final_fragment = expander.take_expanded_fragment ();
+  if (final_fragment.should_expand () && final_fragment.is_pattern_fragment ())
+    pattern = final_fragment.take_pattern_fragment ();
+}
+
 // FIXME: Can this be refactored into a `scoped` method? Which takes a
 // ContextType as parameter and a lambda? And maybe just an std::vector<T>&?
 void
@@ -452,6 +470,8 @@ ExpandVisitor::expand_closure_params (std::vector<AST::ClosureParam> &params)
 {
   for (auto &param : params)
     {
+      maybe_expand_pattern (param.get_pattern_ptr ());
+
       if (param.has_type_given ())
        maybe_expand_type (param.get_type_ptr ());
     }
@@ -729,7 +749,7 @@ ExpandVisitor::visit (AST::MatchExpr &expr)
       auto &arm = match_case.get_arm ();
 
       for (auto &pattern : arm.get_patterns ())
-       visit (pattern);
+       maybe_expand_pattern (pattern);
 
       if (arm.has_match_arm_guard ())
        maybe_expand_expr (arm.get_guard_expr_ptr ());
@@ -738,6 +758,13 @@ ExpandVisitor::visit (AST::MatchExpr &expr)
     }
 }
 
+void
+ExpandVisitor::visit (AST::TupleExpr &expr)
+{
+  for (auto &sub : expr.get_tuple_elems ())
+    maybe_expand_expr (sub);
+}
+
 void
 ExpandVisitor::visit (AST::TypeParam &param)
 {
@@ -1013,13 +1040,70 @@ ExpandVisitor::visit (AST::StructPatternFieldIdent &field)
 void
 ExpandVisitor::visit (AST::GroupedPattern &pattern)
 {
-  visit (pattern.get_pattern_in_parens ());
+  maybe_expand_pattern (pattern.get_pattern_in_parens_ptr ());
+}
+
+void
+ExpandVisitor::visit (AST::SlicePatternItemsNoRest &items)
+{
+  for (auto &sub : items.get_patterns ())
+    maybe_expand_pattern (sub);
+}
+
+void
+ExpandVisitor::visit (AST::SlicePatternItemsHasRest &items)
+{
+  for (auto &sub : items.get_lower_patterns ())
+    maybe_expand_pattern (sub);
+  for (auto &sub : items.get_upper_patterns ())
+    maybe_expand_pattern (sub);
+}
+
+void
+ExpandVisitor::visit (AST::AltPattern &pattern)
+{
+  for (auto &alt : pattern.get_alts ())
+    maybe_expand_pattern (alt);
+}
+
+void
+ExpandVisitor::visit (AST::TupleStructItemsNoRange &tuple_items)
+{
+  for (auto &sub : tuple_items.get_patterns ())
+    maybe_expand_pattern (sub);
+}
+
+void
+ExpandVisitor::visit (AST::TupleStructItemsRange &tuple_items)
+{
+  for (auto &sub : tuple_items.get_lower_patterns ())
+    maybe_expand_pattern (sub);
+
+  for (auto &sub : tuple_items.get_upper_patterns ())
+    maybe_expand_pattern (sub);
+}
+
+void
+ExpandVisitor::visit (AST::TuplePatternItemsMultiple &tuple_items)
+{
+  for (auto &sub : tuple_items.get_patterns ())
+    maybe_expand_pattern (sub);
+}
+
+void
+ExpandVisitor::visit (AST::TuplePatternItemsRanged &tuple_items)
+{
+  for (auto &sub : tuple_items.get_lower_patterns ())
+    maybe_expand_pattern (sub);
+
+  for (auto &sub : tuple_items.get_upper_patterns ())
+    maybe_expand_pattern (sub);
 }
 
 void
 ExpandVisitor::visit (AST::LetStmt &stmt)
 {
-  visit (stmt.get_pattern ());
+  maybe_expand_pattern (stmt.get_pattern_ptr ());
 
   if (stmt.has_type ())
     maybe_expand_type (stmt.get_type_ptr ());
@@ -1049,9 +1133,17 @@ ExpandVisitor::visit (AST::BareFunctionType &type)
 void
 ExpandVisitor::visit (AST::FunctionParam &param)
 {
+  maybe_expand_pattern (param.get_pattern_ptr ());
   maybe_expand_type (param.get_type_ptr ());
 }
 
+void
+ExpandVisitor::visit (AST::VariadicParam &param)
+{
+  if (param.has_pattern ())
+    maybe_expand_pattern (param.get_pattern_ptr ());
+}
+
 void
 ExpandVisitor::visit (AST::SelfParam &param)
 {
diff --git a/gcc/rust/expand/rust-expand-visitor.h b/gcc/rust/expand/rust-expand-visitor.h
index 01a0d1cb297..8fee291d595 100644
--- a/gcc/rust/expand/rust-expand-visitor.h
+++ b/gcc/rust/expand/rust-expand-visitor.h
@@ -20,6 +20,7 @@
 #define RUST_EXPAND_VISITOR_H
 
 #include "rust-ast-visitor.h"
+#include "rust-item.h"
 #include "rust-macro-expand.h"
 #include "rust-proc-macro.h"
 
@@ -47,18 +48,15 @@ public:
 
   using AST::DefaultASTVisitor::visit;
 
-  /*
-     Maybe expand a macro invocation in lieu of an expression
-     expr : Core guidelines R33, this function reseat the pointer.
-  */
-  void maybe_expand_expr (std::unique_ptr<AST::Expr> &expr);
-
-  /*
-     Maybe expand a macro invocation in lieu of a type
-     type : Core guidelines R33, this function reseat the pointer.
+  /**
+   * Maybe expand a macro invocation in lieu of an expression, type or pattern.
+   *
+   * @ptr Core guidelines R33, this function reseats the pointer.
    */
-  void maybe_expand_type (std::unique_ptr<AST::Type> &type);
+  void maybe_expand_expr (std::unique_ptr<AST::Expr> &ptr);
+  void maybe_expand_type (std::unique_ptr<AST::Type> &ptr);
   void maybe_expand_type (std::unique_ptr<AST::TypeNoBounds> &type);
+  void maybe_expand_pattern (std::unique_ptr<AST::Pattern> &ptr);
 
   /**
    * Expand all macro invocations in lieu of types within a vector of struct
@@ -239,6 +237,7 @@ public:
   void visit (AST::IfLetExpr &expr) override;
   void visit (AST::IfLetExprConseqElse &expr) override;
   void visit (AST::MatchExpr &expr) override;
+  void visit (AST::TupleExpr &expr) override;
   void visit (AST::TypeParam &param) override;
   void visit (AST::LifetimeWhereClauseItem &) override;
   void visit (AST::TypeBoundWhereClauseItem &item) override;
@@ -276,12 +275,20 @@ public:
   void visit (AST::MetaListNameValueStr &) override;
   void visit (AST::StructPatternFieldIdent &field) override;
   void visit (AST::GroupedPattern &pattern) override;
+  void visit (AST::SlicePatternItemsNoRest &items) override;
+  void visit (AST::SlicePatternItemsHasRest &items) override;
+  void visit (AST::AltPattern &pattern) override;
+  void visit (AST::TupleStructItemsNoRange &tuple_items) override;
+  void visit (AST::TupleStructItemsRange &tuple_items) override;
+  void visit (AST::TuplePatternItemsMultiple &tuple_items) override;
+  void visit (AST::TuplePatternItemsRanged &tuple_items) override;
 
   void visit (AST::LetStmt &stmt) override;
   void visit (AST::ExprStmt &stmt) override;
 
   void visit (AST::BareFunctionType &type) override;
-  void visit (AST::FunctionParam &type) override;
+  void visit (AST::FunctionParam &param) override;
+  void visit (AST::VariadicParam &param) override;
   void visit (AST::SelfParam &type) override;
 
   template <typename T>
diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc
index dfead3acc1d..edaf8910d06 100644
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@@ -1000,6 +1000,27 @@ transcribe_type (Parser<MacroInvocLexer> &parser)
   return AST::Fragment ({std::move (type)}, lexer.get_token_slice (start, end));
 }
 
+/**
+ * Transcribe one pattern from a macro invocation
+ *
+ * @param parser Parser to extract the pattern from
+ */
+static AST::Fragment
+transcribe_pattern (Parser<MacroInvocLexer> &parser)
+{
+  auto &lexer = parser.get_token_source ();
+  auto start = lexer.get_offs ();
+
+  auto pattern = parser.parse_pattern ();
+  for (auto err : parser.get_errors ())
+    err.emit ();
+
+  auto end = lexer.get_offs ();
+
+  return AST::Fragment ({std::move (pattern)},
+                       lexer.get_token_slice (start, end));
+}
+
 static AST::Fragment
 transcribe_context (MacroExpander::ContextType ctx,
                    Parser<MacroInvocLexer> &parser, bool semicolon,
@@ -1012,6 +1033,7 @@ transcribe_context (MacroExpander::ContextType ctx,
   //     -- Trait --> parser.parse_trait_item();
   //     -- Impl --> parser.parse_impl_item();
   //     -- Extern --> parser.parse_extern_item();
+  //     -- Pattern --> parser.parse_pattern();
   //     -- None --> [has semicolon?]
   //                 -- Yes --> parser.parse_stmt();
   //                 -- No --> [switch invocation.delimiter()]
@@ -1040,6 +1062,8 @@ transcribe_context (MacroExpander::ContextType ctx,
       break;
     case MacroExpander::ContextType::TYPE:
       return transcribe_type (parser);
+    case MacroExpander::ContextType::PATTERN:
+      return transcribe_pattern (parser);
       break;
     case MacroExpander::ContextType::STMT:
       return transcribe_many_stmts (parser, last_token_id, semicolon);
diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h
index 360294c6bf9..901583f5d8a 100644
--- a/gcc/rust/expand/rust-macro-expand.h
+++ b/gcc/rust/expand/rust-macro-expand.h
@@ -291,6 +291,7 @@ struct MacroExpander
     TRAIT,
     IMPL,
     TRAIT_IMPL,
+    PATTERN,
   };
 
   ExpansionCfg cfg;
diff --git a/gcc/testsuite/rust/compile/issue-3726.rs b/gcc/testsuite/rust/compile/issue-3726.rs
new file mode 100644
index 00000000000..ced87a57457
--- /dev/null
+++ b/gcc/testsuite/rust/compile/issue-3726.rs
@@ -0,0 +1,17 @@
+pub enum TypeCtor {
+    Slice,
+    Array,
+}
+pub struct ApplicationTy(TypeCtor);
+
+macro_rules! ty_app {
+    ($ctor:pat) => {
+        ApplicationTy($ctor)
+    };
+}
+
+pub fn foo(ty: ApplicationTy) {
+    match ty {
+        ty_app!(TypeCtor::Array) => {}
+    }
+}
diff --git a/gcc/testsuite/rust/compile/issue-3898.rs b/gcc/testsuite/rust/compile/issue-3898.rs
new file mode 100644
index 00000000000..114370cf881
--- /dev/null
+++ b/gcc/testsuite/rust/compile/issue-3898.rs
@@ -0,0 +1,112 @@
+// { dg-additional-options "-frust-compile-until=lowering" }
+
+#[lang = "sized"]
+trait Sized {}
+
+enum Result<T, E> {
+    Ok(T),
+    Err(E),
+}
+
+use Result::{Err, Ok};
+
+struct Utf8Error;
+
+const CONT_MASK: u8 = 15;
+const TAG_CONT_U8: u8 = 15;
+
+#[inline(always)]
+pub fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
+    let mut index = 0;
+    let len = 64;
+
+    let usize_bytes = 8;
+    let ascii_block_size = 2 * usize_bytes;
+    let blocks_end = if len >= ascii_block_size {
+        len - ascii_block_size + 1
+    } else {
+        0
+    };
+
+    while index < len {
+        let old_offset = index;
+        macro_rules! err {
+            ($error_len: expr) => {
+                return Err(Utf8Error)
+            };
+        }
+
+        macro_rules! next {
+            () => {{
+                index += 1;
+                // we needed data, but there was none: error!
+                if index >= len {
+                    err!(None)
+                }
+                v[index]
+            }};
+        }
+
+        let first = v[index];
+        if first >= 128 {
+            let w = 15;
+            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
+            //        first  C2 80        last DF BF
+            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
+            //        first  E0 A0 80     last EF BF BF
+            //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
+            //               ED A0 80 to       ED BF BF
+            // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
+            //        first  F0 90 80 80  last F4 8F BF BF
+            //
+            // Use the UTF-8 syntax from the RFC
+            //
+            // https://tools.ietf.org/html/rfc3629
+            // UTF8-1      = %x00-7F
+            // UTF8-2      = %xC2-DF UTF8-tail
+            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+            //               %xF4 %x80-8F 2( UTF8-tail )
+            match w {
+                2 => {
+                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                        err!(Some(1))
+                    }
+                }
+                3 => {
+                    match (first, next!()) {
+                        (0xE0, 0xA0..=0xBF)
+                        | (0xE1..=0xEC, 0x80..=0xBF)
+                        | (0xED, 0x80..=0x9F)
+                        | (0xEE..=0xEF, 0x80..=0xBF) => {}
+                        _ => err!(Some(1)),
+                    }
+                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                        err!(Some(2))
+                    }
+                }
+                4 => {
+                    match (first, next!()) {
+                        (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
+                        _ => err!(Some(1)),
+                    }
+                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                        err!(Some(2))
+                    }
+                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                        err!(Some(3))
+                    }
+                }
+                _ => err!(Some(1)),
+            }
+            index += 1;
+        } else {
+            index += 1;
+        }
+    }
+
+    Ok(())
+}
+
+fn main() {}
-- 
2.50.1

Reply via email to