This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo-access.git


The following commit(s) were added to refs/heads/main by this push:
     new bb66979  reimplements java parser and evaluator (#7)
bb66979 is described below

commit bb66979416e605f2357cba47d2f1df9c10a3144b
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Wed Sep 20 16:06:19 2023 -0400

    reimplements java parser and evaluator (#7)
    
    Re-implementation of the parser and parse tree that's easier to understand 
and more closely aligned with the grammar for the language.  This new 
implementations performance is similar to the old implementation. See the 
comments on #7 for more details.
---
 .../accumulo/access/AccessEvaluatorImpl.java       |  43 +-
 .../accumulo/access/AccessExpressionImpl.java      | 513 +--------------------
 .../java/org/apache/accumulo/access/AeNode.java    | 280 +++++++++++
 .../java/org/apache/accumulo/access/Parser.java    |  77 ++++
 .../java/org/apache/accumulo/access/Tokenizer.java | 133 ++++++
 .../accumulo/access/AccessExpressionImplTest.java  | 112 -----
 .../accumulo/access/AccessExpressionTest.java      |   6 +
 7 files changed, 510 insertions(+), 654 deletions(-)

diff --git a/src/main/java/org/apache/accumulo/access/AccessEvaluatorImpl.java 
b/src/main/java/org/apache/accumulo/access/AccessEvaluatorImpl.java
index ced77c4..f4851de 100644
--- a/src/main/java/org/apache/accumulo/access/AccessEvaluatorImpl.java
+++ b/src/main/java/org/apache/accumulo/access/AccessEvaluatorImpl.java
@@ -145,48 +145,11 @@ class AccessEvaluatorImpl implements AccessEvaluator {
     }
   }
 
-  public boolean evaluate(AccessExpressionImpl visibility) throws 
IllegalAccessExpressionException {
+  public boolean evaluate(AccessExpressionImpl accessExpression)
+      throws IllegalAccessExpressionException {
     // The VisibilityEvaluator computes a trie from the given Authorizations, 
that ColumnVisibility
     // expressions can be evaluated against.
-    return authorizedPredicates.stream()
-        .allMatch(ap -> evaluate(ap, visibility.getExpressionBytes(), 
visibility.getParseTree()));
-  }
-
-  private static boolean evaluate(Predicate<BytesWrapper> authorizedPredicate,
-      final byte[] expression, final AccessExpressionImpl.Node root)
-      throws IllegalAccessExpressionException {
-    if (expression.length == 0) {
-      return true;
-    }
-    switch (root.type) {
-      case TERM:
-        return authorizedPredicate.test(root.getTerm(expression));
-      case AND:
-        if (root.children == null || root.children.size() < 2) {
-          throw new IllegalAccessExpressionException("AND has less than 2 
children",
-              root.getTerm(expression).toString(), root.start);
-        }
-        for (AccessExpressionImpl.Node child : root.children) {
-          if (!evaluate(authorizedPredicate, expression, child)) {
-            return false;
-          }
-        }
-        return true;
-      case OR:
-        if (root.children == null || root.children.size() < 2) {
-          throw new IllegalAccessExpressionException("OR has less than 2 
children",
-              root.getTerm(expression).toString(), root.start);
-        }
-        for (AccessExpressionImpl.Node child : root.children) {
-          if (evaluate(authorizedPredicate, expression, child)) {
-            return true;
-          }
-        }
-        return false;
-      default:
-        throw new IllegalAccessExpressionException("No such node type",
-            root.getTerm(expression).toString(), root.start);
-    }
+    return 
authorizedPredicates.stream().allMatch(accessExpression.aeNode::canAccess);
   }
 
   private static class BuilderImpl
diff --git a/src/main/java/org/apache/accumulo/access/AccessExpressionImpl.java 
b/src/main/java/org/apache/accumulo/access/AccessExpressionImpl.java
index 5941316..514ed30 100644
--- a/src/main/java/org/apache/accumulo/access/AccessExpressionImpl.java
+++ b/src/main/java/org/apache/accumulo/access/AccessExpressionImpl.java
@@ -20,59 +20,15 @@ package org.apache.accumulo.access;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
-import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashSet;
-import java.util.List;
-import java.util.TreeSet;
 import java.util.concurrent.atomic.AtomicReference;
 
-/**
- * Validate the column visibility is a valid expression and set the visibility 
for a Mutation. See
- * {@link AccessExpressionImpl#AccessExpressionImpl(byte[])} for the 
definition of an expression.
- *
- * <p>
- * The expression is a sequence of characters from the set [A-Za-z0-9_-.] 
along with the binary
- * operators "&amp;" and "|" indicating that both operands are necessary, or 
the either is
- * necessary. The following are valid expressions for visibility:
- *
- * <pre>
- * A
- * A|B
- * (A|B)&amp;(C|D)
- * orange|(red&amp;yellow)
- * </pre>
- *
- * <p>
- * The following are not valid expressions for visibility:
- *
- * <pre>
- * A|B&amp;C
- * A=B
- * A|B|
- * A&amp;|B
- * ()
- * )
- * dog|!cat
- * </pre>
- *
- * <p>
- * In addition to the base set of visibilities, any character can be used in 
the expression if it is
- * quoted. If the quoted term contains '&quot;' or '\', then escape the 
character with '\'. The
- * {@link #quote(String)} method can be used to properly quote and escape 
terms automatically. The
- * following is an example of a quoted term:
- *
- * <pre>
- * &quot;A#C&quot; &amp; B
- * </pre>
- */
 class AccessExpressionImpl implements AccessExpression {
 
-  Node node = null;
-  private byte[] expression;
+  private final byte[] expression;
+
+  final AeNode aeNode;
 
   private final AtomicReference<String> expressionString = new 
AtomicReference<>(null);
 
@@ -86,414 +42,24 @@ class AccessExpressionImpl implements AccessExpression {
     return expressionString.updateAndGet(es -> es == null ? new 
String(expression, UTF_8) : es);
   }
 
-  byte[] getExpressionBytes() {
-    return expression;
-  }
-
-  /**
-   * The node types in a parse tree for a visibility expression.
-   */
-  enum NodeType {
-    EMPTY, TERM, OR, AND,
-  }
-
-  /**
-   * All empty nodes are equal and represent the same value.
-   */
-  private static final Node EMPTY_NODE = new Node(NodeType.EMPTY, 0);
-
   // must create this after creating EMPTY_NODE
   static final AccessExpression EMPTY = new AccessExpressionImpl("");
 
-  /**
-   * A node in the parse tree for a visibility expression.
-   */
-  static class Node {
-    /**
-     * An empty list of nodes.
-     */
-    public static final List<Node> EMPTY = Collections.emptyList();
-    NodeType type;
-    int start;
-    int end;
-    List<Node> children = EMPTY;
-
-    public Node(NodeType type, int start) {
-      this.type = type;
-      this.start = start;
-      this.end = start + 1;
-    }
-
-    public Node(int start, int end) {
-      this.type = NodeType.TERM;
-      this.start = start;
-      this.end = end;
-    }
-
-    public void add(Node child) {
-      if (children == EMPTY) {
-        children = new ArrayList<>();
-      }
-
-      children.add(child);
-    }
-
-    public NodeType getType() {
-      return type;
-    }
-
-    public List<Node> getChildren() {
-      return children;
-    }
-
-    public BytesWrapper getTerm(byte[] expression) {
-      if (type != NodeType.TERM) {
-        throw new IllegalStateException();
-      }
-
-      if (expression[start] == '"') {
-        // its a quoted term
-        int qStart = start + 1;
-        int qEnd = end - 1;
-
-        return new BytesWrapper(expression, qStart, qEnd - qStart);
-      }
-      return new BytesWrapper(expression, start, end - start);
-    }
-  }
-
-  /**
-   * A node comparator. Nodes sort according to node type, terms sort 
lexicographically. AND and OR
-   * nodes sort by number of children, or if the same by corresponding 
children.
-   */
-  static class NodeComparator implements Comparator<Node>, Serializable {
-
-    private static final long serialVersionUID = 1L;
-    byte[] text;
-
-    /**
-     * Creates a new comparator.
-     *
-     * @param text expression string, encoded in UTF-8
-     */
-    public NodeComparator(byte[] text) {
-      this.text = text;
-    }
-
-    @Override
-    public int compare(Node a, Node b) {
-      int diff = a.type.ordinal() - b.type.ordinal();
-      if (diff != 0) {
-        return diff;
-      }
-      switch (a.type) {
-        case EMPTY:
-          return 0; // All empty nodes are the same
-        case TERM:
-          return Arrays.compare(text, a.start, a.end, text, b.start, b.end);
-        case OR:
-        case AND:
-          diff = a.children.size() - b.children.size();
-          if (diff != 0) {
-            return diff;
-          }
-          for (int i = 0; i < a.children.size(); i++) {
-            diff = compare(a.children.get(i), b.children.get(i));
-            if (diff != 0) {
-              return diff;
-            }
-          }
-      }
-      return 0;
-    }
-  }
-
-  /*
-   * Convenience method that delegates to normalize with a new NodeComparator 
constructed using the
-   * supplied expression.
-   */
-  private static Node normalize(Node root, byte[] expression) {
-    return normalize(root, new NodeComparator(expression));
-  }
-
-  // @formatter:off
-    /*
-     * Walks an expression's AST in order to:
-     *  1) roll up expressions with the same operant (`a&(b&c) becomes a&b&c`)
-     *  2) sort labels lexicographically (permutations of `a&b&c` are 
re-ordered to appear as `a&b&c`)
-     *  3) dedupes labels (`a&b&a` becomes `a&b`)
-     */
-    // @formatter:on
-  private static Node normalize(Node root, NodeComparator comparator) {
-    if (root.type != NodeType.TERM) {
-      TreeSet<Node> rolledUp = new TreeSet<>(comparator);
-      java.util.Iterator<Node> itr = root.children.iterator();
-      while (itr.hasNext()) {
-        Node c = normalize(itr.next(), comparator);
-        if (c.type == root.type) {
-          rolledUp.addAll(c.children);
-          itr.remove();
-        }
-      }
-      rolledUp.addAll(root.children);
-      root.children.clear();
-      root.children.addAll(rolledUp);
-
-      // need to promote a child if it's an only child
-      if (root.children.size() == 1) {
-        return root.children.get(0);
-      }
-    }
-
-    return root;
-  }
-
-  /*
-   * Walks an expression's AST and appends a string representation to a 
supplied StringBuilder. This
-   * method adds parens where necessary.
-   */
-  private static void stringify(Node root, byte[] expression, StringBuilder 
out) {
-    if (root.type == NodeType.TERM) {
-      out.append(new String(expression, root.start, root.end - root.start, 
UTF_8));
-    } else {
-      String sep = "";
-      for (Node c : root.children) {
-        out.append(sep);
-        boolean parens = (c.type != NodeType.TERM && root.type != c.type);
-        if (parens) {
-          out.append("(");
-        }
-        stringify(c, expression, out);
-        if (parens) {
-          out.append(")");
-        }
-        sep = root.type == NodeType.AND ? "&" : "|";
-      }
-    }
-  }
-
   @Override
   public String normalize() {
-    Node normRoot = normalize(node, expression);
-    StringBuilder builder = new StringBuilder(expression.length);
-    stringify(normRoot, expression, builder);
+    // TODO pass a string builder
+    StringBuilder builder = new StringBuilder();
+    aeNode.normalize().stringify(builder, false);
     return builder.toString();
   }
 
   @Override
   public Authorizations getAuthorizations() {
     HashSet<String> auths = new HashSet<>();
-    findAuths(node, expression, auths);
+    aeNode.getAuthorizations(auths::add);
     return Authorizations.of(auths);
   }
 
-  private void findAuths(Node node, byte[] expression, HashSet<String> auths) {
-    switch (node.getType()) {
-      case AND:
-      case OR:
-        for (Node child : node.getChildren()) {
-          findAuths(child, expression, auths);
-        }
-        break;
-      case TERM:
-        auths.add(node.getTerm(expression).toString());
-        break;
-      case EMPTY:
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown node type " + 
node.getType());
-    }
-  }
-
-  private static class ColumnVisibilityParser {
-    private int index = 0;
-    private int parens = 0;
-
-    public ColumnVisibilityParser() {}
-
-    Node parse(byte[] expression) {
-      if (expression.length > 0) {
-        Node node = parse_(expression);
-        if (node == null) {
-          throw new IllegalAccessExpressionException("operator or missing 
parens",
-              new String(expression, UTF_8), index - 1);
-        }
-        if (parens != 0) {
-          throw new IllegalAccessExpressionException("parenthesis mis-match",
-              new String(expression, UTF_8), index - 1);
-        }
-        return node;
-      }
-      return null;
-    }
-
-    Node processTerm(int start, int end, Node expr, byte[] expression) {
-      if (start != end) {
-        if (expr != null) {
-          throw new IllegalAccessExpressionException("expression needs | or &",
-              new String(expression, UTF_8), start);
-        }
-        return new Node(start, end);
-      }
-      if (expr == null) {
-        throw new IllegalAccessExpressionException("empty term", new 
String(expression, UTF_8),
-            start);
-      }
-      return expr;
-    }
-
-    Node parse_(byte[] expression) {
-      Node result = null;
-      Node expr = null;
-      int wholeTermStart = index;
-      int subtermStart = index;
-      boolean subtermComplete = false;
-
-      while (index < expression.length) {
-        switch (expression[index++]) {
-          case '&':
-            expr = processTerm(subtermStart, index - 1, expr, expression);
-            if (result != null) {
-              if (!result.type.equals(NodeType.AND)) {
-                throw new IllegalAccessExpressionException("cannot mix & and 
|",
-                    new String(expression, UTF_8), index - 1);
-              }
-            } else {
-              result = new Node(NodeType.AND, wholeTermStart);
-            }
-            result.add(expr);
-            expr = null;
-            subtermStart = index;
-            subtermComplete = false;
-            break;
-          case '|':
-            expr = processTerm(subtermStart, index - 1, expr, expression);
-            if (result != null) {
-              if (!result.type.equals(NodeType.OR)) {
-                throw new IllegalAccessExpressionException("cannot mix | and 
&",
-                    new String(expression, UTF_8), index - 1);
-              }
-            } else {
-              result = new Node(NodeType.OR, wholeTermStart);
-            }
-            result.add(expr);
-            expr = null;
-            subtermStart = index;
-            subtermComplete = false;
-            break;
-          case '(':
-            parens++;
-            if (subtermStart != index - 1 || expr != null) {
-              throw new IllegalAccessExpressionException("expression needs & 
or |",
-                  new String(expression, UTF_8), index - 1);
-            }
-            expr = parse_(expression);
-            subtermStart = index;
-            subtermComplete = false;
-            break;
-          case ')':
-            parens--;
-            Node child = processTerm(subtermStart, index - 1, expr, 
expression);
-            if (child == null && result == null) {
-              throw new IllegalAccessExpressionException("empty expression not 
allowed",
-                  new String(expression, UTF_8), index);
-            }
-            if (result == null) {
-              return child;
-            }
-            if (result.type == child.type) {
-              for (Node c : child.children) {
-                result.add(c);
-              }
-            } else {
-              result.add(child);
-            }
-            result.end = index - 1;
-            return result;
-          case '"':
-            if (subtermStart != index - 1) {
-              throw new IllegalAccessExpressionException("expression needs & 
or |",
-                  new String(expression, UTF_8), index - 1);
-            }
-
-            while (index < expression.length && expression[index] != '"') {
-              if (expression[index] == '\\') {
-                index++;
-                if (index == expression.length
-                    || (expression[index] != '\\' && expression[index] != 
'"')) {
-                  throw new IllegalAccessExpressionException("invalid escaping 
within quotes",
-                      new String(expression, UTF_8), index - 1);
-                }
-              }
-              index++;
-            }
-
-            if (index == expression.length) {
-              throw new IllegalAccessExpressionException("unclosed quote",
-                  new String(expression, UTF_8), subtermStart);
-            }
-
-            if (subtermStart + 1 == index) {
-              throw new IllegalAccessExpressionException("empty term",
-                  new String(expression, UTF_8), subtermStart);
-            }
-
-            index++;
-
-            subtermComplete = true;
-
-            break;
-          default:
-            if (subtermComplete) {
-              throw new IllegalAccessExpressionException("expression needs & 
or |",
-                  new String(expression, UTF_8), index - 1);
-            }
-
-            byte c = expression[index - 1];
-            if (!isValidAuthChar(c)) {
-              throw new IllegalAccessExpressionException("bad character (" + c 
+ ")",
-                  new String(expression, UTF_8), index - 1);
-            }
-        }
-      }
-      Node child = processTerm(subtermStart, index, expr, expression);
-      if (result != null) {
-        result.add(child);
-        result.end = index;
-      } else {
-        result = child;
-      }
-      if (result.type != NodeType.TERM) {
-        if (result.children.size() < 2) {
-          throw new IllegalAccessExpressionException("missing term", new 
String(expression, UTF_8),
-              index);
-        }
-      }
-      return result;
-    }
-  }
-
-  private void validate(byte[] expression) {
-    // TODO does not seem like null should be accepted
-    if (expression != null && expression.length > 0) {
-      ColumnVisibilityParser p = new ColumnVisibilityParser();
-      node = p.parse(expression);
-    } else {
-      node = EMPTY_NODE;
-    }
-    this.expression = expression;
-  }
-
-  /**
-   * Creates an empty visibility. Normally, elements with empty visibility can 
be seen by everyone.
-   * Though, one could change this behavior with filters.
-   *
-   * @see #AccessExpressionImpl(String)
-   */
-  AccessExpressionImpl() {
-    this(new byte[] {});
-  }
-
   /**
    * Creates a column visibility for a Mutation.
    *
@@ -512,8 +78,9 @@ class AccessExpressionImpl implements AccessExpression {
    * @see #AccessExpressionImpl(String)
    */
   AccessExpressionImpl(byte[] expression) {
-    // TODO copy bytes to make immutable?
-    validate(expression);
+    // TODO copy?
+    this.expression = expression;
+    aeNode = Parser.parseAccessExpression(expression);
   }
 
   @Override
@@ -548,34 +115,6 @@ class AccessExpressionImpl implements AccessExpression {
     return Arrays.hashCode(expression);
   }
 
-  /**
-   * Gets the parse tree for this column visibility.
-   *
-   * @return parse tree node
-   */
-  Node getParseTree() {
-    return node;
-  }
-
-  /**
-   * Properly quotes terms in a column visibility expression. If no quoting is 
needed, then nothing
-   * is done.
-   *
-   * <p>
-   * Examples of using quote :
-   *
-   * <pre>
-   * import static org.apache.accumulo.core.security.ColumnVisibility.quote;
-   *   .
-   *   .
-   *   .
-   * String s = quote(&quot;A#C&quot;) + &quot;&amp;&quot; + 
quote(&quot;FOO&quot;);
-   * ColumnVisibility cv = new ColumnVisibility(s);
-   * </pre>
-   *
-   * @param term term to quote
-   * @return quoted term (unquoted if unnecessary)
-   */
   static String quote(String term) {
     return new String(quote(term.getBytes(UTF_8)), UTF_8);
   }
@@ -592,7 +131,7 @@ class AccessExpressionImpl implements AccessExpression {
     boolean needsQuote = false;
 
     for (byte b : term) {
-      if (!isValidAuthChar(b)) {
+      if (!Tokenizer.isValidAuthChar(b)) {
         needsQuote = true;
         break;
       }
@@ -604,34 +143,4 @@ class AccessExpressionImpl implements AccessExpression {
 
     return AccessEvaluatorImpl.escape(term, true);
   }
-
-  private static final boolean[] validAuthChars = new boolean[256];
-
-  static {
-    for (int i = 0; i < 256; i++) {
-      validAuthChars[i] = false;
-    }
-
-    for (int i = 'a'; i <= 'z'; i++) {
-      validAuthChars[i] = true;
-    }
-
-    for (int i = 'A'; i <= 'Z'; i++) {
-      validAuthChars[i] = true;
-    }
-
-    for (int i = '0'; i <= '9'; i++) {
-      validAuthChars[i] = true;
-    }
-
-    validAuthChars['_'] = true;
-    validAuthChars['-'] = true;
-    validAuthChars[':'] = true;
-    validAuthChars['.'] = true;
-    validAuthChars['/'] = true;
-  }
-
-  static final boolean isValidAuthChar(byte b) {
-    return validAuthChars[0xff & b];
-  }
 }
diff --git a/src/main/java/org/apache/accumulo/access/AeNode.java 
b/src/main/java/org/apache/accumulo/access/AeNode.java
new file mode 100644
index 0000000..556dd6d
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/access/AeNode.java
@@ -0,0 +1,280 @@
+package org.apache.accumulo.access;
+
+import java.util.List;
+import java.util.TreeSet;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+
+/**
+ * Contains the code for an Access Expression represented as a parse tree and 
all the operations on
+ * a parse tree.
+ */
+abstract class AeNode implements Comparable<AeNode> {
+
+  abstract boolean canAccess(Predicate<BytesWrapper> authorizedPredicate);
+
+  abstract void getAuthorizations(Consumer<String> authConsumer);
+
+  abstract void stringify(StringBuilder builder, boolean addParens);
+
+  abstract AeNode normalize();
+
+  abstract int ordinal();
+
+  public int compareTo(AeNode o) {
+    return ordinal() - o.ordinal();
+  }
+
+  private static class EmptyNode extends AeNode {
+    @Override
+    boolean canAccess(Predicate<BytesWrapper> authorizedPredicate) {
+      return true;
+    }
+
+    @Override
+    void getAuthorizations(Consumer<String> authConsumer) {}
+
+    @Override
+    void stringify(StringBuilder builder, boolean addParens) {
+
+    }
+
+    @Override
+    AeNode normalize() {
+      return this;
+    }
+
+    @Override
+    public int compareTo(AeNode o) {
+      return super.compareTo(o);
+    }
+
+    @Override
+    int ordinal() {
+      return 0;
+    }
+  }
+
+  private static class AuthNode extends AeNode {
+    private final BytesWrapper authInExpression;
+
+    AuthNode(Tokenizer.AuthorizationToken auth) {
+      authInExpression = new BytesWrapper(auth.data, auth.start, auth.len);
+    }
+
+    @Override
+    boolean canAccess(Predicate<BytesWrapper> authorizedPredicate) {
+      return authorizedPredicate.test(authInExpression);
+    }
+
+    @Override
+    void getAuthorizations(Consumer<String> authConsumer) {
+      authConsumer.accept(AccessEvaluatorImpl.unescape(authInExpression));
+    }
+
+    @Override
+    void stringify(StringBuilder builder, boolean addParens) {
+      boolean needsQuotes = false;
+      for (int i = 0; i < authInExpression.length(); i++) {
+        if (!Tokenizer.isValidAuthChar(authInExpression.byteAt(i))) {
+          needsQuotes = true;
+          break;
+        }
+      }
+
+      if (needsQuotes) {
+        builder.append('"');
+        builder.append(authInExpression);
+        builder.append('"');
+      } else {
+        builder.append(authInExpression);
+      }
+
+    }
+
+    @Override
+    AeNode normalize() {
+      return this;
+    }
+
+    @Override
+    public int compareTo(AeNode other) {
+      int cmp = super.compareTo(other);
+      if (cmp == 0) {
+        cmp = authInExpression.compareTo(((AuthNode) other).authInExpression);
+      }
+      return cmp;
+    }
+
+    @Override
+    int ordinal() {
+      return 1;
+    }
+  }
+
+  private static abstract class MultiNode extends AeNode {
+    protected final List<AeNode> children;
+
+    private MultiNode(List<AeNode> children) {
+      this.children = children;
+    }
+
+    @Override
+    void getAuthorizations(Consumer<String> authConsumer) {
+      children.forEach(aeNode -> aeNode.getAuthorizations(authConsumer));
+    }
+
+    abstract char operator();
+
+    @Override
+    void stringify(StringBuilder builder, boolean addParens) {
+      if (addParens) {
+        builder.append("(");
+      }
+
+      var iter = children.iterator();
+      iter.next().stringify(builder, true);
+      iter.forEachRemaining(aeNode -> {
+        builder.append(operator());
+        aeNode.stringify(builder, true);
+      });
+
+      if (addParens) {
+        builder.append(")");
+      }
+    }
+
+    @Override
+    public int compareTo(AeNode other) {
+      int cmp = super.compareTo(other);
+      if (cmp == 0) {
+        cmp = children.size() - ((MultiNode) other).children.size();
+
+        if (cmp == 0) {
+          for (int i = 0; i < children.size(); i++) {
+            cmp = children.get(i).compareTo(((MultiNode) 
other).children.get(i));
+            if (cmp != 0) {
+              break;
+            }
+          }
+        }
+      }
+      return cmp;
+    }
+  }
+
+  private static class AndNode extends MultiNode {
+
+    private AndNode(List<AeNode> children) {
+      super(children);
+    }
+
+    @Override
+    char operator() {
+      return '&';
+    }
+
+    @Override
+    boolean canAccess(Predicate<BytesWrapper> authorizedPredicate) {
+      for (var child : children) {
+        if (!child.canAccess(authorizedPredicate)) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    void flatten(TreeSet<AeNode> nodes) {
+      for (var child : children) {
+        if (child instanceof AndNode) {
+          ((AndNode) child).flatten(nodes);
+        } else {
+          nodes.add(child.normalize());
+        }
+      }
+    }
+
+    @Override
+    AeNode normalize() {
+      var flattened = new TreeSet<AeNode>();
+      flatten(flattened);
+      if (flattened.size() == 1) {
+        return flattened.iterator().next();
+      } else {
+        return new AndNode(List.copyOf(flattened));
+      }
+    }
+
+    int ordinal() {
+      return 3;
+    }
+  }
+
+  private static class OrNode extends MultiNode {
+
+    private OrNode(List<AeNode> children) {
+      super(children);
+    }
+
+    @Override
+    char operator() {
+      return '|';
+    }
+
+    @Override
+    boolean canAccess(Predicate<BytesWrapper> authorizedPredicate) {
+      for (var child : children) {
+        if (child.canAccess(authorizedPredicate)) {
+          return true;
+        }
+      }
+
+      return false;
+    }
+
+    void flatten(TreeSet<AeNode> nodes) {
+      for (var child : children) {
+        if (child instanceof OrNode) {
+          ((OrNode) child).flatten(nodes);
+        } else {
+          nodes.add(child.normalize());
+        }
+      }
+    }
+
+    @Override
+    AeNode normalize() {
+      var flattened = new TreeSet<AeNode>();
+      flatten(flattened);
+      if (flattened.size() == 1) {
+        return flattened.iterator().next();
+      } else {
+        return new OrNode(List.copyOf(flattened));
+      }
+    }
+
+    int ordinal() {
+      return 2;
+    }
+  }
+
+  static AeNode of() {
+    return new EmptyNode();
+  }
+
+  static AeNode of(Tokenizer.AuthorizationToken auth) {
+    return new AuthNode(auth);
+  }
+
+  static AeNode of(byte operator, List<AeNode> children) {
+    switch (operator) {
+      case '&':
+        return new AndNode(children);
+      case '|':
+        return new OrNode(children);
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+}
diff --git a/src/main/java/org/apache/accumulo/access/Parser.java 
b/src/main/java/org/apache/accumulo/access/Parser.java
new file mode 100644
index 0000000..80d7a7b
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/access/Parser.java
@@ -0,0 +1,77 @@
+package org.apache.accumulo.access;
+
+import java.util.ArrayList;
+
+/**
+ * Code for parsing an access expression and creating a parse tree of type 
{@link AeNode}
+ */
+final class Parser {
+  public static AeNode parseAccessExpression(byte[] expression) {
+
+    Tokenizer tokenizer = new Tokenizer(expression);
+
+    if (!tokenizer.hasNext()) {
+      return AeNode.of();
+    }
+
+    var node = parseExpression(tokenizer);
+
+    if (tokenizer.hasNext()) {
+      // not all input was read, so not a valid expression
+      tokenizer.error("Unconsumed token " + (char) tokenizer.peek());
+    }
+
+    return node;
+  }
+
+  private static AeNode parseExpression(Tokenizer tokenizer) {
+    if (!tokenizer.hasNext()) {
+      tokenizer.error("illegal empty expression ");
+    }
+
+    AeNode first;
+
+    if (tokenizer.peek() == '(') {
+      first = parseParenExpression(tokenizer);
+    } else {
+      first = AeNode.of(tokenizer.nextAuthorization());
+    }
+
+    if (tokenizer.hasNext() && (tokenizer.peek() == '&' || tokenizer.peek() == 
'|')) {
+      var nodes = new ArrayList<AeNode>();
+      nodes.add(first);
+
+      var operator = tokenizer.peek();
+
+      do {
+        tokenizer.advance();
+
+        if (!tokenizer.hasNext()) {
+          tokenizer.error("nothing following a " + (char) operator + " 
operator ");
+        }
+
+        if (tokenizer.peek() == '(') {
+          nodes.add(parseParenExpression(tokenizer));
+        } else {
+          nodes.add(AeNode.of(tokenizer.nextAuthorization()));
+        }
+      } while (tokenizer.hasNext() && tokenizer.peek() == operator);
+
+      if (tokenizer.hasNext() && (tokenizer.peek() == '|' || tokenizer.peek() 
== '&')) {
+        // A case of mixed operators, lets give a clear error message
+        tokenizer.error("cannot mix | and &");
+      }
+
+      return AeNode.of(operator, nodes);
+    } else {
+      return first;
+    }
+  }
+
+  private static AeNode parseParenExpression(Tokenizer tokenizer) {
+    tokenizer.advance();
+    var node = parseExpression(tokenizer);
+    tokenizer.next(')');
+    return node;
+  }
+}
diff --git a/src/main/java/org/apache/accumulo/access/Tokenizer.java 
b/src/main/java/org/apache/accumulo/access/Tokenizer.java
new file mode 100644
index 0000000..6972572
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/access/Tokenizer.java
@@ -0,0 +1,133 @@
+package org.apache.accumulo.access;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * A simple wrapper around a byte array that keeps some state and provides 
high level operations to
+ * the {@link Parser} class. The purpose of this class is to make {@link 
Parser} as simple and easy
+ * to understand as possible while still being performant.
+ */
+final class Tokenizer {
+
+  private static final boolean[] validAuthChars = new boolean[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      validAuthChars[i] = false;
+    }
+
+    for (int i = 'a'; i <= 'z'; i++) {
+      validAuthChars[i] = true;
+    }
+
+    for (int i = 'A'; i <= 'Z'; i++) {
+      validAuthChars[i] = true;
+    }
+
+    for (int i = '0'; i <= '9'; i++) {
+      validAuthChars[i] = true;
+    }
+
+    validAuthChars['_'] = true;
+    validAuthChars['-'] = true;
+    validAuthChars[':'] = true;
+    validAuthChars['.'] = true;
+    validAuthChars['/'] = true;
+  }
+
+  static boolean isValidAuthChar(byte b) {
+    return validAuthChars[0xff & b];
+  }
+
+  private byte[] expression;
+  private int index;
+
+  private AuthorizationToken authorizationToken = new AuthorizationToken();
+
+  static class AuthorizationToken {
+    byte[] data;
+    int start;
+    int len;
+  }
+
+  Tokenizer(byte[] expression) {
+    this.expression = expression;
+    authorizationToken.data = expression;
+  }
+
+  boolean hasNext() {
+    return index < expression.length;
+  }
+
+  public void advance() {
+    index++;
+  }
+
+  public void next(char expected) {
+    if (!hasNext()) {
+      error("expected " + expected + " but expression is at end");
+    }
+
+    if (expression[index] != expected) {
+      error("expected " + expected + " but saw " + (char) (expression[index]));
+    }
+    index++;
+  }
+
+  public void error(String msg) {
+    error(msg, index);
+  }
+
+  public void error(String msg, int idx) {
+    throw new IllegalAccessExpressionException(msg, new String(expression, 
UTF_8), idx);
+  }
+
+  byte peek() {
+    return expression[index];
+  }
+
+  AuthorizationToken nextAuthorization() {
+    if (expression[index] == '"') {
+      int start = ++index;
+
+      while (index < expression.length && expression[index] != '"') {
+        if (expression[index] == '\\') {
+          index++;
+          if (index == expression.length
+              || (expression[index] != '\\' && expression[index] != '"')) {
+            error("invalid escaping within quotes", index - 1);
+          }
+        }
+        index++;
+      }
+
+      if (index == expression.length) {
+        error("unclosed quote", start - 1);
+      }
+
+      if (start == index) {
+        error("empty authorization token in quotes", start - 1);
+      }
+
+      authorizationToken.start = start;
+      authorizationToken.len = index - start;
+
+      index++;
+
+      return authorizationToken;
+
+    } else if (isValidAuthChar(expression[index])) {
+      int start = index;
+      while (index < expression.length && isValidAuthChar(expression[index])) {
+        index++;
+      }
+      authorizationToken.start = start;
+      authorizationToken.len = index - start;
+      return authorizationToken;
+    } else {
+      error("Expected an authorization");
+      return null;
+    }
+  }
+
+}
diff --git 
a/src/test/java/org/apache/accumulo/access/AccessExpressionImplTest.java 
b/src/test/java/org/apache/accumulo/access/AccessExpressionImplTest.java
deleted file mode 100644
index 2175155..0000000
--- a/src/test/java/org/apache/accumulo/access/AccessExpressionImplTest.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.accumulo.access;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.Comparator;
-
-import org.apache.accumulo.access.AccessExpressionImpl.Node;
-import org.apache.accumulo.access.AccessExpressionImpl.NodeComparator;
-import org.apache.accumulo.access.AccessExpressionImpl.NodeType;
-import org.junit.jupiter.api.Test;
-
-public class AccessExpressionImplTest {
-
-  @Test
-  public void testParseTree() {
-    Node node = parse("(W)|(U&V)");
-    assertNode(node, NodeType.OR, 0, 9);
-    assertNode(node.getChildren().get(0), NodeType.TERM, 1, 2);
-    assertNode(node.getChildren().get(1), NodeType.AND, 5, 8);
-  }
-
-  @Test
-  public void testParseTreeWithNoChildren() {
-    Node node = parse("ABC");
-    assertNode(node, NodeType.TERM, 0, 3);
-  }
-
-  @Test
-  public void testParseTreeWithTwoChildren() {
-    Node node = parse("ABC|DEF");
-    assertNode(node, NodeType.OR, 0, 7);
-    assertNode(node.getChildren().get(0), NodeType.TERM, 0, 3);
-    assertNode(node.getChildren().get(1), NodeType.TERM, 4, 7);
-  }
-
-  @Test
-  public void testParseTreeWithParenthesesAndTwoChildren() {
-    Node node = parse("(ABC|DEF)");
-    assertNode(node, NodeType.OR, 1, 8);
-    assertNode(node.getChildren().get(0), NodeType.TERM, 1, 4);
-    assertNode(node.getChildren().get(1), NodeType.TERM, 5, 8);
-  }
-
-  @Test
-  public void testParseTreeWithParenthesizedChildren() {
-    Node node = parse("ABC|(DEF&GHI)");
-    assertNode(node, NodeType.OR, 0, 13);
-    assertNode(node.getChildren().get(0), NodeType.TERM, 0, 3);
-    assertNode(node.getChildren().get(1), NodeType.AND, 5, 12);
-    assertNode(node.getChildren().get(1).children.get(0), NodeType.TERM, 5, 8);
-    assertNode(node.getChildren().get(1).children.get(1), NodeType.TERM, 9, 
12);
-  }
-
-  @Test
-  public void testParseTreeWithMoreParentheses() {
-    Node node = parse("(W)|(U&V)");
-    assertNode(node, NodeType.OR, 0, 9);
-    assertNode(node.getChildren().get(0), NodeType.TERM, 1, 2);
-    assertNode(node.getChildren().get(1), NodeType.AND, 5, 8);
-    assertNode(node.getChildren().get(1).children.get(0), NodeType.TERM, 5, 6);
-    assertNode(node.getChildren().get(1).children.get(1), NodeType.TERM, 7, 8);
-  }
-
-  @Test
-  public void testEmptyParseTreesAreEqual() {
-    Comparator<Node> comparator = new NodeComparator(new byte[] {});
-    Node empty = new AccessExpressionImpl().getParseTree();
-    assertEquals(0, comparator.compare(empty, parse("")));
-  }
-
-  @Test
-  public void testParseTreesOrdering() {
-    byte[] expression = "(b&c&d)|((a|m)&y&z)|(e&f)".getBytes(UTF_8);
-    byte[] flattened = new 
AccessExpressionImpl(expression).normalize().getBytes(UTF_8);
-
-    // Convert to String for indexOf convenience
-    String flat = new String(flattened, UTF_8);
-    assertTrue(flat.indexOf('e') < flat.indexOf('|'), "shortest expressions 
sort first");
-    assertTrue(flat.indexOf('b') < flat.indexOf('a'), "shortest children sort 
first");
-  }
-
-  private Node parse(String s) {
-    AccessExpressionImpl v = new AccessExpressionImpl(s);
-    return v.getParseTree();
-  }
-
-  private void assertNode(Node node, NodeType nodeType, int start, int end) {
-    assertEquals(node.type, nodeType);
-    assertEquals(start, node.start);
-    assertEquals(end, node.end);
-  }
-}
diff --git a/src/test/java/org/apache/accumulo/access/AccessExpressionTest.java 
b/src/test/java/org/apache/accumulo/access/AccessExpressionTest.java
index 91e618d..c591d9e 100644
--- a/src/test/java/org/apache/accumulo/access/AccessExpressionTest.java
+++ b/src/test/java/org/apache/accumulo/access/AccessExpressionTest.java
@@ -95,6 +95,12 @@ public class AccessExpressionTest {
     testData.add(List.of("(Z&(X&M))&C&(A&B)", "A&B&C&M&X&Z"));
     testData.add(List.of("(Z&(X&(M|L)))&C&(A&B)", "A&B&C&X&Z&(L|M)"));
     testData.add(List.of("(Z|(X|(M&L)))|C|(A|B)", "A|B|C|X|Z|(L&M)"));
+    testData.add(List.of("(A&(C&B)&C)|((A&C)&(B&C))", "A&B&C"));
+    testData.add(List.of("(A|(C|B)|C)&((A|C)|(B|C))", "A|B|C"));
+    testData.add(List.of("a|a|a|a", "a"));
+    testData.add(List.of("a&a&a&a", "a"));
+    testData.add(List.of("(a|a)|(a|a)", "a"));
+    testData.add(List.of("(a&a)&(a&a)", "a"));
 
     for (var testCase : testData) {
       assertEquals(2, testCase.size());


Reply via email to