branch: externals/trie commit 1c2790d230742a22dc0b437d3c7ec35f6d4f9b3d Author: Toby Cubitt <toby-predict...@dr-qubit.org> Commit: tsc25 <toby-predict...@dr-qubit.org>
Replaced wildcard searches with more powerful and efficient regexp searches. --- trie.el | 1472 +++++++++++++-------------------------------------------------- 1 file changed, 299 insertions(+), 1173 deletions(-) diff --git a/trie.el b/trie.el index 8cd03bb..630a40f 100644 --- a/trie.el +++ b/trie.el @@ -5,7 +5,7 @@ ;; Copyright (C) 2008 Toby Cubitt ;; Author: Toby Cubitt <toby-predict...@dr-qubit.org> -;; Version: 0.1 +;; Version: 0.2 ;; Keywords: trie, ternary search tree, completion ;; URL: http://www.dr-qubit.org/emacs.php @@ -48,16 +48,14 @@ ;; association using `trie-insert', retrieve an association using ;; `trie-lookup', and map over a trie using `trie-map', `trie-mapc', ;; `trie-mapcar', or `trie-mapf'. You can find completions of a prefix -;; sequence using `trie-complete', search for keys that match a wildcard -;; pattern using `trie-wildcard-search', or search for keys matching a -;; regular expression using `trie-regexp-search'. Using `trie-stack', you -;; can create an object that allows the contents of the trie to be used -;; like a stack, useful for building other algorithms on top of tries; +;; sequence using `trie-complete', or search for keys matching a regular +;; expression using `trie-regexp-search'. Using `trie-stack', you can +;; create an object that allows the contents of the trie to be used like +;; a stack, useful for building other algorithms on top of tries; ;; `trie-stack-pop' pops elements off the stack one-by-one, in "lexical" ;; order, whilst `trie-stack-push' pushes things onto the -;; stack. Similarly, `trie-complete-stack', `trie-wildcard-stack' and -;; `trie-regexp-stack' create "lexically-ordered" stacks of query -;; results. +;; stack. Similarly, `trie-complete-stack', and `trie-regexp-stack' +;; create "lexically-ordered" stacks of query results. ;; ;; Note that there are two uses for a trie: as a lookup table, in which ;; case only the presence or absence of a key in the trie is significant, @@ -151,6 +149,11 @@ ;;; Change Log: ;; +;; Version 0.2 +;; * Replaced wildcard searches with regexp searches, using the tNFA.el tagged +;; non-deterministic finite state automata library. This is both more +;; general *and* more efficient. +;; ;; Version 0.1 ;; * Initial release (complete rewrite from scratch of tstree.el!) ;; * Ternary search trees are now implemented as a tree of avl trees, which @@ -180,6 +183,7 @@ (eval-when-compile (require 'cl)) (require 'avl-tree) (require 'heap) +(require 'tNFA) @@ -1043,8 +1047,8 @@ bind any variables with names commencing \"--\"." (pushed '()) )) (:constructor - trie--wildcard-stack-create - (trie pattern + trie--regexp-stack-create + (trie regexp &optional reverse &aux @@ -1053,9 +1057,9 @@ bind any variables with names commencing \"--\"." (stack-createfun (trie--stack-createfun trie)) (stack-popfun (trie--stack-popfun trie)) (stack-emptyfun (trie--stack-emptyfun trie)) - (repopulatefun 'trie--wildcard-stack-repopulate) - (store (trie--wildcard-stack-construct-store - trie pattern reverse)) + (repopulatefun 'trie--regexp-stack-repopulate) + (store (trie--regexp-stack-construct-store + trie regexp reverse)) (pushed '()) )) (:copier nil)) @@ -1107,7 +1111,8 @@ element stored in the trie.)" (if (trie--stack-pushed trie-stack) (pop (trie--stack-pushed trie-stack)) ;; otherwise, pop first element from trie-stack and repopulate it - (let ((first (pop (trie--stack-store trie-stack)))) + (prog1 + (pop (trie--stack-store trie-stack)) (setf (trie--stack-store trie-stack) (funcall (trie--stack-repopulatefun trie-stack) (trie--stack-store trie-stack) @@ -1116,8 +1121,7 @@ element stored in the trie.)" (trie--stack-lookupfun trie-stack) (trie--stack-stack-createfun trie-stack) (trie--stack-stack-popfun trie-stack) - (trie--stack-stack-emptyfun trie-stack))) - first)))) + (trie--stack-stack-emptyfun trie-stack))))))) (defun trie-stack-push (element trie-stack) @@ -1202,72 +1206,64 @@ element stored in the trie.)" `(cond ;; filter, maxnum, resultfun ((and ,filter ,maxnum ,resultfun) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (aset trie--accumulate 0 - (cons (funcall ,resultfun seq data) - (aref trie--accumulate 0))) - (and (>= (length (aref trie--accumulate 0)) ,maxnum) - (throw 'trie-accumulate--done nil)))))) - ;; filter, maxnum, !resultfun - ((and ,filter ,maxnum (not ,resultfun)) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (aset trie--accumulate 0 - (cons (cons seq data) - (aref trie--accumulate 0))) - (and (>= (length (aref trie--accumulate 0)) ,maxnum) - (throw 'trie-accumulate--done nil)))))) - ;; filter, !maxnum, resultfun - ((and ,filter (not ,maxnum) ,resultfun) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (aset trie--accumulate 0 - (cons (funcall ,resultfun seq data) - (aref trie--accumulate 0))))))) - ;; filter, !maxnum, !resultfun - ((and ,filter (not ,maxnum) (not ,resultfun)) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (aset trie--accumulate 0 - (cons (cons seq data) - (aref trie--accumulate 0))))))) - ;; !filter, maxnum, resultfun - ((and (not ,filter) ,maxnum ,resultfun) - (lambda (node seq) - (let ((data (trie--node-data node))) + (lambda (seq data) + (when (funcall ,filter seq data) (aset trie--accumulate 0 (cons (funcall ,resultfun seq data) (aref trie--accumulate 0))) (and (>= (length (aref trie--accumulate 0)) ,maxnum) (throw 'trie-accumulate--done nil))))) - ;; !filter, maxnum, !resultfun - ((and (not ,filter) ,maxnum (not ,resultfun)) - (lambda (node seq) - (let ((data (trie--node-data node))) + ;; filter, maxnum, !resultfun + ((and ,filter ,maxnum (not ,resultfun)) + (lambda (seq data) + (when (funcall ,filter seq data) (aset trie--accumulate 0 (cons (cons seq data) (aref trie--accumulate 0))) (and (>= (length (aref trie--accumulate 0)) ,maxnum) (throw 'trie-accumulate--done nil))))) - ;; !filter, !maxnum, resultfun - ((and (not ,filter) (not ,maxnum) ,resultfun) - (lambda (node seq) - (let ((data (trie--node-data node))) + ;; filter, !maxnum, resultfun + ((and ,filter (not ,maxnum) ,resultfun) + (lambda (seq data) + (when (funcall ,filter seq data) (aset trie--accumulate 0 (cons (funcall ,resultfun seq data) (aref trie--accumulate 0)))))) - ;; !filter, !maxnum, !resultfun - ((and (not ,filter) (not ,maxnum) (not ,resultfun)) - (lambda (node seq) - (let ((data (trie--node-data node))) + ;; filter, !maxnum, !resultfun + ((and ,filter (not ,maxnum) (not ,resultfun)) + (lambda (seq data) + (when (funcall ,filter seq data) (aset trie--accumulate 0 (cons (cons seq data) (aref trie--accumulate 0)))))) + ;; !filter, maxnum, resultfun + ((and (not ,filter) ,maxnum ,resultfun) + (lambda (seq data) + (aset trie--accumulate 0 + (cons (funcall ,resultfun seq data) + (aref trie--accumulate 0))) + (and (>= (length (aref trie--accumulate 0)) ,maxnum) + (throw 'trie-accumulate--done nil)))) + ;; !filter, maxnum, !resultfun + ((and (not ,filter) ,maxnum (not ,resultfun)) + (lambda (seq data) + (aset trie--accumulate 0 + (cons (cons seq data) + (aref trie--accumulate 0))) + (and (>= (length (aref trie--accumulate 0)) ,maxnum) + (throw 'trie-accumulate--done nil)))) + ;; !filter, !maxnum, resultfun + ((and (not ,filter) (not ,maxnum) ,resultfun) + (lambda (seq data) + (aset trie--accumulate 0 + (cons (funcall ,resultfun seq data) + (aref trie--accumulate 0))))) + ;; !filter, !maxnum, !resultfun + ((and (not ,filter) (not ,maxnum) (not ,resultfun)) + (lambda (seq data) + (aset trie--accumulate 0 + (cons (cons seq data) + (aref trie--accumulate 0))))) )) @@ -1277,30 +1273,26 @@ element stored in the trie.)" `(cond ;; filter, maxnum ((and ,filter ,maxnum) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (heap-add trie--accumulate (cons seq data)) - (and (> (heap-size trie--accumulate) ,maxnum) - (heap-delete-root trie--accumulate)))))) + (lambda (seq data) + (when (funcall ,filter seq data) + (heap-add trie--accumulate (cons seq data)) + (and (> (heap-size trie--accumulate) ,maxnum) + (heap-delete-root trie--accumulate))))) ;; filter, !maxnum ((and ,filter (not ,maxnum)) - (lambda (node seq) - (let ((data (trie--node-data node))) - (when (funcall ,filter seq data) - (heap-add trie--accumulate (cons seq data)))))) + (lambda (seq data) + (when (funcall ,filter seq data) + (heap-add trie--accumulate (cons seq data))))) ;; !filter, maxnum ((and (not ,filter) ,maxnum) - (lambda (node seq) - (let ((data (trie--node-data node))) - (heap-add trie--accumulate (cons seq data)) - (and (> (heap-size trie--accumulate) ,maxnum) - (heap-delete-root trie--accumulate))))) + (lambda (seq data) + (heap-add trie--accumulate (cons seq data)) + (and (> (heap-size trie--accumulate) ,maxnum) + (heap-delete-root trie--accumulate)))) ;; !filter, !maxnum ((and (not ,filter) (not ,maxnum)) - (lambda (node seq) - (let ((data (trie--node-data node))) - (heap-add trie--accumulate (cons seq data))))))) + (lambda (seq data) + (heap-add trie--accumulate (cons seq data)))))) @@ -1431,18 +1423,23 @@ default key-data cons cell." ;; accumulate completions (let (node) + (declare (special accumulator)) (trie--accumulate-results rankfun maxnum reverse filter resultfun accumulator nil (mapc (lambda (pfx) (setq node (trie--node-find (trie--root trie) pfx (trie--lookupfun trie))) (when node - (trie--mapc accumulator (trie--mapfun trie) node pfx - (if maxnum reverse (not reverse))))) + (trie--mapc + (lambda (node seq) + (funcall accumulator seq (trie--node-data node))) + (trie--mapfun trie) node pfx + (if maxnum reverse (not reverse))))) prefix)) )) + (defun trie-complete-stack (trie prefix &optional reverse) "Return an object that allows completions of PREFIX to be accessed as if they were a stack. @@ -1511,287 +1508,37 @@ it is better to use one of those instead." ;; ================================================================ -;; Wildcard search - -(defmacro trie--wildcard-literal-p (el) `(vectorp ,el)) - -(defmacro trie--wildcard-*-p (el) `(eq ,el ?*)) - -(defmacro trie--wildcard-?-p (el) `(eq ,el ??)) - -(defmacro trie--wildcard-group-start-p (el) - `(eq (car-safe ,el) ?\()) - -(defmacro trie--wildcard-group-end-p (el) - `(eq (car-safe ,el) ?\))) - -(defmacro trie--wildcard-char-alt-p (el) - `(and (listp ,el) - (listp (cdr ,el)) - (or (= (length ,el) 1) - (not (eq (car (last ,el)) ?^))))) - -(defmacro trie--wildcard-neg-char-alt-p (el) - `(and (listp ,el) - (listp (cdr ,el)) - (not (= (length ,el) 1)) - (eq (car (last ,el)) ?^))) - -(defmacro trie--wildcard-group-count (el) - `(cdr ,el)) - +;; Regexp search -;;; ---------------------------------------------------------------- -;;; The public wildcard search functions - -(defun trie-wildcard-match (pattern sequence cmpfun) - "Return t if wildcard PATTERN matches SEQ, nil otherwise. -CMPFUN is used as the comparison function for comparing elements -of the sequence against the pattern. - -PATTERN must be a sequence (vector, list or string) containing -either elements of the type used to reference data in the trie, -or any of the characters `*', `?', `[', `]', `(', `)', `^' or -`\\'. The meaning and syntax of these special characters follows -shell-glob syntax: - - * wildcard - Matches zero or more characters. May *only* appear at the end - of the pattern. - - ? wildcard - Matches any single character. - - [...] character alternative - Matches any of the listed characters. - - [^...] negated character alternative - Matches any character *other* then those listed. - - []...] character alternative including `]' - Matches any of the listed characters, including `]'. - - [^]...] negated character alternative including `]' - Matches any character other than `]' and any others listed. - - \\ quote literal - Causes the next element of the pattern sequence to be treated - literally; special characters lose their special meaning, for - anything else it has no effect. - - ( start group - Starts a grouping construct. - - ) end group - Ends a grouping construct. - -To include a `]' in a character alternative, place it immediately -after the opening `[', or the opening `[^' in a negated character -alternative. To include a `^' in a character alternative, negated -or otherwise, place it anywhere other than immediately after the -opening `['. To include a literal `\\' in the pattern, quote it -with another `\\' (remember that `\\' also has to be quoted -within elisp strings, so as a string this would be -\"\\\\\\\\\"). The above syntax descriptions are written in terms -of strings, but the special characters can be used in *any* -sequence type. E.g. the character alternative \"[abc]\" would be -\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a -vector. The \"characters\" in the alternative can of course be -any data type that might be stored in the trie, not just actual -characters. - -Grouping constructs have no effect on which SEQUENCE's match the -PATTERN, but data about which elements matched which group are -included in the results. When groups are present, the return -result for a match is a list containing cons cells whose cars and -cdrs give the start and end indices of the elements that matched -the corresponding groups, in order." - (let ((pat (append pattern nil)) ; convert pattern to list - token (idx 0) group-stack groups) - (catch 'match - - ;; parse pattern - (while (and pat (> (length sequence) 0)) - (setq pat (trie--wildcard-next-token pat) - token (car pat) - pat (cdr pat)) - (cond - - ;; start group (: add current character index to pending groups - ((trie--wildcard-group-start-p token) - (dotimes (i (trie--wildcard-group-count token)) - (push idx group-stack))) - - ;; end group ): add current character index to pending groups - ((trie--wildcard-group-end-p token) - (dotimes (i (trie--wildcard-group-count token)) - (if (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \"(\"") - (push (cons (pop group-stack) idx) groups)))) - - ;; literal string: compare elements - ((trie--wildcard-literal-p token) - ;; if literal is longer than remaining string, or literal is at end - ;; of pattern and remaining string is too long, match has failed - (when (or (> (length token) (length sequence)) - (and (null pat) (< (length token) (length sequence)))) - (throw 'match nil)) - ;; compare element by element using CMPFUN - (dotimes (i (length token)) - (when (or (funcall cmpfun (elt sequence i) (aref token i)) - (funcall cmpfun (aref token i) (elt sequence i))) - (throw 'match nil))) - (setq sequence (trie--subseq sequence (length token)) - idx (+ idx (length token)))) - - ;; ? wildcard: accept anything - ((trie--wildcard-?-p token) - (setq sequence (trie--subseq sequence 1) - idx (1+ idx))) - - ;; character alternative: check next element matches - ((trie--wildcard-char-alt-p token) - (while (and token - (or (funcall cmpfun (elt sequence 0) (car token)) - (funcall cmpfun (car token) (elt sequence 0)))) - (setq token (cdr token))) - (if token - (setq sequence (trie--subseq sequence 1) - idx (1+ idx)) - (throw 'match nil))) - - ;; negated character alternative: check next element isn't excluded - ((trie--wildcard-neg-char-alt-p token) - (dolist (c (butlast token)) ; drop final ^ - (unless (or (funcall cmpfun (elt sequence 0) c) - (funcall cmpfun c (elt sequence 0))) - (throw 'match nil)) - (setq idx (1+ idx)))) - - ;; terminal * and possibly ): Houston, we have a match! - ((and (trie--wildcard-*-p token) - (catch 'not-group - (dolist (tok pat) - (unless (eq tok ?\)) (throw 'not-group nil))) - t)) - (setq idx (+ idx (length sequence))) - ;; if we have groups, complete them - (when pat - (while pat - (if (null group-stack) - (error "Syntax error in trie wildcard pattern:\ - missing \"(\"") - (push (cons (pop group-stack) idx) groups) - (setq pat (cdr pat)))) - (unless (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \")\"")) - (setq groups - (sort groups - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) - (> (cdr a) (cdr b)))))))) - (throw 'match (or groups t))) - - ;; non-terminal *: not supported for efficiency reasons - ((trie--wildcard-*-p token) - (error "Syntax error in trie wildcard pattern:\ -non-terminal * wildcards are not supported")) - -;;; ;; * wildcard: oh boy, gonna have to recursively check all possible -;;; ;; search brances -;;; ((trie--wildcard-*-p token) -;;; (setq sequence (trie--subseq sequence 1)) -;;; (throw 'match -;;; (or (= (length sequence) 0) -;;; (and pat (trie-wildcard-match pat sequence cmpfun)) -;;; (trie-wildcard-match pattern sequence cmpfun)))) - ) - - ;; store unparsed pattern for next iteration - (setq pattern pat)) - - ;; if we got to the end of PATTERN, SEQUENCE matched - (if (or pat (> (length sequence) 0)) nil (or groups t)) - ))) - - - -(defun trie-wildcard-search - (trie pattern &optional rankfun maxnum reverse filter resultfun) - "Return an alist containing all matches for PATTERN in TRIE +(defun trie-regexp-search + (trie regexp &optional rankfun maxnum reverse filter resultfun type) + "Return an alist containing all matches for REGEXP in TRIE along with their associated data, in the order defined by -RANKFUN, defaulting to \"lexical\" order (i.e. the order defined -by the trie's comparison function). If REVERSE is non-nil, the +RANKFUN, defauling to \"lexical\" order (i.e. the order defined +by the trie's comparison function). If REVERSE is non-nil, the completions are sorted in the reverse order. Returns nil if no completions are found. -PATTERN must be a sequence (vector, list or string) containing -either elements of the type used to reference data in the trie, -or any of the characters `*', `?', `[', `]', `(', `)', `^' or -`\\'. The meaning and syntax of these special characters follows -shell-glob syntax: - - * wildcard - Matches zero or more characters. May *only* appear at the end - of the pattern. - - ? wildcard - Matches any single character. - - [...] character alternative - Matches any of the listed characters. - - [^...] negated character alternative - Matches any character *other* then those listed. - - []...] character alternative including `]' - Matches any of the listed characters, including `]'. - - [^]...] negated character alternative including `]' - Matches any character other than `]' and any others listed. - - \\ quote literal - Causes the next element of the pattern sequence to be treated - literally; special characters lose their special meaning, for - anything else it has no effect. - - ( start group - Starts a grouping construct. - - ) end group - Ends a grouping construct. - -To include a `]' in a character alternative, place it immediately -after the opening `[', or the opening `[^' in a negated character -alternative. To include a `^' in a character alternative, negated -or otherwise, place it anywhere other than immediately after the -opening `['. To include a literal `\\' in the pattern, quote it -with another `\\' (remember that `\\' also has to be quoted -within elisp strings, so as a string this would be -\"\\\\\\\\\"). The above syntax descriptions are written in terms -of strings, but the special characters can be used in *any* -sequence type. E.g. the character alternative \"[abc]\" would be -\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a -vector. The \"characters\" in the alternative can of course be -any data type that might be stored in the trie, not just actual -characters. - -Grouping constructs have no effect on which keys match the -pattern, but data about which sequence elements matched which -group are included in the results. When groups are present, the -car of an element in the results alist is no longer a straight -key. Instead, it is a list whose first element is the matching -key, and the remainder contains cons cells whose cars and cdrs -give the start and end indices of the elements that matched the -corresponding groups, in order. - -If PATTERN is a string, it must be possible to apply `string' to -individual elements of the sequences stored in the trie. The -matches returned in the alist will be sequences of the same type -as KEY. If PATTERN is a list of pattern sequences, matches for -all patterns in the list are included in the returned alist. All -sequences in the list must be of the same type. +REGEXP is a regular expression, but it need not necessarily be a +string. It must be a sequence (vector, list of string) whose +elements are either elements of the same type as elements of the +trie keys (which behave as literals in the regexp), or any of the +usual regexp special characters and backslash constructs. If +REGEXP is a string, it must be possible to apply `string' to +individual elements of the keys stored in the trie. The matches +returned in the alist will be sequences of the same type as KEY. + +Back-references and non-greedy postfix operators are *not* +supported, and the matches are always anchored, so `$' and `^' +lose their special meanings. + +If the regexp contains any non-shy grouping constructs, subgroup +match data is included in the results. In this case, the car of +each match (as returned by a call to `trie-stack-pop' is no +longer just a key. Instead, it is a list whose first element is +the matching key, and whose remaining elements are cons cells +whose cars and cdrs give the start and end indices of the +elements that matched the corresponding groups, in order. The optional integer argument MAXNUM limits the results to the first MAXNUM matches. Otherwise, all matches are returned. @@ -1812,64 +1559,101 @@ RESULTFUN defines a function used to process results before adding them to the final result list. If specified, it should accept two arguments: a key and its associated data. It's return value is what gets added to the final result list, instead of the -default key-data cons cell. - - -Efficiency concerns: - -Wildcard searches on tries are very efficient compared to similar -searches on other data structures. The supported wildcard -patterns are the subset of shell-glob patterns that can be -searched efficiently. Note, however, that supplying a list of -PATTERN's simply finds matches for each pattern independently, -and sorts the results (removing any duplicates), which for -closely-related patterns is inefficient. If you want true -alternation and a less limited pattern syntax, use -`trie-regexp-search' instead...but you'll have to implement it -first!." +default key-data cons cell." ;; convert trie from print-form if necessary (trie-transform-from-read-warn trie) - ;; wrap prefix in a list if necessary - ;; FIXME: the test for a list of patterns, below, will fail if the PATTERN - ;; sequence is a list, and the first element of PATTERN is itself a - ;; list (there might be no easy way to fully fix this...) - (if (or (atom pattern) - (and (listp pattern) (not (sequencep (car pattern))))) - (setq pattern (list pattern)) - ;; sort list of patterns if sorting completions lexically - (when (null rankfun) - (setq pattern - (sort pattern (trie-construct-sortfun - (trie--comparison-function trie)))))) + ;; massage rankfun to cope with grouping data + ;; FIXME: could skip this if REGEXP contains no grouping constructs + (when rankfun + (setq rankfun + `(lambda (a b) + ;; if car of argument contains a key+group list rather than a + ;; straight key, remove group list + ;; FIXME: the test for straight key, below, will fail if the key + ;; is a list, and the first element of the key is itself a + ;; list (there might be no easy way to fully fix this...) + (unless (or (atom (car a)) + (and (listp (car a)) (not (sequencep (caar a))))) + (setq a (cons (caar a) (cdr a)))) + (unless (or (atom (car b)) + (and (listp (car b)) (not (sequencep (caar b))))) + (setq b (cons (caar b) (cdr b)))) + ;; call rankfun on massaged arguments + (,rankfun a b)))) - ;; construct appropriate rankfun for wildcard search - (destructuring-bind (rankfun expect-duplicate-results) - (trie--wildcard-construct-rankfun trie pattern rankfun reverse) - (let ((seq (cond ((stringp (car pattern)) "") - ((listp (car pattern)) ()) - (t [])))) - ;; accumulate pattern matches - (declare (special accumulator)) - (trie--accumulate-results - rankfun maxnum reverse filter resultfun - accumulator expect-duplicate-results - (mapc (lambda (pat) - (trie--do-wildcard-search - (trie--root trie) - seq pat rankfun maxnum reverse - 0 nil nil - (trie--comparison-function trie) - (trie--lookupfun trie) - (trie--mapfun trie))) - ;; convert patterns to lists - (mapcar (lambda (pat) (append pat nil)) pattern)))))) - - - -(defun trie-wildcard-stack (trie pattern &optional reverse) - "Return an object that allows matches to PATTERN to be accessed + ;; accumulate completions + (declare (special accumulator)) + (trie--accumulate-results + rankfun maxnum reverse filter resultfun accumulator nil + (trie--do-regexp-search + (trie--root trie) + (tNFA-from-regexp regexp) + (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) + 0 (or (and maxnum reverse) (and (not maxnum) (not reverse))) + (trie--comparison-function trie) + (trie--lookupfun trie) + (trie--mapfun trie)))) + + + +(defun trie--do-regexp-search (--trie--regexp-search--node + tNFA seq pos reverse + comparison-function lookupfun mapfun) + ;; Search everything below the node --TRIE--REGEXP-SEARCH-NODE for matches + ;; to the regexp encoded in tNFA. SEQ is the sequence corresponding to NODE, + ;; POS is it's length. REVERSE is the usual query argument, and the + ;; remaining arguments are the corresponding trie functions. + (declare (special accumulator)) + (cond + ;; data node + ((trie--node-data-p --trie--regexp-search--node) + (when (tNFA-match-p tNFA) + (let ((groups (tNFA-group-data tNFA))) + (funcall accumulator + (if groups (cons seq groups) seq) + (trie--node-data --trie--regexp-search--node))))) + + ;; wildcard transition: map over all nodes in subtree + ((tNFA-wildcard-p tNFA) + (let (state groups) + (funcall mapfun + (lambda (node) + (if (trie--node-data-p node) + (when (tNFA-match-p tNFA) + (setq groups (tNFA-group-data tNFA)) + (funcall accumulator + (if groups (cons seq groups) seq) + (trie--node-data node))) + (when (setq state (tNFA-next-state + tNFA (trie--node-split node) pos)) + (trie--do-regexp-search + node state + (trie--seq-append seq (trie--node-split node)) + (1+ pos) reverse comparison-function lookupfun mapfun)))) + (trie--node-subtree --trie--regexp-search--node) + reverse))) + + (t ;; no wildcard transition: loop over all transitions + (let (node state) + (dolist (chr (sort (tNFA-transitions tNFA) + (if reverse + `(lambda (a b) (,comparison-function b a)) + comparison-function))) + (when (and (setq node (trie--node-find + --trie--regexp-search--node + (vector chr) lookupfun)) + (setq state (tNFA-next-state tNFA chr pos))) + (trie--do-regexp-search + node state (trie--seq-append seq chr) (1+ pos) + reverse comparison-function lookupfun mapfun))))) + )) + + + +(defun trie-regexp-stack (trie regexp &optional reverse) + "Return an object that allows matches to REGEXP to be accessed as if they were a stack. The stack is sorted in \"lexical\" order, i.e. the order defined @@ -1877,511 +1661,43 @@ by TRIE's comparison function, or in reverse order if REVERSE is non-nil. Calling `trie-stack-pop' pops the top element (a cons cell containing a key and its associated data) from the stack. -PATTERN must be a sequence (vector, list or string) containing -either elements of the type used to reference data in the trie, -or any of the characters `*', `?', `[', `]', `(', `)', `^' or -`\\'. The meaning and syntax of these special characters follows -shell-glob syntax: - - * wildcard - Matches zero or more characters. May *only* appear at the end - of the pattern. - - ? wildcard - Matches any single character. - - [...] character alternative - Matches any of the listed characters. - - [^...] negated character alternative - Matches any character *other* then those listed. - - []...] character alternative including `]' - Matches any of the listed characters, including `]'. - - [^]...] negated character alternative including `]' - Matches any character other than `]' and any others listed. - - \\ quote literal - Causes the next element of the pattern sequence to be treated - literally; special characters lose their special meaning, for - anything else it has no effect. - - ( start group - Starts a grouping construct. - - ) end group - Ends a grouping construct. - -To include a `]' in a character alternative, place it immediately -after the opening `[', or the opening `[^' in a negated character -alternative. To include a `^' in a character alternative, negated -or otherwise, place it anywhere other than immediately after the -opening `['. To include a literal `\\' in the pattern, quote it -with another `\\' (remember that `\\' also has to be quoted -within elisp strings, so as a string this would be -\"\\\\\\\\\"). The above syntax descriptions are written in terms -of strings, but the special characters can be used in *any* -sequence type. E.g. the character alternative \"[abc]\" would be -\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a -vector. The \"characters\" in the alternative can of course be -any data type that might be stored in the trie, not just actual -characters. - -Grouping constructs have no effect on which keys match the -pattern, but data about which sequence elements matched which -group are included in the results. When groups are present, the -car of a match result (as returned by a call to `trie-stack-pop') -is no longer a straight key. Instead, it is a list whose first -element is the matching key, and the remainder contains cons -cells whose cars and cdrs give the start and end indices of the -elements that matched the corresponding groups, in order. - -If PATTERN is a string, it must be possible to apply `string' to -individual elements of the sequences stored in the trie. The -matches returned in the alist will be sequences of the same type -as KEY. (Support for lists of PATTERN's has not yet been -implemented.) - - -Efficiency concerns: +REGEXP is a regular expression, but it need not necessarily be a +string. It must be a sequence (vector, list of string) whose +elements are either elements of the same type as elements of the +trie keys (which behave as literals in the regexp), or any of the +usual regexp special characters and backslash constructs. If +REGEXP is a string, it must be possible to apply `string' to +individual elements of the keys stored in the trie. The matches +returned in the alist will be sequences of the same type as KEY. + +Back-references and non-greedy postfix operators are *not* +supported, and the matches are always anchored, so `$' and `^' +lose their special meanings. + +If the regexp contains any non-shy grouping constructs, subgroup +match data is included in the results. In this case, the car of +each match (as returned by a call to `trie-stack-pop' is no +longer just a key. Instead, it is a list whose first element is +the matching key, and whose remaining elements are cons cells +whose cars and cdrs give the start and end indices of the +elements that matched the corresponding groups, in order." -Wildcard searches on tries are very efficient compared to similar -searches on other data structures. The supported wildcard -patterns are the subset of shell-glob patterns that can be -searched efficiently. If you want a less limited pattern syntax, -use `trie-regexp-stack' instead...but you'll have to implement it -first!." ;; convert trie from print-form if necessary (trie-transform-from-read-warn trie) ;; if stack functions aren't defined for trie type, throw error (if (not (functionp (trie--stack-createfun trie))) (error "Trie type does not support stack operations") - ;; otherwise, create and initialise a stack - (trie--wildcard-stack-create trie pattern reverse))) - - - -;;; ------------------------------------------------------------------ -;;; Internal functions (do the real work) - -(defun trie--wildcard-next-token (pattern &optional cmpfun) - ;; Extract first pattern element from PATTERN (a list), and return it consed - ;; with remainder of pattern. If CMPFUN is supplied, it is used to sort - ;; character alternatives. - (when pattern - (let ((token (pop pattern))) - (cond - ;; *: drop any following *'s - ((eq token ?*) - (while (eq (car pattern) ?*) (pop pattern))) - - ;; [: gobble up to closing ] - ((eq token ?\[) - ;; character alternatives are stored in lists - (setq token ()) - (cond - ;; gobble ] appearing straight after [ - ((eq (car pattern) ?\]) (push (pop pattern) token)) - ;; gobble ] appearing straight after [^ - ((and (eq (car pattern) ?^) (eq (nth 1 pattern) ?\])) - (push (pop pattern) token) - (push (pop pattern) token))) - ;; gobble everything up to closing ] - (while (not (eq (car pattern) ?\])) - (push (pop pattern) token) - (unless pattern - (error "Syntax error in trie wildcard pattern: missing \"]\""))) - (pop pattern) ; dump closing ] - ;; if CMPFUN was supplied, sort characters in alternative - (when cmpfun - ;; leave final ^ at end in negated character alternative - (if (eq (car (last token)) ?^) - (setq token (concat (sort (butlast token) cmpfun) ?^)) - (setq token (sort token cmpfun))))) - - ;; ?: nothing to gobble - ((eq token ??)) - - ;; ]: syntax error (always gobbled when parsing [) - ((eq token ?\]) - (error "Syntax error in trie wildcard pattern: missing \"[\"")) - - ;; (: gobble any following ('s - ((eq token ?\() - (let ((i 1)) - (while (eq (car pattern) ?\() - (incf i) - (pop pattern)) - (setq token (cons ?\( i)))) - - ;; ): gobble any following )'s - ((eq token ?\)) - (let ((i 1)) - (while (eq (car pattern) ?\)) - (incf i) - (pop pattern)) - (setq token (cons ?\) i)))) - - ;; anything else, gobble up to first special character - (t - (push token pattern) - (setq token nil) - (while (and pattern - (not (or (eq (car pattern) ?\[) (eq (car pattern) ?\]) - (eq (car pattern) ?*) (eq (car pattern) ??) - (eq (car pattern) ?\() (eq (car pattern) ?\))))) - ;; \: dump \ and gobble next character - (when (eq (car pattern) ?\\) - (pop pattern) - (unless pattern - (error "Syntax error in trie wildcard pattern:\ - missing character after \"\\\""))) - (push (pop pattern) token)) - ;; fixed strings are stored in vectors - (setq token (vconcat (nreverse token))))) - - ;; return first token and remaining pattern - (list token pattern)))) - - - -;;; ------------------------------------------------------------------ -;;; wildcard search - -(defun trie--wildcard-construct-rankfun (trie pattern rankfun reverse) - ;; construct appropriate rank function for wildcard search, and return a - ;; list containing the rankfun and a flags indicating whether to expect - ;; duplicate results - (let (pattern-contains-groups - ;; multiple patterns: need manual lexical sort and duplicate filtering - (manual-lexical-sort (> (length pattern) 1)) - (expect-duplicate-results (> (length pattern) 1))) - ;; convert patterns to lists, and check for groups ; and * wildcards - (setq pattern - (mapcar - (lambda (pat) - ;; convert pattern to list - (setq pat (append pat nil)) -;;; (let ((pos (trie--position ?* pat))) -;;; ;; if pattern contains multiple *'s, have to filter out -;;; ;; duplicate results -;;; (setq expect-duplicate-results -;;; (or expect-duplicate-results -;;; (and pos (trie--position -;;; ?* (trie--subseq pat (1+ pos)))))) -;;; ;; if *'s appear in middle of pattern (other than any group -;;; ;; endings at very end), need to sort manually -;;; (setq manual-lexical-sort -;;; (or manual-lexical-sort -;;; (and pos -;;; (catch 'not-group-end -;;; (dolist (c (last pat (- (length pat) pos 1))) -;;; (unless (eq c ?\)) -;;; (throw 'not-group-end t))) -;;; nil))))) - ;; check if pattern contains groups - (setq pattern-contains-groups - (or pattern-contains-groups (trie--position ?\( pat))) - ;; return pattern as list - pat) - pattern)) - - ;; construct appropriate rankfun - (cond - ((and rankfun pattern-contains-groups) - (setq rankfun - `(lambda (a b) - ;; if car of argument contains a key+group list rather than - ;; a straight key, remove group list - ;; FIXME: the test for straight key, below, will fail if the - ;; key is a list, and the first element of the key is - ;; itself a list (there might be no easy way to fully - ;; fix this...) - (unless (or (atom (car a)) - (and (listp (car a)) (not (sequencep (caar a))))) - (setq a (cons (caar a) (cdr a)))) - (unless (or (atom (car b)) - (and (listp (car b)) (not (sequencep (caar b))))) - (setq b (cons (caar b) (cdr b)))) - ;; call rankfun on massaged arguments - (,rankfun a b)))) - - ((and (null rankfun) manual-lexical-sort (not pattern-contains-groups)) - (setq rankfun - `(lambda (a b) - ;; call lexical rank function on keys - (,(trie-construct-sortfun - (trie--comparison-function trie) - reverse) - (car a) (car b))))) - - ((and (null rankfun) manual-lexical-sort pattern-contains-groups) - (setq rankfun - `(lambda (a b) - ;; extract key from argument, (car of arg if no group data - ;; attached to key, otherwise first element of key+group list - ;; in car) - ;; FIXME: the test for straight key, below, will fail if the - ;; key is a list, and the first element of the key is - ;; itself a list (there might be no easy way to fully - ;; fix this...) - (if (and (listp (car a)) (not (sequencep (caar a)))) - (setq a (car a)) - (setq a (caar a))) - (if (and (listp (car b)) (not (sequencep (caar b)))) - (setq b (car b)) - (setq b (caar b))) - ;; call lexical rank function on extracted keys - (,(trie-construct-sortfun - (trie--comparison-function trie) - reverse) - a b))))) - - ;; return rankfun and duplicate results flag - (list rankfun expect-duplicate-results))) - - - -(defun trie--do-wildcard-search - (node seq pattern rankfun maxnum reverse - idx group-stack groups - comparison-function lookupfun mapfun) - ;; Perform wildcard search for PATTERN starting at NODE which corresponds to - ;; sequence SEQ, where IDX characters have already been matched, GROUP-STACK - ;; contains any pending group start locations, and GROUPS contains alist of - ;; completed groups. Pass the other query parameters in RANKFUN, MAXNUM and - ;; REVERSE, and the trie functions in COMPARISON-FUNCTION, LOOKUPFUN and - ;; MAPFUN (note that COMPARISON-FUNCTION should be the - ;; trie--comparison-function, *not* the trie--cmpfun) - (declare (special accumulator)) + ;; otherwise, create and initialise a regexp stack + (trie--regexp-stack-create trie regexp reverse))) - ;; if pattern is null, accumulate data from current node - (if (null pattern) - (progn - (unless (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \")\"")) - (when (setq node (trie--find-data-node node lookupfun)) - (setq groups - (sort groups - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) - (> (cdr a) (cdr b))))))) - (funcall accumulator node (if groups (cons seq groups) seq)))) - - ;; otherwise, extract first pattern element and act on it - (destructuring-bind (token pattern) (trie--wildcard-next-token pattern) - (cond - - ;; literal string: descend to corresponding node - ((trie--wildcard-literal-p token) - ;; find node corresponding to literal string pattern - (when (setq node (trie--node-find node token lookupfun)) - (trie--do-wildcard-search - node (trie--seq-concat seq token) - pattern rankfun maxnum reverse - (+ idx (length token)) group-stack groups - comparison-function lookupfun mapfun))) - - ;; start group (: add current character index to pending groups - ((trie--wildcard-group-start-p token) - (dotimes (i (trie--wildcard-group-count token)) - (push idx group-stack)) - (trie--do-wildcard-search - node seq pattern rankfun maxnum reverse - idx group-stack groups - comparison-function lookupfun mapfun)) - - ;; end group ): add completed groups to list - ((trie--wildcard-group-end-p token) - (dotimes (i (trie--wildcard-group-count token)) - (if (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \"(\"") - (push (cons (pop group-stack) idx) groups))) - (trie--do-wildcard-search - node seq pattern rankfun maxnum reverse - idx group-stack groups - comparison-function lookupfun mapfun)) - - ;; terminal *: accumulate everything below current node - ((and (null pattern) (trie--wildcard-*-p token)) - (unless (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \")\"")) - (let ((grps (sort (copy-sequence groups) - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) - (> (cdr a) (cdr b)))))))) - (trie--mapc - (lambda (node seq) (funcall accumulator node (cons seq grps))) - mapfun node seq (if maxnum reverse (not reverse))))) - - ;; terminal * and ): accumulate everything below current node and - ;; close group(s) - ((and (trie--wildcard-*-p token) - (catch 'not-group - (dolist (tok pattern) - (unless (eq tok ?\)) (throw 'not-group nil))) - t)) - (trie--mapc - (lambda (node seq) - (let ((grp-stack group-stack) - (grps (copy-sequence groups)) - (pat pattern)) - (while (progn - (if (null grp-stack) - (error "Syntax error in trie wildcard\ - pattern: missing \"(\"") - (push (cons (pop grp-stack) (length seq)) grps) - (pop pat)))) - (unless (null grp-stack) - (error "Syntax error in trie wildcard pattern: missing \")\"")) - (setq grps - (sort grps - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) (> (cdr a) (cdr b))))))) - (funcall accumulator node (cons seq grps)))) - mapfun node seq ;; trie--mapc arguments - (if maxnum reverse (not reverse)))) - - ;; non-terminal *: not supported for efficiency reasons - ((trie--wildcard-*-p token) - (error "Syntax error in trie wildcard pattern:\ -non-terminal * wildcards are not supported")) - -;;; ;; * wildcard: map over all nodes immediately below current one, with -;;; ;; and without using up the * -;;; ((trie--wildcard-*-p token) -;;; (funcall mapfun -;;; (lambda (node) -;;; ;; skip data nodes (terminal * dealt with above) -;;; (unless (trie--node-data-p node) -;;; ;; using up * -;;; (trie--do-wildcard-search -;;; node (trie--seq-append seq (trie--node-split node)) -;;; pattern rankfun maxnum reverse -;;; (1+ idx) group-stack groups -;;; comparison-function lookupfun mapfun) -;;; ;; not using up * -;;; (trie--do-wildcard-search -;;; node (trie--seq-append seq (trie--node-split node)) -;;; (cons ?* pattern) rankfun maxnum reverse -;;; (1+ idx) group-stack groups -;;; comparison-function lookupfun mapfun))) -;;; (trie--node-subtree node))) - - ;; ? wildcard: map over all child nodes - ((trie--wildcard-?-p token) - (funcall mapfun - (lambda (node) - ;; skip data nodes (note: if we wanted to implement a "0 - ;; or 1" wildcard, would accumulate these instead) - (unless (trie--node-data-p node) - (trie--do-wildcard-search - node (trie--seq-append seq (trie--node-split node)) - pattern rankfun maxnum reverse - (1+ idx) group-stack groups - comparison-function lookupfun mapfun) - )) - (trie--node-subtree node) - (if maxnum reverse (not reverse)))) - - ;; character alternative: descend to corresponding nodes in turn - ((trie--wildcard-char-alt-p token) - (let (n) - (mapc - (lambda (c) - (when (setq n (funcall lookupfun (trie--node-subtree node) - (trie--node-create-dummy c))) - (trie--do-wildcard-search - n (trie--seq-append seq c) - pattern rankfun maxnum reverse - (1+ idx) group-stack groups - comparison-function lookupfun mapfun))) - (if rankfun token - (sort token (if (or (and maxnum reverse) ; no xnor in Elisp! - (and (not maxnum) (not reverse))) - (lambda (a b) - (not (funcall comparison-function a b))) - comparison-function)))))) - - ;; negated character alternative: map over all child nodes, skipping - ;; excluded ones - ((trie--wildcard-neg-char-alt-p token) - (funcall mapfun - (lambda (node) - ;; skip data nodes (note: if we wanted to implement a "0 or - ;; 1" wildcard, would need to accumulate these instead) - (unless (or (trie--node-data-p node) - (catch 'excluded - (dolist (c (butlast token)) ; drop final ^ - (when (eq c (trie--node-split node)) - (throw 'excluded t))))) - (trie--do-wildcard-search - node (trie--seq-append seq (trie--node-split node)) - pattern rankfun maxnum reverse - (1+ idx) group-stack groups - comparison-function lookupfun mapfun) - )) - (trie--node-subtree node) - (if maxnum reverse (not reverse)))) - )))) - - - -;;; ------------------------------------------------------------------ -;;; wildcard stack - -;; FIXME: using a defstruct instead of these macros causes *very* weird -;; bugs...why?!?!?!!! -(defmacro trie--wildcard-stack-el-create - (seq pattern node idx group-stack groups) - `(vector ,seq ,pattern ,node ,idx ,group-stack ,groups)) - -(defmacro trie--wildcard-stack-el-seq (el) `(aref ,el 0)) -(defmacro trie--wildcard-stack-el-pattern (el) `(aref ,el 1)) -(defmacro trie--wildcard-stack-el-node (el) `(aref ,el 2)) -(defmacro trie--wildcard-stack-el-idx (el) `(aref ,el 3)) -(defmacro trie--wildcard-stack-el-group-stack (el) `(aref ,el 4)) -(defmacro trie--wildcard-stack-el-groups (el) `(aref ,el 5)) - -;; ;; structure for internal trie-wildcard-stack elements -;; (defstruct -;; (trie--wildcard-stack-el -;; (:type vector) -;; (:constructor nil) -;; (:constructor trie--wildcard-stack-el-create -;; (seq pattern node idx group-stack groups)) -;; (:copier nil)) -;; seq pattern node idx group-stack groups) - - - -(defun trie--wildcard-stack-construct-store - (trie pattern &optional reverse) - ;; Construct store for wildcard stack based on TRIE. - ;; FIXME: the test for a list of patterns, below, will fail if the PATTERN - ;; sequence is a list, and the first element of PATTERN is itself a - ;; list (there might be no easy way to fully fix this...) - (unless (or (atom pattern) - (and (listp pattern) - (not (sequencep (car pattern))))) - (error "Multiple pattern searches are not currently supported by\ - trie-wildcard-stack's")) - (let ((comparison-function (trie--comparison-function trie)) - (seq (cond ((stringp pattern) "") ((listp pattern) ()) (t []))) - cmpfun store) - (setq cmpfun (if reverse - `(lambda (a b) (,comparison-function b a)) - comparison-function) - store (list - (trie--wildcard-stack-el-create - seq (trie--wildcard-next-token (append pattern nil) cmpfun) - (trie--root trie) 0 nil nil))) - (message "init seq: %s" (trie--wildcard-stack-el-seq (car store))) - (trie--wildcard-stack-repopulate + +(defun trie--regexp-stack-construct-store (trie regexp &optional reverse) + ;; Construct store for regexp stack based on TRIE. + (let ((seq (cond ((stringp regexp) "") ((listp regexp) ()) (t []))) + store) + (push (list seq (trie--root trie) (tNFA-from-regexp regexp) 0) + store) + (trie--regexp-stack-repopulate store reverse (trie--comparison-function trie) (trie--lookupfun trie) @@ -2390,268 +1706,78 @@ non-terminal * wildcards are not supported")) (trie--stack-emptyfun trie)))) - -(defun trie--wildcard-stack-repopulate +(defun trie--regexp-stack-repopulate (store reverse comparison-function lookupfun stack-createfun stack-popfun stack-emptyfun) ;; Recursively push matching children of the node at the head of STORE onto - ;; the front of STORE, until a data node is reached. Sort in (reverse) - ;; lexical order if REVERSE is nil (non-nil). The remaining arguments should - ;; be the corresponding trie functions (note that COMPARISON-FUNCTION should - ;; be the trie--comparison-function, *not* the trie--cmpfun) - (let (seq pattern token node idx group-stack groups cmpfun) - (setq cmpfun (if reverse - `(lambda (a b) (,comparison-function b a)) - comparison-function)) - (catch 'done - (while t - ;; nothing to do if stack is empty - (unless store (throw 'done nil)) - ;; wildcard stack elements (other than the final matches, which are - ;; of course cons cells containing matching keys and their - ;; associated data) are lists containing: the sequence corresponding - ;; to the stack element, the index of the last matched character, - ;; the remaining pattern to search for, and the node at which to - ;; start searching - (setq seq (trie--wildcard-stack-el-seq (car store)) - pattern (trie--wildcard-stack-el-pattern (car store)) - node (trie--wildcard-stack-el-node (car store)) - idx (trie--wildcard-stack-el-idx (car store)) - group-stack (trie--wildcard-stack-el-group-stack (car store)) - groups (trie--wildcard-stack-el-groups (car store)) - token (nth 0 pattern) - pattern (nth 1 pattern) - store (cdr store)) - (cond - - ;; empty pattern: look for data node - ((null token) - (unless (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \")\"")) - ;; if we find one, push match onto stack and we're done - (when (setq node (trie--find-data-node node lookupfun)) - (setq groups - (sort (copy-sequence groups) - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) - (> (cdr a) (cdr b))))))) - (push (cons (if groups (cons seq groups) seq) - (trie--node-data node)) store) - (throw 'done store))) - - ;; start group (: add current character index to pending groups - ((trie--wildcard-group-start-p token) - (dotimes (i (trie--wildcard-group-count token)) - (push idx group-stack)) - (push - (trie--wildcard-stack-el-create - seq (trie--wildcard-next-token pattern cmpfun) - node idx group-stack groups) - store)) - - ;; end group ): add current character index to pending groups - ((trie--wildcard-group-end-p token) - (dotimes (i (trie--wildcard-group-count token)) - (if (null group-stack) - (error "Syntax error in trie wildcard pattern: missing \"(\"") - (push (cons (pop group-stack) idx) groups))) - (push - (trie--wildcard-stack-el-create - seq (trie--wildcard-next-token pattern cmpfun) - node idx group-stack groups) - store)) - - ;; literal string: descend to corresponding node and continue - ((trie--wildcard-literal-p token) - (setq node (trie--node-find node token lookupfun)) - ;; if we found node corresponding to string, push that node onto - ;; the stack (otherwise, current branch of search as failed) - (when node - (push (trie--wildcard-stack-el-create - (trie--seq-concat seq token) - (trie--wildcard-next-token pattern cmpfun) - node (+ idx (length token)) group-stack groups) - store))) - - ;; terminal *: standard repopulation using everything below node - ((and (trie--wildcard-*-p token) - (catch 'not-group - (dolist (tok pattern) - (unless (eq tok ?\)) (throw 'not-group nil))) - t)) - ;; if starting a new * wildcard, push a node stack onto the stack - (if (trie--node-p node) - (push (trie--wildcard-stack-el-create - seq pattern - (funcall stack-createfun - (trie--node-subtree node) reverse) - idx group-stack groups) - store) - ;; otherwise, push node stack back onto the stack - (push (trie--wildcard-stack-el-create - seq pattern node idx group-stack groups) - store)) - (let ((stack (trie--wildcard-stack-el-node (car store)))) - ;; get first node from wildcard node stack - (setq node (funcall stack-popfun stack)) - (when (funcall stack-emptyfun stack) - (setq store (cdr store))) - ;; recursively push node stacks for child node (then its child, - ;; grandchild, etc.) onto the stack until we find a data node - (while (not (trie--node-data-p node)) - (push - (trie--wildcard-stack-el-create - (trie--seq-append seq (trie--node-split node)) - pattern - (funcall stack-createfun (trie--node-subtree node) reverse) - (1+ idx) group-stack groups) - store) - (setq seq (trie--wildcard-stack-el-seq (car store)) - pattern (trie--wildcard-stack-el-pattern (car store)) - stack (trie--wildcard-stack-el-node (car store)) - idx (trie--wildcard-stack-el-idx (car store)) - group-stack (trie--wildcard-stack-el-group-stack - (car store)) - groups (trie--wildcard-stack-el-groups (car store)) - node (funcall stack-popfun stack)) - (when (funcall stack-emptyfun stack) - (setq store (cdr store)))) - ;; add completed groups to list - (when pattern - (setq pattern (trie--wildcard-next-token pattern)) - (dotimes (i (trie--wildcard-group-count token)) - (if (null group-stack) - (error "Syntax error in trie wildcard pattern:\ - missing \"(\"") - (push (cons (pop group-stack) idx) groups))) - (unless (null group-stack) - (error "Syntax error in trie wildcard pattern:\ - missing \")\""))) - ;; sort group list - (setq groups - (sort (copy-sequence groups) - (lambda (a b) - (or (< (car a) (car b)) - (and (= (car a) (car b)) - (> (cdr a) (cdr b))))))) - ;; push result onto stack and we're done - (push (cons (if groups (cons seq groups) seq) - (trie--node-data node)) store) - (throw 'done store))) - - ;; non-terminal *: not supported for efficiency reasons - ((trie--wildcard-*-p token) - (error "Syntax error in trie wildcard pattern:\ -non-terminal * wildcards are not supported")) - - ;; ? wildcard: push wildcard node stack onto stack and repopulate - ;; again - ((trie--wildcard-?-p token) - ;; if we're starting a new ? wildcard, push a node stack onto the - ;; stack - (if (trie--node-p node) - (push (trie--wildcard-stack-el-create - seq pattern - (funcall stack-createfun - (trie--node-subtree node) reverse) - idx group-stack groups) - store) - ;; otherwise, push node stack back onto stack - (push (trie--wildcard-stack-el-create - seq pattern node idx group-stack groups) - store)) - ;; get node stack - (let ((stack (trie--wildcard-stack-el-node (car store)))) - ;; get first non-data node from wildcard node stack - (setq node (funcall stack-popfun stack)) - (when (and node (trie--node-data-p node)) - (setq node (funcall stack-popfun stack))) - ;; if wildcard node stack is exhausted, remove it from the stack - (when (funcall stack-emptyfun stack) - (setq store (cdr store))) - ;; push new non-data node onto the stack - (when node - (push - (trie--wildcard-stack-el-create - (trie--seq-append seq (trie--node-split node)) - (trie--wildcard-next-token pattern cmpfun) - node (1+ idx) group-stack groups) - store)))) - - ;; character alternative: push next matching node onto stack and - ;; repopulate again - ((trie--wildcard-char-alt-p token) - ;; push node back onto the stack - (push (trie--wildcard-stack-el-create - seq pattern node idx group-stack groups) - store) - (let ((c (pop token))) - (while (and c - (not (setq node - (funcall lookupfun - (trie--node-subtree node) - (trie--node-create-dummy c))))) - (setq c (pop token))) - ;; if we've exhausted all characters in the alternative, remove it - ;; from the stack - (when (null token) (setq store (cdr store))) - ;; if we found a match, push matching node onto stack - (when node - (push - (trie--wildcard-stack-el-create - (trie--seq-append seq (trie--node-split node)) - (trie--wildcard-next-token pattern cmpfun) - node (1+ idx) group-stack groups) - store)))) - - ;; negated character alternative: push next non-excluded node onto - ;; stack and repopulate again - ((trie--wildcard-neg-char-alt-p token) - ;; if we're starting a new negated character alternative, push a - ;; node stack onto the stack - (if (trie--node-p node) - (push (trie--wildcard-stack-el-create - seq pattern - (funcall stack-createfun - (trie--node-subtree node) reverse) - idx group-stack groups) - store) - ;; otherwise, push wildcard node stack back onto the stack - (push (trie--wildcard-stack-el-create - seq pattern node idx group-stack groups) - store)) - ;; get wildcard node stack - (let ((stack (trie--wildcard-stack-el-node (car store)))) - ;; pop nodes from wildcard node stack until we find one that - ;; isn't excluded - (setq node (funcall stack-popfun stack)) - (while (and node - (catch 'excluded - (dolist (c (butlast token)) ; drop final ^ - (when (eq (trie--node-split node) c) - (throw 'excluded t))))) - (setq node (funcall stack-popfun stack))) - ;; if wildcard node stack is exhausted, remove it from the stack - (when (funcall stack-emptyfun stack) - (setq store (cdr store))) - ;; if we found match, push node onto stack - (when node - (push - (trie--wildcard-stack-el-create - (trie--seq-append seq (trie--node-split node)) - (trie--wildcard-next-token pattern cmpfun) - node (1+ idx) group-stack groups) - store))))) - - ))) ; end of infinite loop and catches - store) ; return repopulated store - - - - -;; ================================================================ -;; Regexp search + ;; STORE, until a data node is reached. REVERSE is the usual query argument, + ;; and the remaining arguments are the corresponding trie functions. + (let (state seq node pos groups n s) + (while + (progn + (setq pos (pop store) + seq (nth 0 pos) + node (nth 1 pos) + state (nth 2 pos) + pos (nth 3 pos)) + (cond + ;; if stack is empty, we're done + ((null node) nil) + + ;; if stack element is a trie node... + ((trie--node-p node) + (cond + ;; matching data node: add data to the stack and we're done + ((trie--node-data-p node) + (when (tNFA-match-p state) + (setq groups (tNFA-group-data state)) + (push (cons (if groups (cons groups seq) seq) + (trie--node-data node)) + store)) + nil) ; return nil to exit loop + + ;; wildcard transition: add new node stack + ((tNFA-wildcard-p state) + (push (list seq + (funcall stack-createfun + (trie--node-subtree node) reverse) + state pos) + store)) + + (t ;; non-wildcard transition: add all possible next nodes + (dolist (chr (sort (tNFA-transitions state) + (if reverse + comparison-function + `(lambda (a b) + (,comparison-function b a))))) + (when (and (setq n (trie--node-find + node (vector chr) lookupfun)) + (setq s (tNFA-next-state state chr pos))) + (push (list (trie--seq-append seq chr) n s (1+ pos)) + store))) + t))) ; return t to keep looping + + ;; otherwise, stack element is a node stack... + (t + ;; if node stack is empty, dump it and keep repopulating + (if (funcall stack-emptyfun node) + t ; return t to keep looping + ;; otherwise, add node stack back, and add next node from stack + (push (list seq node state pos) store) + (setq node (funcall stack-popfun node) + state (tNFA-next-state state (trie--node-split node) pos)) + (when state + ;; matching data node: add data to the stack and we're done + (if (trie--node-data-p node) + (progn + (push (cons seq (trie--node-data node)) store) + nil) ; return nil to exit loop + ;; normal node: add it to the stack and keep repopulating + (push (list (trie--seq-append seq (trie--node-split node)) + node state (1+ pos)) + store))))) + )))) + store)