branch: externals/parser-generator commit f648b52bbc7124eadf0a79a8511df2ec9f015e3d Author: Christian Johansson <christ...@cvj.se> Commit: Christian Johansson <christ...@cvj.se>
Passing first unit test for FIRST after new data-structure refactor --- parser.el | 192 ++++++++++++++++++++++++++++------------------------ test/parser-test.el | 9 ++- 2 files changed, 106 insertions(+), 95 deletions(-) diff --git a/parser.el b/parser.el index 074b0c4..1ca0e99 100644 --- a/parser.el +++ b/parser.el @@ -256,7 +256,6 @@ (setq is-valid nil)))) (t (setq is-valid nil))) (setq rhs-index (1+ rhs-index))))))) - is-valid)) (defun parser--valid-sentential-form-p (symbols) @@ -268,11 +267,19 @@ is-valid (< symbol-index symbols-length)) (let ((symbol (nth symbol-index symbols))) - (unless (or - (parser--valid-e-p symbol) - (parser--valid-non-terminal-p symbol) - (parser--valid-terminal-p symbol)) - (setq is-valid nil))))) + (unless (parser--valid-symbol-p symbol) + (setq is-valid nil))) + (setq symbol-index (1+ symbol-index)))) + is-valid)) + +(defun parser--valid-symbol-p (symbol) + "Return whether SYMBOL is valid or not." + (let ((is-valid t)) + (unless (or + (parser--valid-e-p symbol) + (parser--valid-non-terminal-p symbol) + (parser--valid-terminal-p symbol)) + (setq is-valid nil)) is-valid)) (defun parser--valid-terminal-p (symbol) @@ -295,6 +302,8 @@ ;; p. 358 (defun parser--f-set (input-tape state stack) "A deterministic push-down transducer (DPDT) for building F-sets from INPUT-TAPE, STATE and STACK." + (unless (listp input-tape) + (setq input-tape (list input-tape))) (parser--debug (message "(parser--f-set)") (message "input-tape: %s" input-tape) @@ -336,7 +345,7 @@ e-first-p (< input-tape-index input-tape-length)) (parser--debug (message "Disregarding empty first terminal")) - (setq leading-terminals "")) + (setq leading-terminals nil)) (let ((leading-terminals-count (length leading-terminals))) (parser--debug (message "leading-terminals-count: %s" leading-terminals-count)) @@ -419,98 +428,101 @@ (setq leading-terminals-count (1+ leading-terminals-count)))))) (setq input-tape-index (1+ input-tape-index))) (when (> leading-terminals-count 0) + (unless (listp leading-terminals) + (setq leading-terminals (list leading-terminals))) (push leading-terminals f-set)))))) f-set)) ;; Algorithm 5.5, p. 357 (defun parser--first (β &optional disallow-e-first) "For sentential-form Β, in grammar, calculate first k terminals, optionally DISALLOW-E-FIRST." + (unless (listp β) + (setq β (list β))) (unless (parser--valid-sentential-form-p β) (error "Invalid sentential form β!")) - (let* ((productions (parser--get-grammar-productions)) - (k parser--look-ahead-number) - (i-max (length productions))) - - ;; Generate F-sets only once per grammar - (unless parser--f-sets - (let ((f-sets (make-hash-table :test 'equal)) - (i 0)) - (while (< i i-max) - (parser--debug (message "i = %s" i)) - (let ((f-set (make-hash-table :test 'equal))) - - ;; Iterate all productions, set F_i - (dolist (p productions) - (let ((production-lhs (car p)) - (production-rhs (cdr p))) - (parser--debug - (message "Production-LHS: %s" production-lhs) - (message "Production-RHS: %s" production-rhs)) - - ;; Iterate all blocks in RHS - (let ((f-p-set)) - (dolist (rhs-p production-rhs) - (let ((rhs-string rhs-p)) - (let ((rhs-leading-terminals - (parser--f-set rhs-string `(,k ,i ,f-sets ,disallow-e-first) '(("" t 0))))) - (parser--debug - (message "Leading %d terminals at index %s (%s) -> %s = %s" k i production-lhs rhs-string rhs-leading-terminals)) - (when rhs-leading-terminals - (when (and - (listp rhs-leading-terminals) - (> (length rhs-leading-terminals) 0)) - (dolist (rhs-leading-terminals-string rhs-leading-terminals) - (when (and - (stringp rhs-leading-terminals-string) - (> (length rhs-leading-terminals-string) 0)) - (push rhs-leading-terminals-string f-p-set)))))))) - - ;; Make set distinct - (setq f-p-set (parser--distinct f-p-set)) + (let ((productions (parser--get-grammar-productions)) + (k parser--look-ahead-number)) + (let ((i-max (length productions))) + ;; Generate F-sets only once per grammar + (unless parser--f-sets + (let ((f-sets (make-hash-table :test 'equal)) + (i 0)) + (while (< i i-max) + (parser--debug (message "i = %s" i)) + (let ((f-set (make-hash-table :test 'equal))) + + ;; Iterate all productions, set F_i + (dolist (p productions) + (let ((production-lhs (car p)) + (production-rhs (cdr p))) (parser--debug - (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set)) - (puthash production-lhs (nreverse f-p-set) f-set)))) - (puthash i f-set f-sets) - (setq i (+ i 1)))) - (setq parser--f-sets f-sets))) - - ;; Iterate each symbol in β using a PDA algorithm - (let ((state 'parsing) - (input-tape β) - (input-tape-length (length β)) - (stack '((0 0 nil))) - (first-list nil)) - (while stack - (let ((stack-topmost (pop stack))) - (let ((input-tape-index (car stack-topmost)) - (first-length (car (cdr stack-topmost))) - (first (car (cdr (cdr stack-topmost))))) - (while (and - (< input-tape-index input-tape-length) - (< first-length k)) - (let ((symbol (nth input-tape-index input-tape))) - (cond - ((parser--valid-terminal-p symbol) - (push symbol first) - (setq first-length (1+ first-length))) - ((parser--valid-non-terminal-p symbol) - (let ((symbol-f-set (sort (gethash symbol (gethash (1- i-max) parser--f-sets)) 'string<))) - (when (> (length symbol-f-set) 0) - ;; Handle this scenario here were a non-terminal can result in different FIRST sets - (let ((symbol-f-set-index 1) - (symbol-f-set-length (length symbol-f-set))) - (while (< symbol-f-set-index symbol-f-set-length) - (let ((symbol-f-set-element (nth symbol-f-set-index symbol-f-set))) - (let ((alternative-first-length (+ first-length (length symbol-f-set-element))) - (alternative-first (append first symbol-f-set-element)) - (alternative-tape-index (1+ input-tape-index))) - (push `(,alternative-tape-index ,alternative-first-length ,alternative-first) stack)))))) - (setq first-length (+ first-length (length (car symbol-f-set)))) - (setq first (append first (car symbol-f-set))))))) - (setq input-tape-index (1+ input-tape-index))) - (when (> first-length 0) - (push first first-list))))) - first-list))) + (message "Production: %s -> %s" production-lhs production-rhs)) + + ;; Iterate all blocks in RHS + (let ((f-p-set)) + (dolist (rhs-p production-rhs) + (let ((rhs-string rhs-p)) + (let ((rhs-leading-terminals + (parser--f-set rhs-string `(,k ,i ,f-sets ,disallow-e-first) '(("" t 0))))) + (parser--debug + (message "Leading %d terminals at index %s (%s) -> %s = %s" k i production-lhs rhs-string rhs-leading-terminals)) + (when rhs-leading-terminals + (when (and + (listp rhs-leading-terminals) + (> (length rhs-leading-terminals) 0)) + (dolist (rhs-leading-terminals-element rhs-leading-terminals) + (push rhs-leading-terminals-element f-p-set))))))) + + ;; Make set distinct + (setq f-p-set (parser--distinct f-p-set)) + (parser--debug + (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set)) + (puthash production-lhs (nreverse f-p-set) f-set)))) + (puthash i f-set f-sets) + (setq i (+ i 1)))) + (setq parser--f-sets f-sets))) + + (parser--debug + (message "Generated F-sets")) + + ;; Iterate each symbol in β using a PDA algorithm + (let ((input-tape β) + (input-tape-length (length β)) + (stack '((0 0 nil))) + (first-list nil)) + (while stack + (let ((stack-topmost (pop stack))) + (parser--debug + (message "stack-topmost: %s" stack-topmost)) + (let ((input-tape-index (car stack-topmost)) + (first-length (car (cdr stack-topmost))) + (first (car (cdr (cdr stack-topmost))))) + (while (and + (< input-tape-index input-tape-length) + (< first-length k)) + (let ((symbol (nth input-tape-index input-tape))) + (cond + ((parser--valid-terminal-p symbol) + (push symbol first) + (setq first-length (1+ first-length))) + ((parser--valid-non-terminal-p symbol) + (let ((symbol-f-set (sort (gethash symbol (gethash (1- i-max) parser--f-sets)) 'string<))) + (when (> (length symbol-f-set) 1) + ;; Handle this scenario here were a non-terminal can result in different FIRST sets + (let ((symbol-f-set-index 1) + (symbol-f-set-length (length symbol-f-set))) + (while (< symbol-f-set-index symbol-f-set-length) + (let ((symbol-f-set-element (nth symbol-f-set-index symbol-f-set))) + (let ((alternative-first-length (+ first-length (length symbol-f-set-element))) + (alternative-first (append first symbol-f-set-element)) + (alternative-tape-index (1+ input-tape-index))) + (push `(,alternative-tape-index ,alternative-first-length ,alternative-first) stack)))))) + (setq first-length (+ first-length (length (car symbol-f-set)))) + (setq first (append first (car symbol-f-set))))))) + (setq input-tape-index (1+ input-tape-index))) + (when (> first-length 0) + (push first first-list))))) + first-list)))) (defun parser--v-set (y) "Calculate valid LRk-sets for the viable-prefix Y in grammar G with look-ahead K." diff --git a/test/parser-test.el b/test/parser-test.el index 7544cc8..3543e12 100644 --- a/test/parser-test.el +++ b/test/parser-test.el @@ -28,13 +28,11 @@ "Test `parser--first'." (message "Starting tests for (parser--first)") + (parser--set-grammar '((S) (a) ((S a)) S) 1) (should (equal - '(a) - (parser--first - 1 - 'S - '((S a))))) + '((a)) + (parser--first 'S))) (message "Passed first 1 with rudimentary grammar") (should @@ -295,6 +293,7 @@ (defun parser-test () "Run test." + ;; (setq debug-on-error t) (parser-test--valid-look-ahead-number-p) (parser-test--valid-production-p) (parser-test--valid-grammar-p)