branch: externals/parser-generator commit 2829d36f4771897a8923583996c857caedcac652 Author: Christian Johansson <christ...@cvj.se> Commit: Christian Johansson <christ...@cvj.se>
More work on FOLLOW --- README.md | 6 ++- parser.el | 147 +++++++++++++++++++++++++++++++++++----------------- test/parser-test.el | 40 ++++++++++++-- 3 files changed, 139 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 02ce116..fdbf223 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,10 @@ Grammar consists of `N`, `T`, `P` and `S`, where `N` is non-terminals, `T` is te (parser--set-grammar '((S A B C) (a b c) ((S (A B)) (A (B a) e) (B (C b) C) (C c e)) S)) ``` +### e + +The symbol `'e` is hard-coded to be the empty symbol. The symbol is allowed in some grammars and not in others. + ### Non-terminals A non-terminal is either a symbol or a string so `"A"` and `A` are equally valid. @@ -57,7 +61,7 @@ A terminal is either a symbol or a string so `"{"` and `A` are equally valid. ### Sentential-form -A list of one or more non-terminals and terminals, example `'(A "A" c ":")` +A list of one or more non-terminals and terminals, example `'(A "A" c ":")`, the e-symbol is allowed depending on grammar. ### Productions diff --git a/parser.el b/parser.el index 74db139..cb461b1 100644 --- a/parser.el +++ b/parser.el @@ -498,61 +498,112 @@ (message "Generated F-sets")) (let ((first-list nil)) - (if (> (length β) 1) - ;; Iterate each symbol in β using a PDA algorithm - (let ((input-tape β) - (input-tape-length (length β)) - (stack '((0 0 nil)))) - (while stack - (let ((stack-topmost (pop stack))) - (parser--debug - (message "stack-topmost: %s" stack-topmost)) - (let ((input-tape-index (car stack-topmost)) - (first-length (car (cdr stack-topmost))) - (first (car (cdr (cdr stack-topmost))))) - (while (and - (< input-tape-index input-tape-length) - (< first-length k)) - (let ((symbol (nth input-tape-index input-tape))) - (cond - ((parser--valid-terminal-p symbol) - (push symbol first) - (setq first-length (1+ first-length))) - ((parser--valid-non-terminal-p symbol) - (parser--debug - (message "non-terminal symbol: %s" symbol)) - (let ((symbol-f-set (gethash symbol (gethash (1- i-max) parser--f-sets)))) - (parser--debug - (message "symbol-f-set: %s" symbol-f-set)) - (when (> (length symbol-f-set) 1) - ;; Handle this scenario here were a non-terminal can result in different FIRST sets - (let ((symbol-f-set-index 1) - (symbol-f-set-length (length symbol-f-set))) - (while (< symbol-f-set-index symbol-f-set-length) - (let ((symbol-f-set-element (nth symbol-f-set-index symbol-f-set))) - (let ((alternative-first-length (+ first-length (length symbol-f-set-element))) - (alternative-first (append first symbol-f-set-element)) - (alternative-tape-index (1+ input-tape-index))) - (parser--debug - (message "alternative-first: %s" alternative-first)) - (push `(,alternative-tape-index ,alternative-first-length ,alternative-first) stack))) - (setq symbol-f-set-index (1+ symbol-f-set-index))))) - (parser--debug - (message "main-symbol-f-set: %s" (car symbol-f-set))) - (setq first-length (+ first-length (length (car symbol-f-set)))) - (setq first (append first (car symbol-f-set))))))) - (setq input-tape-index (1+ input-tape-index))) - (when (> first-length 0) - (push first first-list)))))) - (setq first-list (gethash (car β) (gethash (1- i-max) parser--f-sets)))) + ;; Iterate each symbol in β using a PDA algorithm + (let ((input-tape β) + (input-tape-length (length β)) + (stack '((0 0 nil)))) + (while stack + (let ((stack-topmost (pop stack))) + (parser--debug + (message "stack-topmost: %s" stack-topmost)) + (let ((input-tape-index (car stack-topmost)) + (first-length (car (cdr stack-topmost))) + (first (car (cdr (cdr stack-topmost))))) + (while (and + (< input-tape-index input-tape-length) + (< first-length k)) + (let ((symbol (nth input-tape-index input-tape))) + (cond + ((parser--valid-terminal-p symbol) + (push symbol first) + (setq first-length (1+ first-length))) + ((parser--valid-non-terminal-p symbol) + (parser--debug + (message "non-terminal symbol: %s" symbol)) + (let ((symbol-f-set (gethash symbol (gethash (1- i-max) parser--f-sets)))) + (parser--debug + (message "symbol-f-set: %s" symbol-f-set)) + (when (> (length symbol-f-set) 1) + ;; Handle this scenario here were a non-terminal can result in different FIRST sets + (let ((symbol-f-set-index 1) + (symbol-f-set-length (length symbol-f-set))) + (while (< symbol-f-set-index symbol-f-set-length) + (let ((symbol-f-set-element (nth symbol-f-set-index symbol-f-set))) + (let ((alternative-first-length (+ first-length (length symbol-f-set-element))) + (alternative-first (append first symbol-f-set-element)) + (alternative-tape-index (1+ input-tape-index))) + (parser--debug + (message "alternative-first: %s" alternative-first)) + (push `(,alternative-tape-index ,alternative-first-length ,alternative-first) stack))) + (setq symbol-f-set-index (1+ symbol-f-set-index))))) + (parser--debug + (message "main-symbol-f-set: %s" (car symbol-f-set))) + (setq first-length (+ first-length (length (car symbol-f-set)))) + (setq first (append first (car symbol-f-set))))))) + (setq input-tape-index (1+ input-tape-index))) + (when (> first-length 0) + (push first first-list)))))) first-list)))) +;; Definition p. 343, FOLLOW(β) = w, w is the set {w|β=>*aβy and w is in FIRST(y)} +(defun parser--follow (β) + "Calculate follow-set of B." + ;; Make sure argument is a list + (unless (listp β) + (setq β (list β))) + (let ((follow-set nil) + (match-length (length β))) + ;; Iterate all productions in grammar + (let ((productions (parser--get-grammar-productions)) + (k parser--look-ahead-number)) + (dolist (p productions) + ;; Iterate all RHS of every production + (let ((production-rhs (cdr p)) + (match-index 0)) + (dolist (rhs production-rhs) + + ;; Make sure RHS is a list + (unless (listp rhs) + (setq rhs (list rhs))) + + ;; Iterate every symbol in RHS + (let ((rhs-count (length rhs)) + (rhs-index 0)) + (while (< rhs-index rhs-count) + (let ((rhs-element (nth rhs-index rhs))) + + ;; Search for all symbols β in RHS + (if (eq rhs-element (nth match-index β)) + ;; Is symbols exists in RHS + (progn + (setq match-index (1+ match-index)) + (when (= match-index match-length) + (parser--debug + (message "found full follow hit: %s" β)) + (if (= rhs-index (1- rhs-count)) + ;; If rest of RHS is empty add e in follow-set + (push '(e) follow-set) + ;; Otherwise add FOLLOW(rest) to follow-set + (let ((rest (nthcdr (1+ rhs-index) rhs))) + (parser--debug + (message "rest: %s" rest)) + (let ((first-set (parser--first rest))) + (parser--debug + (message "rest-first-set: %s" first-set)) + (push first-set follow-set)))) + (setq match-index 0))) + (when (> match-index 0) + (setq match-index 0)))) + (setq rhs-index (1+ rhs-index)))))))) + (when (> (length follow-set) 0) + (setq follow-set (parser--distinct follow-set))) + follow-set)) + (defun parser--v-set (y) "Calculate valid LRk-sets for the viable-prefix Y in grammar G with look-ahead K." (let ((v-set)) (unless (parser--valid-grammar-p G) (error "Invalid grammar G!")) - v-set)) diff --git a/test/parser-test.el b/test/parser-test.el index 11f4915..92074e3 100644 --- a/test/parser-test.el +++ b/test/parser-test.el @@ -24,6 +24,28 @@ (parser--distinct '("aa" "b" "cc" "c" "b" "a" "aa")))) (message "Passed tests for (parser--distinct)")) +(defun parser-test--follow () + "Test `parser--follow'." + (message "Starting tests for (parser--follow)") + + (parser--set-grammar '((S A) (b) ((S A) (A b)) S)) + (parser--set-look-ahead-number 2) + (should + (equal + '((e)) + (parser--follow 'A))) + (message "Passed follow 1 with intermediate grammar") + + (parser--set-grammar '((S A B) (a c d f) ((S (A a)) (A B) (B (c f) d)) S)) + (parser--set-look-ahead-number 2) + (should + (equal + '((a)) + (parser--follow 'A))) + (message "Passed follow 2 with intermediate grammar") + + (message "Passed tests for (parser--follow)")) + (defun parser-test--first () "Test `parser--first'." (message "Starting tests for (parser--first)") @@ -52,6 +74,14 @@ (parser--first '(S a)))) (message "Passed first 1c with rudimentary grammar") + (parser--set-grammar '((S) (a) ((S a)) S)) + (parser--set-look-ahead-number 2) + (should + (equal + '((a)) + (parser--first '(a)))) + (message "Passed first 1d with rudimentary grammar") + (parser--set-grammar '((S) ("a" "b" "c") ((S ("a" "b" "c"))) S)) (parser--set-look-ahead-number 2) (should @@ -96,7 +126,7 @@ (parser--set-look-ahead-number 1) (should (equal - '(("c") ("d")) + '(("d") ("c")) (parser--first 'S))) (message "Passed first 1 with semi-complex grammar") @@ -104,7 +134,7 @@ (parser--set-look-ahead-number 2) (should (equal - '((c f) (d a)) + '((d a) (c f)) (parser--first 'S))) (message "Passed first 2 with semi-complex grammar") @@ -112,7 +142,7 @@ (parser--set-look-ahead-number 3) (should (equal - '(("c" "a" "m") ("d" "a" "m")) + '(("d" "a" "m") ("c" "a" "m")) (parser--first 'S))) (message "Passed first 3 with semi-complex grammar") @@ -120,7 +150,7 @@ (parser--set-look-ahead-number 1) (should (equal - '((a) (e) (c) (b) ) + '((e) (c) (b) (a)) (parser--first 'S))) (message "Passed first 1 with complex grammar") @@ -177,7 +207,6 @@ ;; (message "Passed tests for (parser-test--v-set)")) -;; TODO Re-implement this function (defun parser-test--valid-grammar-p () "Test function `parser--valid-grammar-p'." (message "Starting tests for (parser--valid-grammar-p)") @@ -284,6 +313,7 @@ (parser-test--valid-sentential-form-p) (parser-test--first) (parser-test--e-free-first) + (parser-test--follow) ;; (parser-test--v-set) )