branch: externals/parser-generator commit 2869417d78e8db6eeccf2387900b190f3a65ba62 Author: Christian Johansson <christ...@cvj.se> Commit: Christian Johansson <christ...@cvj.se>
Made new helper functions to make LL-parsing easier --- parser-generator-ll.el | 19 ++++---- parser-generator.el | 101 +++++++++++++++++++++++++++++++++--------- test/parser-generator-test.el | 77 ++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 29 deletions(-) diff --git a/parser-generator-ll.el b/parser-generator-ll.el index 95816fd0d7..5345ec0724 100644 --- a/parser-generator-ll.el +++ b/parser-generator-ll.el @@ -75,6 +75,9 @@ (nth 2 stack-item)) (first-rhs (parser-generator--first production-rhs nil t t)) + (satured-first-rhs + (parser-generator-generate-terminal-saturated-first-set + first-rhs)) (first-parent-follow (parser-generator--first parent-follow nil t t)) (look-aheads) @@ -84,6 +87,10 @@ (message "production-rhs: %S" production-rhs) (message "parent-follow: %S" parent-follow)) + ;; TODO Remove items in first-rhs that ends with the e-identifier + ;; TODO but only if it has other items that does not end with the e-identifier + ;; F('((a e) (a a))) = ((a a)) + (cond ((and first-rhs (not first-parent-follow)) @@ -91,24 +98,21 @@ look-aheads (parser-generator--merge-max-terminal-sets first-rhs - nil - t))) + nil))) ((and first-parent-follow (not first-rhs)) (setq look-aheads (parser-generator--merge-max-terminal-sets nil - first-parent-follow - t))) + first-parent-follow))) ((and first-rhs first-parent-follow) (setq look-aheads (parser-generator--merge-max-terminal-sets first-rhs - first-parent-follow - t))) + first-parent-follow))) (t (error "Unexpected empty FIRST for production: %S and parent-follow: %S" production @@ -412,8 +416,7 @@ (let ((merged-terminal-sets (parser-generator--merge-max-terminal-sets first-sub-symbol-rhs - first-local-follow-sets - t))) + first-local-follow-sets))) (parser-generator--debug (message "sub-symbol-rhs: %S" sub-symbol-rhs) (message "first-sub-symbol-rhs: %S" first-sub-symbol-rhs) diff --git a/parser-generator.el b/parser-generator.el index 05e637536f..28be004d80 100644 --- a/parser-generator.el +++ b/parser-generator.el @@ -1238,8 +1238,8 @@ look-ahead))) (nreverse look-ahead))) -(defun parser-generator--merge-max-terminal-sets (a b &optional abort-on-e-identifier) - "Calculate list of all lists of L1 (+) L2 which is a merge of all terminals in lists A combined with all terminals in lists B but with maximum length of the set look-ahead number, optionally ABORT-ON-E-IDENTIFIER." +(defun parser-generator--merge-max-terminal-sets (a b) + "Calculate list of all lists of L1 (+) L2 which is a merge of all terminals in lists A combined with all terminals in lists B but with maximum length of the set look-ahead number." (let ((a-length (length a)) (a-index 0) (b-length (length b)) @@ -1255,8 +1255,7 @@ ((merged-element (parser-generator--merge-max-terminals a-element - b-element - abort-on-e-identifier))) + b-element))) (if merged-lists (setq merged-lists @@ -1275,8 +1274,7 @@ ((merged-element (parser-generator--merge-max-terminals a-element - nil - abort-on-e-identifier))) + nil))) (if merged-lists (setq merged-lists @@ -1296,8 +1294,7 @@ ((merged-element (parser-generator--merge-max-terminals nil - b-element - abort-on-e-identifier))) + b-element))) (if merged-lists (setq merged-lists @@ -1320,8 +1317,8 @@ merged-lists)) ;; Lemma 5.1 p. 348 -(defun parser-generator--merge-max-terminals (a b &optional abort-on-e-identifier) - "Calculate L1 (+) L2 which is a merge of all terminals in A and B but with exactly length of the set look-ahead number, optionally ABORT-ON-E-IDENTIFIER." +(defun parser-generator--merge-max-terminals (a b) + "Calculate L1 (+) L2 which is a merge of all terminals in A and B but with exactly length of the set look-ahead number." (let ((k (max 1 parser-generator--look-ahead-number)) (merged) (merge-count 0) @@ -1330,31 +1327,24 @@ (a-length (length a)) (b-element) (b-index 0) - (b-length (length b)) - (continue t)) + (b-length (length b))) (while (and - continue (< a-index a-length) (< merge-count k)) (setq a-element (nth a-index a)) - (if (parser-generator--valid-e-p a-element) - (when abort-on-e-identifier - (setq continue nil)) + (unless (parser-generator--valid-e-p a-element) (push a-element merged) (setq merge-count (1+ merge-count))) (setq a-index (1+ a-index))) (while (and - continue (< b-index b-length) (< merge-count k)) (setq b-element (nth b-index b)) - (if (parser-generator--valid-e-p b-element) - (when abort-on-e-identifier - (setq continue nil)) + (unless (parser-generator--valid-e-p b-element) (push b-element merged) (setq merge-count (1+ merge-count))) (setq b-index (1+ b-index))) - (if (= merge-count k) + (if (> merge-count 0) (nreverse merged) nil))) @@ -2098,6 +2088,75 @@ (parser-generator--distinct follow-set))) follow-set)) +(defun parser-generator-generate-terminal-saturated-first-set (first-set) + "Generated a set from FIRST-SET with items that does not end with the e-identifier if there is alternative items that continues with terminals." + (let* ((max-terminal-count + (parser-generator-calculate-max-terminal-count + first-set)) + (saturated-list + (parser-generator-generate-sets-of-terminals + first-set + max-terminal-count))) + saturated-list)) + +(defun parser-generator-generate-sets-of-terminals (sets count) + "Generate set of terminals in sequence from SETS with COUNT." + (let ((sets-of-terminals)) + (dolist (set sets) + (let ((item-count (length set)) + (item-index 0) + (only-terminals t) + (terminal-count 0)) + (while (and + only-terminals + (< item-index item-count)) + (let ((item (nth item-index set))) + (if (parser-generator--valid-terminal-p item) + (setq + terminal-count + (1+ terminal-count)) + (setq + only-terminals + nil))) + (setq + item-index + (1+ item-index))) + (when (and + only-terminals + (= terminal-count count)) + (push + set + sets-of-terminals)))) + (reverse sets-of-terminals))) + +(defun parser-generator-calculate-max-terminal-count (sets) + "Calculate maximum number of terminals in sequence in SETS." + (let ((max-terminal-count 0)) + (dolist (set sets) + (let ((item-count (length set)) + (item-index 0) + (only-terminals t) + (terminal-count 0)) + (while (and + only-terminals + (< item-index item-count)) + (let ((item (nth item-index set))) + (if (parser-generator--valid-terminal-p item) + (setq + terminal-count + (1+ terminal-count)) + (setq + only-terminals + nil))) + (setq + item-index + (1+ item-index))) + (when (> terminal-count max-terminal-count) + (setq + max-terminal-count + terminal-count)))) + max-terminal-count)) + (provide 'parser-generator) diff --git a/test/parser-generator-test.el b/test/parser-generator-test.el index 931053399e..e159585bf5 100644 --- a/test/parser-generator-test.el +++ b/test/parser-generator-test.el @@ -1056,6 +1056,80 @@ (message "Passed tests for (parser-generator-test--generate-list-of-symbol)")) +(defun parser-generator-test--calculate-max-terminal-count () + "Test `parser-generator-calculate-max-terminal-count'." + (message "Starting tests for (parser-generator-calculate-max-terminal-count)") + + (parser-generator-set-look-ahead-number 1) + (parser-generator-set-grammar '((S A B) ("a" "b") ((S A) (S (B)) (B "a") (A "a") (A ("b" "a"))) S)) + (parser-generator-process-grammar) + + (should + (equal + (parser-generator-calculate-max-terminal-count + '(("a" "a") ("b") ("a" e "b" "c") (B "a" "b" "c"))) + 2)) + (should + (equal + (parser-generator-calculate-max-terminal-count + '(("a") ("b") ("a" e "b" "c") (B "a" "b" "c"))) + 1)) + + (message "Passed tests for (parser-generator-calculate-max-terminal-count)")) + +(defun parser-generator-test--generate-sets-of-terminals () + "Test `parser-generator--generate-sets-of-terminals'." + (message "Starting tests for (parser-generator--generate-sets-of-terminals)") + + (parser-generator-set-look-ahead-number 1) + (parser-generator-set-grammar '((S A B) ("a" "b") ((S A) (S (B)) (B "a") (A "a") (A ("b" "a"))) S)) + (parser-generator-process-grammar) + + ;; TODO Test here + (should + (equal + (parser-generator-generate-sets-of-terminals + '(("a" "a") ("b") ("b" "a") ("a" "b" "a") ("a" e S "b" "a") ("b" "b" A)) + 2) + '(("a" "a") ("b" "a")))) + + (should + (equal + (parser-generator-generate-sets-of-terminals + '(("a" "a") ("b") ("b" "a") ("a" "b" "a") ("a" e S "b" "a") ("b" "b" A)) + 1) + '(("b")))) + + (should + (equal + (parser-generator-generate-sets-of-terminals + '(("a" "a") ("b") ("b" "a") ("a" "b" "a") ("a" e S "b" "a") ("b" "b" A)) + 3) + '(("a" "b" "a")))) + + (message "Passed tests for (parser-generator--generate-sets-of-terminals)")) + +(defun parser-generator-test--generate-terminal-saturated-first-set () + "Test `parser-generator-generate-terminal-saturated-first-set'." + (message "Starting tests for (parser-generator-generate-terminal-saturated-first-set)") + + (parser-generator-set-look-ahead-number 1) + (parser-generator-set-grammar '((S A B) ("a" "b") ((S A) (S (B)) (B "a") (A "a") (A ("b" "a"))) S)) + (parser-generator-process-grammar) + + (should + (equal + (parser-generator-generate-terminal-saturated-first-set + '(("a" "b") ("a" "a" e) ("b") ("a" e))) + '(("a" "b")))) + (should + (equal + (parser-generator-generate-terminal-saturated-first-set + '(("a" "b") ("a" "a" e) ("b" "b") ("a" e))) + '(("a" "b") ("b" "b")))) + + (message "Passed tests for (parser-generator-generate-terminal-saturated-first-set)")) + (defun parser-generator-test () "Run test." ;; (setq debug-on-error t) @@ -1079,6 +1153,9 @@ (parser-generator-test--valid-sentential-form-p) (parser-generator-test--valid-terminal-p) (parser-generator-test--generate-f-sets) + (parser-generator-test--calculate-max-terminal-count) + (parser-generator-test--generate-sets-of-terminals) + (parser-generator-test--generate-terminal-saturated-first-set) ;; Algorithms (parser-generator-test--first)