branch: externals/parser-generator
commit f648b52bbc7124eadf0a79a8511df2ec9f015e3d
Author: Christian Johansson <christ...@cvj.se>
Commit: Christian Johansson <christ...@cvj.se>

    Passing first unit test for FIRST after new data-structure refactor
---
 parser.el           | 192 ++++++++++++++++++++++++++++------------------------
 test/parser-test.el |   9 ++-
 2 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/parser.el b/parser.el
index 074b0c4..1ca0e99 100644
--- a/parser.el
+++ b/parser.el
@@ -256,7 +256,6 @@
                     (setq is-valid nil))))
                (t (setq is-valid nil)))
               (setq rhs-index (1+ rhs-index)))))))
-
     is-valid))
 
 (defun parser--valid-sentential-form-p (symbols)
@@ -268,11 +267,19 @@
               is-valid
               (< symbol-index symbols-length))
         (let ((symbol (nth symbol-index symbols)))
-          (unless (or
-                   (parser--valid-e-p symbol)
-                   (parser--valid-non-terminal-p symbol)
-                   (parser--valid-terminal-p symbol))
-            (setq is-valid nil)))))
+          (unless (parser--valid-symbol-p symbol)
+            (setq is-valid nil)))
+        (setq symbol-index (1+ symbol-index))))
+    is-valid))
+
+(defun parser--valid-symbol-p (symbol)
+  "Return whether SYMBOL is valid or not."
+  (let ((is-valid t))
+    (unless (or
+             (parser--valid-e-p symbol)
+             (parser--valid-non-terminal-p symbol)
+             (parser--valid-terminal-p symbol))
+      (setq is-valid nil))
     is-valid))
 
 (defun parser--valid-terminal-p (symbol)
@@ -295,6 +302,8 @@
 ;; p. 358
 (defun parser--f-set (input-tape state stack)
   "A deterministic push-down transducer (DPDT) for building F-sets from 
INPUT-TAPE, STATE and STACK."
+  (unless (listp input-tape)
+    (setq input-tape (list input-tape)))
   (parser--debug
    (message "(parser--f-set)")
    (message "input-tape: %s" input-tape)
@@ -336,7 +345,7 @@
                  e-first-p
                  (< input-tape-index input-tape-length))
             (parser--debug (message "Disregarding empty first terminal"))
-            (setq leading-terminals ""))
+            (setq leading-terminals nil))
 
           (let ((leading-terminals-count (length leading-terminals)))
             (parser--debug (message "leading-terminals-count: %s" 
leading-terminals-count))
@@ -419,98 +428,101 @@
                     (setq leading-terminals-count (1+ 
leading-terminals-count))))))
               (setq input-tape-index (1+ input-tape-index)))
             (when (> leading-terminals-count 0)
+              (unless (listp leading-terminals)
+                (setq leading-terminals (list leading-terminals)))
               (push leading-terminals f-set))))))
     f-set))
 
 ;; Algorithm 5.5, p. 357
 (defun parser--first (β &optional disallow-e-first)
   "For sentential-form Β, in grammar, calculate first k terminals, optionally 
DISALLOW-E-FIRST."
+  (unless (listp β)
+    (setq β (list β)))
   (unless (parser--valid-sentential-form-p β)
     (error "Invalid sentential form β!"))
-  (let* ((productions (parser--get-grammar-productions))
-         (k parser--look-ahead-number)
-         (i-max (length productions)))
-
-    ;; Generate F-sets only once per grammar
-    (unless parser--f-sets
-      (let ((f-sets (make-hash-table :test 'equal))
-            (i 0))
-        (while (< i i-max)
-          (parser--debug (message "i = %s" i))
-          (let ((f-set (make-hash-table :test 'equal)))
-
-            ;; Iterate all productions, set F_i
-            (dolist (p productions)
-              (let ((production-lhs (car p))
-                    (production-rhs (cdr p)))
-                (parser--debug
-                 (message "Production-LHS: %s" production-lhs)
-                 (message "Production-RHS: %s" production-rhs))
-
-                ;; Iterate all blocks in RHS
-                (let ((f-p-set))
-                  (dolist (rhs-p production-rhs)
-                    (let ((rhs-string rhs-p))
-                      (let ((rhs-leading-terminals
-                             (parser--f-set rhs-string `(,k ,i ,f-sets 
,disallow-e-first) '(("" t 0)))))
-                        (parser--debug
-                         (message "Leading %d terminals at index %s (%s) -> %s 
= %s" k i production-lhs rhs-string rhs-leading-terminals))
-                        (when rhs-leading-terminals
-                          (when (and
-                                 (listp rhs-leading-terminals)
-                                 (> (length rhs-leading-terminals) 0))
-                            (dolist (rhs-leading-terminals-string 
rhs-leading-terminals)
-                              (when (and
-                                     (stringp rhs-leading-terminals-string)
-                                     (> (length rhs-leading-terminals-string) 
0))
-                                (push rhs-leading-terminals-string 
f-p-set))))))))
-
-                  ;; Make set distinct
-                  (setq f-p-set (parser--distinct f-p-set))
+  (let ((productions (parser--get-grammar-productions))
+        (k parser--look-ahead-number))
+    (let ((i-max (length productions)))
+      ;; Generate F-sets only once per grammar
+      (unless parser--f-sets
+        (let ((f-sets (make-hash-table :test 'equal))
+              (i 0))
+          (while (< i i-max)
+            (parser--debug (message "i = %s" i))
+            (let ((f-set (make-hash-table :test 'equal)))
+
+              ;; Iterate all productions, set F_i
+              (dolist (p productions)
+                (let ((production-lhs (car p))
+                      (production-rhs (cdr p)))
                   (parser--debug
-                   (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
-                  (puthash production-lhs (nreverse f-p-set) f-set))))
-            (puthash i f-set f-sets)
-            (setq i (+ i 1))))
-        (setq parser--f-sets f-sets)))
-
-    ;; Iterate each symbol in β using a PDA algorithm
-    (let ((state 'parsing)
-          (input-tape β)
-          (input-tape-length (length β))
-          (stack '((0 0 nil)))
-          (first-list nil))
-      (while stack
-        (let ((stack-topmost (pop stack)))
-          (let ((input-tape-index (car stack-topmost))
-                (first-length (car (cdr stack-topmost)))
-                (first (car (cdr (cdr stack-topmost)))))
-            (while (and
-                    (< input-tape-index input-tape-length)
-                    (< first-length k))
-              (let ((symbol (nth input-tape-index input-tape)))
-                (cond
-                 ((parser--valid-terminal-p symbol)
-                  (push symbol first)
-                  (setq first-length (1+ first-length)))
-                 ((parser--valid-non-terminal-p symbol)
-                  (let ((symbol-f-set (sort (gethash symbol (gethash (1- 
i-max) parser--f-sets)) 'string<)))
-                    (when (> (length symbol-f-set) 0)
-                      ;; Handle this scenario here were a non-terminal can 
result in different FIRST sets
-                      (let ((symbol-f-set-index 1)
-                            (symbol-f-set-length (length symbol-f-set)))
-                        (while (< symbol-f-set-index symbol-f-set-length)
-                          (let ((symbol-f-set-element (nth symbol-f-set-index 
symbol-f-set)))
-                            (let ((alternative-first-length (+ first-length 
(length symbol-f-set-element)))
-                                  (alternative-first (append first 
symbol-f-set-element))
-                                  (alternative-tape-index (1+ 
input-tape-index)))
-                              (push `(,alternative-tape-index 
,alternative-first-length ,alternative-first) stack))))))
-                    (setq first-length (+ first-length (length (car 
symbol-f-set))))
-                    (setq first (append first (car symbol-f-set)))))))
-              (setq input-tape-index (1+ input-tape-index)))
-            (when (> first-length 0)
-              (push first first-list)))))
-      first-list)))
+                   (message "Production: %s -> %s" production-lhs 
production-rhs))
+
+                  ;; Iterate all blocks in RHS
+                  (let ((f-p-set))
+                    (dolist (rhs-p production-rhs)
+                      (let ((rhs-string rhs-p))
+                        (let ((rhs-leading-terminals
+                               (parser--f-set rhs-string `(,k ,i ,f-sets 
,disallow-e-first) '(("" t 0)))))
+                          (parser--debug
+                           (message "Leading %d terminals at index %s (%s) -> 
%s = %s" k i production-lhs rhs-string rhs-leading-terminals))
+                          (when rhs-leading-terminals
+                            (when (and
+                                   (listp rhs-leading-terminals)
+                                   (> (length rhs-leading-terminals) 0))
+                              (dolist (rhs-leading-terminals-element 
rhs-leading-terminals)
+                                (push rhs-leading-terminals-element 
f-p-set)))))))
+
+                    ;; Make set distinct
+                    (setq f-p-set (parser--distinct f-p-set))
+                    (parser--debug
+                     (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set))
+                    (puthash production-lhs (nreverse f-p-set) f-set))))
+              (puthash i f-set f-sets)
+              (setq i (+ i 1))))
+          (setq parser--f-sets f-sets)))
+
+      (parser--debug
+       (message "Generated F-sets"))
+
+      ;; Iterate each symbol in β using a PDA algorithm
+      (let ((input-tape β)
+            (input-tape-length (length β))
+            (stack '((0 0 nil)))
+            (first-list nil))
+        (while stack
+          (let ((stack-topmost (pop stack)))
+            (parser--debug
+             (message "stack-topmost: %s" stack-topmost))
+            (let ((input-tape-index (car stack-topmost))
+                  (first-length (car (cdr stack-topmost)))
+                  (first (car (cdr (cdr stack-topmost)))))
+              (while (and
+                      (< input-tape-index input-tape-length)
+                      (< first-length k))
+                (let ((symbol (nth input-tape-index input-tape)))
+                  (cond
+                   ((parser--valid-terminal-p symbol)
+                    (push symbol first)
+                    (setq first-length (1+ first-length)))
+                   ((parser--valid-non-terminal-p symbol)
+                    (let ((symbol-f-set (sort (gethash symbol (gethash (1- 
i-max) parser--f-sets)) 'string<)))
+                      (when (> (length symbol-f-set) 1)
+                        ;; Handle this scenario here were a non-terminal can 
result in different FIRST sets
+                        (let ((symbol-f-set-index 1)
+                              (symbol-f-set-length (length symbol-f-set)))
+                          (while (< symbol-f-set-index symbol-f-set-length)
+                            (let ((symbol-f-set-element (nth 
symbol-f-set-index symbol-f-set)))
+                              (let ((alternative-first-length (+ first-length 
(length symbol-f-set-element)))
+                                    (alternative-first (append first 
symbol-f-set-element))
+                                    (alternative-tape-index (1+ 
input-tape-index)))
+                                (push `(,alternative-tape-index 
,alternative-first-length ,alternative-first) stack))))))
+                      (setq first-length (+ first-length (length (car 
symbol-f-set))))
+                      (setq first (append first (car symbol-f-set)))))))
+                (setq input-tape-index (1+ input-tape-index)))
+              (when (> first-length 0)
+                (push first first-list)))))
+        first-list))))
 
 (defun parser--v-set (y)
   "Calculate valid LRk-sets for the viable-prefix Y in grammar G with 
look-ahead K."
diff --git a/test/parser-test.el b/test/parser-test.el
index 7544cc8..3543e12 100644
--- a/test/parser-test.el
+++ b/test/parser-test.el
@@ -28,13 +28,11 @@
   "Test `parser--first'."
   (message "Starting tests for (parser--first)")
 
+  (parser--set-grammar '((S) (a) ((S a)) S) 1)
   (should
    (equal
-    '(a)
-    (parser--first
-     1
-     'S
-     '((S a)))))
+    '((a))
+    (parser--first 'S)))
   (message "Passed first 1 with rudimentary grammar")
 
   (should
@@ -295,6 +293,7 @@
 
 (defun parser-test ()
   "Run test."
+  ;; (setq debug-on-error t)
   (parser-test--valid-look-ahead-number-p)
   (parser-test--valid-production-p)
   (parser-test--valid-grammar-p)

Reply via email to