branch: externals/parser-generator commit b8faa171a03077510df95c1eaa81d907a84f5fa5 Author: Christian Johansson <christ...@cvj.se> Commit: Christian Johansson <christ...@cvj.se>
FIRSTk and EFFk working --- Makefile | 20 +++++ README.md | 25 ++++++- emacs-parser.el | 0 parser.el | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test/parser-test.el | 197 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 448 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e69de29..2c072f5 100644 --- a/Makefile +++ b/Makefile @@ -0,0 +1,20 @@ +EMACS = emacs +ifdef emacs + EMACS = $(emacs) +endif +EMACS_CMD := $(EMACS) -Q -batch -L . -L test/ + +EL := parser.el test/parser-test.el +ELC := $(EL:.el=.elc) + +.PHONY: clean +clean: + rm -f $(ELC) + +.PHONY: compile +compile: + $(EMACS_CMD) -f batch-byte-compile $(EL) + +.PHONY: test +test: + $(EMACS_CMD) -l test/parser-test.el -f "parser-test" diff --git a/README.md b/README.md index bb83076..d46d0a8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,27 @@ # Emacs Parser -The idea of this plugin is to provide functions for various kinds of parsing. +The idea of this plugin is to provide functions for various kinds of context-free grammar parsing. This project is about implementing algorithms described in the book `The Theory of Parsing, Translation and Compiling (Volume 1)`. +## Lexical Analysis + +We use a regular-language based lexical analyzer that can be implemented by a finite-state-machine (FSM). + +WIP + +## Syntax Analysis / Parsing + +We use a deterministic push down transducer (DPDT) based algorithms. + +### Top-down +#### With backtracking +#### Without backtracking +### Bottom-up +#### With backtracking +#### Without backtracking +##### LL(k) +##### LR(k) +##### LALR(k) + +## Test + +Run in terminal `make clean && make test` diff --git a/emacs-parser.el b/emacs-parser.el deleted file mode 100644 index e69de29..0000000 diff --git a/parser.el b/parser.el new file mode 100644 index 0000000..a6af40c --- /dev/null +++ b/parser.el @@ -0,0 +1,207 @@ +;;; parser.el --- LR(k) Parser -*- lexical-binding: t -*- + + +;;; Commentary: + + +;;; Code: + +(defvar parser--debug nil) + + +;; page 402 + +(defmacro parser--debug (&rest message) + "Output MESSAGE but only if debug is enabled." + `(when parser--debug + ,@message)) + +(defun parser--distinct (elements) + "Return distinct of ELEMENTS." + (let ((processed (make-hash-table :test 'equal)) + (new-elements)) + (dolist (element elements) + (unless (gethash element processed) + (puthash element t processed) + (push element new-elements))) + (nreverse new-elements))) + +;; page 377, Algorithm 5.5 +(defun parser--first (k production productions &optional disallow-empty-first) + "Calculate first K tokens of PRODUCTION in PRODUCTIONS, optionally DISALLOW-EMPTY-FIRST." + (let ((f-sets (make-hash-table :test 'equal)) + (i 0) + (i-max (length productions))) + (while (< i i-max) + (parser--debug (message "i = %s" i)) + (let ((f-set (make-hash-table :test 'equal))) + + ;; Iterate all productions, set F_i + (dolist (p productions) + (let ((production-lhs (symbol-name (car p))) + (production-rhs (cdr p))) + (parser--debug + (message "Production-LHS: %s" production-lhs) + (message "Production-RHS: %s" production-rhs)) + + ;; Iterate all blocks in RHS + (let ((f-p-set)) + (dolist (rhs-p production-rhs) + (let ((rhs-string (symbol-name rhs-p))) + (let ((rhs-leading-terminals + (parser--f-set rhs-string `(,k ,i ,f-sets ,disallow-empty-first) '(("" t 0))))) + (parser--debug + (message "Leading %d terminals at index %s (%s) -> %s = %s" k i production-lhs rhs-string rhs-leading-terminals)) + (when rhs-leading-terminals + (when (and + (listp rhs-leading-terminals) + (> (length rhs-leading-terminals) 0)) + (dolist (rhs-leading-terminals-string rhs-leading-terminals) + (when (and + (stringp rhs-leading-terminals-string) + (> (length rhs-leading-terminals-string) 0)) + (push rhs-leading-terminals-string f-p-set)))))))) + + ;; Make set distinct + (setq f-p-set (parser--distinct f-p-set)) + (parser--debug + (message "F_%s_%s(%s) = %s" i k production-lhs f-p-set)) + (puthash production-lhs (nreverse f-p-set) f-set)))) + (puthash i f-set f-sets) + (setq i (+ i 1)))) + (sort (gethash (symbol-name production) (gethash (1- i-max) f-sets)) 'string<))) + +(defun parser--f-set (input-tape state stack) + "A deterministic push-down transducer (DPDT) for building F-sets from INPUT-TAPE, STATE and STACK." + (parser--debug + (message "(parser--f-set)") + (message "input-tape: %s" input-tape) + (message "state: %s" state) + (message "stack: %s" stack)) + + (let ((f-set) + (input-tape-length (length input-tape)) + (k (nth 0 state)) + (i (nth 1 state)) + (f-sets (nth 2 state)) + (disallow-empty-first (nth 3 state))) + (parser--debug + (message "input-tape-length: %s" input-tape-length) + (message "k: %s" k) + (message "i: %s" i)) + (while stack + (let ((stack-symbol (pop stack))) + (parser--debug + (message "Stack-symbol: %s" stack-symbol)) + (let ((leading-terminals (nth 0 stack-symbol)) + (all-leading-terminals-p (nth 1 stack-symbol)) + (input-tape-index (nth 2 stack-symbol)) + (empty-first-p nil)) + (parser--debug + (message "leading-terminals: %s" leading-terminals) + (message "all-leading-terminals-p: %s" all-leading-terminals-p) + (message "input-tape-index: %s" input-tape-index)) + + ;; Flag whether leading-terminal is empty or not + (when (string= leading-terminals "e") + (setq empty-first-p t)) + + (parser--debug (message "empty-first-p: %s" empty-first-p)) + + ;; If leading terminal is empty and we have input-tape left, disregard it + (when (and + (not disallow-empty-first) + empty-first-p + (< input-tape-index input-tape-length)) + (parser--debug (message "Disregarding empty first terminal")) + (setq leading-terminals "")) + + (let ((leading-terminals-count (length leading-terminals))) + (parser--debug (message "leading-terminals-count: %s" leading-terminals-count)) + (while (and + (< input-tape-index input-tape-length) + (< leading-terminals-count k) + all-leading-terminals-p) + (let ((rhs-element (substring input-tape input-tape-index (1+ input-tape-index))) + (rhs-type)) + (parser--debug (message "rhs-element: %s" rhs-element)) + + ;; Determine symbol type + (if (string= rhs-element (upcase rhs-element)) + (setq rhs-type 'NON-TERMINAL) + (if (string= rhs-element "e") + (setq rhs-type 'EMPTY) + (setq rhs-type 'TERMINAL))) + (parser--debug (message "rhs-type: %s" rhs-type)) + + (cond + + ((equal rhs-type 'NON-TERMINAL) + (if (> i 0) + (let ((sub-terminal-sets (gethash rhs-element (gethash (1- i) f-sets)))) + (if sub-terminal-sets + (progn + (parser--debug + (message "Sub-terminal-sets F_%s_%s(%s) = %s (%d)" (1- i) k rhs-element sub-terminal-sets (length sub-terminal-sets))) + (let ((sub-terminal-set (car sub-terminal-sets))) + + (unless (= (length sub-terminal-sets) 1) + ;; Should branch off here, each unique permutation should be included in set + ;; Follow first alternative in this scope but follow the rest in separate scopes + (let ((sub-terminal-index 0)) + (dolist (sub-terminal-set sub-terminal-sets) + (unless (= sub-terminal-index 0) + + ;; When we have a leading terminal and sub-terminal set is empty, don't append it + (when (and + (> leading-terminals-count 0) + (string= sub-terminal-set "e")) + (setq sub-terminal-set "")) + + (let ((sub-rhs-leading-terminals (concat leading-terminals sub-terminal-set))) + (when (> (length sub-rhs-leading-terminals) k) + (setq sub-rhs-leading-terminals (substring sub-rhs-leading-terminals 0 k))) + (push `(,sub-rhs-leading-terminals ,all-leading-terminals-p ,(1+ input-tape-index)) stack))) + (setq sub-terminal-index (1+ sub-terminal-index))))) + + (parser--debug (message "Sub-terminal-set: %s" sub-terminal-set)) + (when (or + (not (string= sub-terminal-set "e")) + (= input-tape-index (1- input-tape-length))) + (setq leading-terminals (concat leading-terminals sub-terminal-set)) + (setq leading-terminals-count (+ leading-terminals-count (length sub-terminal-set))) + (when (> leading-terminals-count k) + (setq leading-terminals (substring leading-terminals 0 k)) + (setq leading-terminals-count k))))) + (parser--debug + (message "Found no subsets for %s %s" rhs-element (1- i))))) + (setq all-leading-terminals-p nil))) + + ((equal rhs-type 'EMPTY) + (if all-leading-terminals-p + (if disallow-empty-first + (when (= leading-terminals-count 0) + (setq all-leading-terminals-p nil)) + (when (and + (= leading-terminals-count 0) + (= input-tape-index (1- input-tape-length))) + (setq leading-terminals (concat leading-terminals rhs-element)) + (setq leading-terminals-count (1+ leading-terminals-count)))) + (setq all-leading-terminals-p nil))) + + ((equal rhs-type 'TERMINAL) + (when all-leading-terminals-p + (setq leading-terminals (concat leading-terminals rhs-element)) + (setq leading-terminals-count (1+ leading-terminals-count)))))) + (setq input-tape-index (1+ input-tape-index))) + (when (> leading-terminals-count 0) + (push leading-terminals f-set)))))) + f-set)) + +(defun parser--empty-free-first (k production productions) + "Calculate empty-free-first K tokens of PRODUCTION in PRODUCTIONS." + (parser--first k production productions t)) + +(provide 'parser) + +;;; parser.el ends here diff --git a/test/parser-test.el b/test/parser-test.el new file mode 100644 index 0000000..b96a219 --- /dev/null +++ b/test/parser-test.el @@ -0,0 +1,197 @@ +;;; parser-test.el --- Tests for parser -*- lexical-binding: t -*- + + +;;; Commentary: + + +;;; Code: + +(require 'parser) +(require 'ert) + +(defun parser-test--first () + "Test `parser--first'." + + (should + (equal + '("a") + (parser--first + 1 + 'S + '( + (S a))))) + (message "Passed first 1 with rudimentary grammar") + + (should + (equal + '("ab") + (parser--first + 2 + 'S + '( + (S abc))))) + (message "Passed first 2 with rudimentary grammar") + + (should + (equal + '("abc") + (parser--first + 3 + 'S + '( + (S abc))))) + (message "Passed first 3 with rudimentary grammar") + + (should + (equal + '("b") + (parser--first + 1 + 'S + '( + (S A) + (A b))))) + (message "Passed first 1 with intermediate grammar") + + (should + (equal + '("ba") + (parser--first + 2 + 'S + '( + (S A) + (A ba))))) + (message "Passed first 2 with intermediate grammar") + + (should + (equal + '("bac") + (parser--first + 3 + 'S + '( + (S A) + (A bace))))) + (message "Passed first 3 with intermediate grammar") + + (should + (equal + '("c" "d") + (parser--first + 1 + 'S + '( + (S A) + (A B) + (B c d))))) + (message "Passed first 1 with semi-complex grammar") + + (should + (equal + '("cf" "da") + (parser--first + 2 + 'S + '( + (S Aa) + (A B) + (B cf d))))) + (message "Passed first 2 with semi-complex grammar") + + (should + (equal + '("cam" "dam") + (parser--first + 3 + 'S + '( + (S A) + (A Bam) + (B c d))))) + (message "Passed first 3 with semi-complex grammar") + + (should + (equal + '("a" "b" "c" "e") + (parser--first + 1 + 'S + '( + (S AB) + (A Ba e) + (B Cb C) + (C c e))))) + (message "Passed first 1 with complex grammar") + + ;; Example 5.28 p 402 + (should + (equal + '("a" "ab" "ac" "b" "ba" "c" "ca" "cb" "e") + (parser--first + 2 + 'S + '( + (S AB) + (A Ba e) + (B Cb C) + (C c e))))) + (message "Passed first 2 with complex grammar") + + (should + (equal + '("a" "ab" "ac" "acb" "b" "ba" "bab" "bac" "c" "ca" "cab" "cac" "cb" "cba" "e") + (parser--first + 3 + 'S + '( + (S AB) + (A Ba e) + (B Cb C) + (C c e))))) + (message "Passed first 3 with complex grammar") + + (message "Passed tests for (parser--first)")) + +;; Example 5.28 page 402 +(defun parser-test--empty-free-first () + "Test `parser--empty-free-first'." + + ;; Example 5.28 p 402 + (should + (equal + '("ca" "cb") + (parser--empty-free-first + 2 + 'S + '( + (S AB) + (A Ba e) + (B Cb C) + (C c e))))) + (message "Passed empty-free-first 2 with complex grammar") + + (message "Passed tests for (parser-test--empty-free-first)")) + +(defun parser-test--distinct () + "Test `parser--distinct'." + (should + (equal + '(a b c) + (parser--distinct '(a a b c)))) + + (should + (equal + '("aa" "b" "cc" "c" "a") + (parser--distinct '("aa" "b" "cc" "c" "b" "a" "aa")))) + (message "Passed tests for (parser--distinct)")) + +(defun parser-test () + "Run test." + (parser-test--distinct) + (parser-test--first) + (parser-test--empty-free-first)) + +(provide 'parser-test) + +;;; parser-test.el ends here