branch: externals/parser-generator commit cfc687a66263fda07ddad43365604e80f9ecef8d Author: Christian Johansson <christ...@cvj.se> Commit: Christian Johansson <christ...@cvj.se>
More work on refactoring lexer to handle states and using a buffer --- docs/Lexical-Analysis.md | 8 +- parser-generator-lex-analyzer.el | 259 ++++++++++++++++------------- test/parser-generator-lex-analyzer-test.el | 1 + 3 files changed, 144 insertions(+), 124 deletions(-) diff --git a/docs/Lexical-Analysis.md b/docs/Lexical-Analysis.md index ac17c11e00..842c19a02f 100644 --- a/docs/Lexical-Analysis.md +++ b/docs/Lexical-Analysis.md @@ -4,7 +4,7 @@ Set lexical analysis function by setting variable `parser-generator-lex-analyzer The lexical analysis is internally indexed on a local variable `parser-generator-lex-analyzer--index` and has its optional state in the local variable `parser-generator-lex-analyzer--state`. The initial values for the index and state can be set in variables `parser-generator-lex-analyzer--index-init` and `parser-generator-lex-analyzer--state-init`. -All parsers expect a list as response from lexical-analysis, the first item in the list should be a list of one or more tokens. The second is "move index"-flag, if it is non-nil it is expected to be a integer representing the index to move the lex-analyzer to and perform a new lex. Third item is the new index after the lex. The fourth item is the new state after the lex. +All parsers expect a list as response from lexical-analysis, the first item in the list should be a list of one or more tokens. The second is "move index"-flag, if it is non-nil it is expected to be an integer representing the index to temporarily move the index to and perform a new lex. Third item is not used. The fourth item is the new state after the lex. To enable exporting, the functions need to be specified in a way that the entire body is within the same block, do that using `(let)` or `(progn)` for example. 
@@ -21,7 +21,7 @@ To enable exporting, the functions need to be specified in a way that the entire (< (1- index) max-index)) (push (nth (1- index) string) tokens) (setq index (1+ index))) - (list tokens nil index nil)))) + (list tokens nil nil nil)))) ``` ## Token @@ -62,7 +62,7 @@ Returns the look-ahead number of next terminals in stream, if end of stream is r (setq new-index (cdr (cdr (nth (1- index) string)))) (push next-token tokens) (setq index (1+ index))) - (list (nreverse tokens) nil new-index nil)))) + (list (nreverse tokens) nil nil nil)))) (parser-generator-lex-analyzer--reset) (setq parser-generator--look-ahead-number 1) @@ -104,7 +104,7 @@ Returns the next token in stream and moves the lexical analyzer index one point (setq new-index (cdr (cdr (nth (1- index) string)))) (push (nth (1- index) string) tokens) (setq index (1+ index))) - (list (nreverse tokens) nil new-index nil)))) + (list (nreverse tokens) nil nil nil)))) (parser-generator-lex-analyzer--reset) (setq parser-generator--look-ahead-number 1) diff --git a/parser-generator-lex-analyzer.el b/parser-generator-lex-analyzer.el index 7edbf1df4f..f4ef62fa68 100644 --- a/parser-generator-lex-analyzer.el +++ b/parser-generator-lex-analyzer.el @@ -25,30 +25,45 @@ nil "Get next token like \='(a b . 
c) or nil, expects signal if input-tape is invalid.") -(defvar-local - parser-generator-lex-analyzer--index +(defvar + parser-generator-lex-analyzer--state-init nil - "Index in lex-analyzer.") + "Initial value of state.") + +(defvar + parser-generator-lex-analyzer--reset-function + nil + "Function used when resetting lex-analyzer.") (defvar parser-generator-lex-analyzer--index-init 1 "Initial value of index.") + +;;; Buffer-Local Variables: + + +(defvar-local + parser-generator-lex-analyzer--index + nil + "Index in lex-analyzer.") + (defvar-local parser-generator-lex-analyzer--state nil "State of lex-analyzer.") -(defvar - parser-generator-lex-analyzer--state-init +(defvar-local + parser-generator-lex-analyzer--state nil - "Initial value of state.") + "State of lex-analyzer.") -(defvar - parser-generator-lex-analyzer--reset-function +(defvar-local + parser-generator-lex-analyzer--buffered-response nil - "Function used when resetting lex-analyzer.") + "Buffered tokens of lex-analyzer.") + ;; Functions @@ -92,8 +107,6 @@ "Peek next look-ahead number of tokens via lex-analyzer." 
(unless parser-generator-lex-analyzer--index (error "Missing lex-analyzer index when peeking!")) - (unless parser-generator-lex-analyzer--function - (error "Missing lex-analyzer function when peeking!")) (unless parser-generator--look-ahead-number (error "Missing look-ahead-number when peeking!")) (let ((look-ahead) @@ -107,129 +120,69 @@ (max 1 parser-generator--look-ahead-number))) + (while (< look-ahead-length k) - (condition-case error - (progn - (let* ((result-list - (funcall - parser-generator-lex-analyzer--function - index - state)) - (token - (nth 0 result-list)) - (move-to-index-flag - (nth 1 result-list)) - (new-index - (nth 2 result-list)) - (new-state - (nth 3 result-list))) - (if move-to-index-flag - (progn - (setq - index - move-to-index-flag) - (setq - state - new-state)) - (if token - (progn - (setq index new-index) - (unless (listp (car token)) - (setq token (list token))) - (let ((token-count (length token)) - (token-index 0)) - (while - (and - (< - look-ahead-length - k) - (< - token-index - token-count)) - (let ((next-look-ahead-item - (nth token-index token))) - (push - next-look-ahead-item - look-ahead) - (setq - look-ahead-length - (1+ look-ahead-length)) - (setq - token-index - (1+ token-index)))))) - - ;; Fill up look-ahead with EOF-identifier if we found nothing - (push (list parser-generator--eof-identifier) look-ahead) - (setq look-ahead-length (1+ look-ahead-length)) - (setq index (1+ index)))))) - - (error - (error - "Lex-analyze failed to peek next look-ahead at %s, error: %s, look-ahead: %S" - index - error - look-ahead)))) + + (let* ((result-list + (parser-generator-lex-analyzer--get-buffered-lex + index + state)) + (token + (nth 0 result-list)) + (new-index + (nth 2 result-list))) + (if token + (progn + (push + token + look-ahead) + (setq + look-ahead-length + (1+ look-ahead-length)) + (setq + index + new-index)) + + ;; Fill up look-ahead with EOF-identifier if we found nothing + (push (list parser-generator--eof-identifier) 
look-ahead) + (setq look-ahead-length (1+ look-ahead-length)) + (setq index (1+ index))))) + (nreverse look-ahead))) (defun parser-generator-lex-analyzer--pop-token () "Pop next token via lex-analyzer." (unless parser-generator-lex-analyzer--index (error "Missing lex-analyzer index when popping!")) - (unless parser-generator-lex-analyzer--function - (error "Missing lex-analyzer function when popping!")) (unless parser-generator--look-ahead-number (error "Missing look-ahead-number when popping!")) - (let ((continue t) - (tokens)) - (while continue - (condition-case error - (progn - (let* ((result-list - (funcall - parser-generator-lex-analyzer--function - parser-generator-lex-analyzer--index - parser-generator-lex-analyzer--state)) - (token - (nth 0 result-list)) - (move-to-index-flag - (nth 1 result-list)) - (new-index - (nth 2 result-list)) - (new-state - (nth 3 result-list))) - (if move-to-index-flag - (progn - (setq-local - parser-generator-lex-analyzer--index - move-to-index-flag) - (setq-local - parser-generator-lex-analyzer--state - new-state)) - (setq - parser-generator-lex-analyzer--index - new-index) - (when token - (unless (listp (car token)) - (setq token (list token))) - (let ((first-token (car token))) - (push - first-token - tokens))) - (setq - continue - nil)))) - (error - (error - "Lex-analyze failed to pop token at %s %s, error: %s" - parser-generator-lex-analyzer--index - parser-generator-lex-analyzer--state - (car (cdr error)))))) - (nreverse tokens))) + (let* ((result-list + (parser-generator-lex-analyzer--get-buffered-lex + parser-generator-lex-analyzer--index + parser-generator-lex-analyzer--state)) + (token + (nth 0 result-list)) + (new-index + (nth 2 result-list)) + (new-state + (nth 3 result-list))) + (setq-local + parser-generator-lex-analyzer--index + new-index) + (setq-local + parser-generator-lex-analyzer--state + new-state) + (if token + (list token) + nil))) (defun parser-generator-lex-analyzer--reset () "Reset lex-analyzer." 
+ (setq + parser-generator-lex-analyzer--buffered-response + (make-hash-table :test 'equal)) (setq parser-generator-lex-analyzer--index parser-generator-lex-analyzer--index-init) @@ -239,6 +192,72 @@ (when parser-generator-lex-analyzer--reset-function (funcall parser-generator-lex-analyzer--reset-function))) +(defun parser-generator-lex-analyzer--get-buffered-lex (index state) + "Get next token in stream, use buffer to only call function when needed." + + (unless (gethash + index + parser-generator-lex-analyzer--buffered-response) + (let ((continue t) + (tmp-index index) + (tmp-state state)) + (unless parser-generator-lex-analyzer--function + (error "Missing lex-analyzer function!")) + (while continue + (condition-case error + (progn + (let* ((result-list + (funcall + parser-generator-lex-analyzer--function + tmp-index + tmp-state)) + (tokens + (nth 0 result-list)) + (move-to-index-flag + (nth 1 result-list)) + (new-state + (nth 3 result-list))) + (if move-to-index-flag + (progn + (setq + tmp-index + move-to-index-flag) + (setq + tmp-state + new-state)) + + (if tokens + + (unless (listp (car tokens)) + (setq tokens (list tokens))) + + ;; Fill up look-ahead with EOF-identifier if we found nothing + (push + (list parser-generator--eof-identifier) + tokens)) + + (dolist (token tokens) + (let ((token-start (car (cdr token))) + (token-end (cdr (cdr token)))) + (puthash + token-start + (list token nil token-end new-state) + parser-generator-lex-analyzer--buffered-response))) + + (setq + continue + nil)))) + (error + (error + "Lex-analyze failed to get next token at: %s in state: %s, error: %s" + index + state + (car (cdr error)))))))) + + (gethash + index + parser-generator-lex-analyzer--buffered-response)) + (provide 'parser-generator-lex-analyzer) diff --git a/test/parser-generator-lex-analyzer-test.el b/test/parser-generator-lex-analyzer-test.el index 44ee9a7ad3..43bbd7a89a 100644 --- a/test/parser-generator-lex-analyzer-test.el +++ 
b/test/parser-generator-lex-analyzer-test.el @@ -86,6 +86,7 @@ (setq index (1+ index))) (list (nreverse tokens) nil new-index nil)))) + (parser-generator-lex-analyzer--reset) (should-error (parser-generator-lex-analyzer--peek-next-look-ahead))