branch: master commit a5ca813b59aa66f26c416004d2cf410f4e6048ba Author: Junpeng Qiu <qjpchm...@gmail.com> Commit: Junpeng Qiu <qjpchm...@gmail.com>
Full & simple parser --- csv-parser.el => full-csv-parser.el | 74 ++++++-------------- parsec.el | 131 +++++++++++++++++++++++------------ simple-csv-parser.el | 56 +++++++++++++++ 3 files changed, 166 insertions(+), 95 deletions(-) diff --git a/csv-parser.el b/full-csv-parser.el similarity index 53% rename from csv-parser.el rename to full-csv-parser.el index 5ead855..75225a4 100644 --- a/csv-parser.el +++ b/full-csv-parser.el @@ -1,4 +1,4 @@ -;;; csv-parser.el --- Sample csv parser using parsec.el -*- lexical-binding: t; -*- +;;; full-csv-parser.el --- Sample csv parser using parsec.el -*- lexical-binding: t; -*- ;; Copyright (C) 2016 Junpeng Qiu @@ -20,64 +20,37 @@ ;;; Commentary: -;; +;; Ref: http://book.realworldhaskell.org/read/using-parsec.html ;;; Code: - (defun csv-file () - (pl-many (csv-line))) + (pl-ensure + (pl-return (pl-endby (csv-line) (csv-eol)) + (pl-eob)))) (defun csv-line () - (prog1 (csv-cells) - (csv-eol))) - -(defun csv-eol () - (pl-or (pl-str "\n\r") - (pl-str "\r\n") - (pl-str "\n") - (pl-str "\r") - (pl-eob))) - -(defun csv-cells () - (cons (csv-cell-content) (csv-remaining-cells))) - -(defun csv-cell-content () - (pl-many-as-string (pl-re "[^,\n]"))) - -(defun csv-remaining-cells () - (pl-or (pl-and (pl-ch ?,) (csv-cells)) - nil)) - + (pl-sepby (csv-cell) (pl-ch ?,))) -(defun csv-file1 () - (pl-endby (csv-line1) (csv-eol))) - -(defun csv-line1 () - (pl-sepby (csv-cell2) (pl-ch ?,))) - -(defun csv-cell1 () - (pl-many-as-string (pl-re "[^,\r\n]"))) - -(defun csv-cell2 () - (pl-or (csv-quoted-cell) (pl-many (pl-re "[^,\n\r]")))) +(defun csv-cell () + (pl-or (csv-quoted-cell) (pl-many-as-string (pl-re "[^,\n\r]")))) (defun csv-quoted-cell () - (pl-ch ?\") - (prog1 (pl-many (csv-quoted-char)) - (pl-failed (pl-ch ?\") "quote at end of cell"))) + (pl-and (pl-ch ?\") + (pl-return (pl-many-as-string (csv-quoted-char)) + (pl-ensure (pl-ch ?\"))))) (defun csv-quoted-char () (pl-or (pl-re "[^\"]") (pl-and (pl-str "\"\"") "\""))) -(defun parse-csv1 (input) - (with-temp-buffer - (insert input) - (goto-char (point-min)) - (csv-file1))) -(parse-csv1 "\"a,1,s,b,\r\nd,e,f") +(defun csv-eol () + (pl-or (pl-str "\n\r") + (pl-str "\r\n") + (pl-str "\n") + (pl-str "\r") + (pl-eob))) (defun parse-csv (input) (with-temp-buffer @@ -85,11 +58,10 @@ (goto-char (point-min)) (csv-file))) -(parse-csv "a1s,b,\n\nd,e,f") -(with-temp-buffer - (insert "a,b,") - (goto-char (point-min)) - (csv-line)) +(parse-csv "\"a,1,s\"s,b,\r\nd,e,f") +(parse-csv "\"e\"\",f") +(parse-csv "\"a,1,\r\n") +(parse-csv "\"a,1,\"\",b,\r\nd,,f") -(provide 'csv-parser) -;;; csv-parser.el ends here +(provide 'full-csv-parser) +;;; full-csv-parser.el ends here diff --git a/parsec.el b/parsec.el index 1b52e22..7bca129 100644 --- a/parsec.el +++ b/parsec.el @@ -30,24 +30,55 @@ "Combinator parsing library for Emacs, similar to Haskell's Parsec" :group 'development) +(defun pl-eob-or-char-as-string () + (let ((c (char-after))) + (if c + (char-to-string c) + "`eob'"))) + +(defun pl-msg (msg) + (cons 'pl-msg msg)) + +(defun pl-msg-p (msg) + (and (consp msg) + (eq (car msg) 'pl-msg))) + +(defalias 'pl-msg-get 'cdr) + +(defun pl-stop (&rest args) + (throw 'failed + (let ((msg (plist-get args :message)) + (expected (plist-get args :expected)) + (found (plist-get args :found))) + (when (or (stringp msg) + (and (stringp expected) + (stringp found))) + (pl-msg (if (stringp msg) + msg + (format "Found \"%s\" -> Expected \"%s\"" + found expected))))))) + (defun pl-ch (ch &rest args) - (if (and (not (eobp)) - (char-equal (char-after) ch)) - (prog1 - (cond - ((memq :nil args) nil) - ((memq :beg args) - (point)) - ((memq :end args) - (1+ (point))) - (t - (char-to-string ch))) - (forward-char 1)) - (throw 'failed nil))) + (let ((next-char (char-after))) + (if (and (not (eobp)) + (char-equal next-char ch)) + (prog1 + (cond + ((memq :nil args) nil) + ((memq :beg args) + (point)) + ((memq :end args) + (1+ (point))) + (t + (char-to-string ch))) + (forward-char 1)) + (pl-stop :expected (char-to-string ch) + :found (pl-eob-or-char-as-string))))) (defun pl-eob () (unless (eobp) - (throw 'failed nil))) + (pl-stop :expected "`eob'" + :found (pl-eob-or-char-as-string)))) (defun pl-re (regexp &rest args) (if (looking-at regexp) @@ -69,7 +100,8 @@ (t (match-string 0))) (goto-char (match-end 0))) - (throw 'failed nil))) + (pl-stop :expected regexp + :found (pl-eob-or-char-as-string)))) (defsubst pl-str (str &rest args) (pl-re (regexp-quote str))) @@ -79,47 +111,58 @@ (defmacro pl-or (&rest parsers) (let ((outer-sym (make-symbol "outer")) - (parser-sym (make-symbol "parser")) - (error-sym (make-symbol "error-message"))) + (parser-sym (make-symbol "parser"))) `(loop named ,outer-sym for ,parser-sym in ',parsers - finally (throw 'failed nil) do - (when (setq ,error-sym - (catch 'failed - (return-from ,outer-sym (eval ,parser-sym)))) - (error ,error-sym))))) + finally (pl-stop :message "None of the parsers succeeds") do + (pl-try + (return-from ,outer-sym (eval ,parser-sym)))))) (defalias 'pl-and 'progn) -(defmacro pl-failed (parser msg) - `(pl-or ,parser - (throw 'failed ,msg))) +(defalias 'pl-return 'prog1) (defmacro pl-try (&rest forms) `(catch 'failed ,@forms)) +(defmacro pl-try-with-message (msg &rest forms) + (declare (indent 1)) + (let ((res-sym (make-symbol "result"))) + `(let ((,res-sym (pl-try ,@forms))) + ,(if msg + `(if (pl-msg-p ,res-sym) + (pl-msg ,msg) + ,res-sym) + `,res-sym)))) + +(defmacro pl-ensure-with-message (msg &rest forms) + (declare (indent 1)) + (let* ((error-sym (make-symbol "err"))) + `(let (,error-sym) + (if (pl-msg-p (setq ,error-sym + (pl-try-with-message ,msg ,@forms))) + (error (pl-msg-get ,error-sym)) + ,error-sym)))) + +(defmacro pl-ensure (&rest forms) + `(pl-ensure-with-message nil ,@forms)) + (defalias 'pl-parse 'pl-try) (defmacro pl-until (parser &optional &key skip) - (let ((error-sym (make-symbol "error-message"))) - `(let (,error-sym) - (catch 'done - (while (not (eobp)) - (when (setq ,error-sym - (catch 'failed - (throw 'done ,parser))) - (error ,error-sym)) - ,(if skip - `(,skip 1) - `(forward-char 1))))))) + `(catch 'done + (while (not (eobp)) + (pl-try + (throw 'done ,parser)) + ,(if skip + `(,skip 1) + `(forward-char 1))))) (defmacro pl-many (parser) - (let ((res (make-symbol "results")) - (msg (make-symbol "error-message"))) - `(let (,res ,msg) - (when (setq ,msg (pl-try - (while (not (eobp)) - (push ,parser ,res)))) - (error ,msg)) + (let ((res (make-symbol "results"))) + `(let (,res) + (pl-try + (while (not (eobp)) + (push ,parser ,res))) (nreverse ,res)))) (defun pl-list-to-string (l) @@ -129,7 +172,7 @@ `(mapconcat #'identity (pl-many ,parser) "")) (defmacro pl-endby (parser end) - `(pl-many (prog1 ,parser + `(pl-many (pl-return ,parser ,end))) (defmacro pl-sepby (parser separator) diff --git a/simple-csv-parser.el b/simple-csv-parser.el new file mode 100644 index 0000000..7bff683 --- /dev/null +++ b/simple-csv-parser.el @@ -0,0 +1,56 @@ +;;; simple-csv-parser.el --- Simple CSV parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: http://book.realworldhaskell.org/read/using-parsec.html + +;;; Code: + +(defun s-csv-file () + (pl-many (s-csv-line))) + +(defun s-csv-line () + (prog1 (s-csv-cells) + (s-csv-eol))) + +(defun s-csv-eol () + (pl-or (pl-str "\n") + (pl-eob))) + +(defun s-csv-cells () + (cons (s-csv-cell-content) (s-csv-remaining-cells))) + +(defun s-csv-cell-content () + (pl-many-as-string (pl-re "[^,\n]"))) + +(defun s-csv-remaining-cells () + (pl-or (pl-and (pl-ch ?,) (s-csv-cells)) nil)) + +(defun s-parse-csv (input) + (with-temp-buffer + (insert input) + (goto-char (point-min)) + (s-csv-file))) + +(s-parse-csv "a1s,b,d,e,f") + +(provide 'simple-csv-parser) +;;; simple-csv-parser.el ends here