branch: master commit dfb3af010ed9d01df57570b1522afa39d6b4611c Merge: 2e9f962 8f0c266 Author: Junpeng Qiu <qjpchm...@gmail.com> Commit: Junpeng Qiu <qjpchm...@gmail.com>
Add 'packages/parsec/' from commit '8f0c266d8b9b0ee5fcf9b80c518644b2849ff3b3' git-subtree-dir: packages/parsec git-subtree-mainline: 2e9f9625a954d88356fce440c19bec104a6734a2 git-subtree-split: 8f0c266d8b9b0ee5fcf9b80c518644b2849ff3b3 --- packages/parsec/.gitignore | 3 + packages/parsec/README.org | 378 +++++++ packages/parsec/examples/full-csv-parser-tests.el | 51 + packages/parsec/examples/full-csv-parser.el | 61 ++ packages/parsec/examples/pjson-tests.el | 102 ++ packages/parsec/examples/pjson.el | 124 +++ packages/parsec/examples/scheme-tests.el | 88 ++ packages/parsec/examples/scheme.el | 108 ++ .../parsec/examples/simple-csv-parser-tests.el | 39 + packages/parsec/examples/simple-csv-parser.el | 55 ++ packages/parsec/examples/url-str-parser-tests.el | 48 + packages/parsec/examples/url-str-parser.el | 56 ++ packages/parsec/parsec-tests.el | 481 +++++++++ packages/parsec/parsec.el | 1042 ++++++++++++++++++++ 14 files changed, 2636 insertions(+) diff --git a/packages/parsec/.gitignore b/packages/parsec/.gitignore new file mode 100644 index 0000000..4206e73 --- /dev/null +++ b/packages/parsec/.gitignore @@ -0,0 +1,3 @@ +*.hs +*.hi +*.o diff --git a/packages/parsec/README.org b/packages/parsec/README.org new file mode 100644 index 0000000..53d08db --- /dev/null +++ b/packages/parsec/README.org @@ -0,0 +1,378 @@ +#+TITLE: parsec.el + +A parser combinator library for Emacs Lisp similar to Haskell's Parsec library. + +* Overview + +This work is based on [[https://github.com/jwiegley/][John Wiegley]]'s [[https://github.com/jwiegley/emacs-pl][emacs-pl]]. The original [[https://github.com/jwiegley/emacs-pl][emacs-pl]] is awesome, +but I found following problems when I tried to use it: + +- It only contains a very limited set of combinators +- Some of its functions (combinators) have different behaviors than their + Haskell counterparts +- It can't show error messages when parsing fails + +So I decided to make a new library on top of it. This library, however, contains +most of the parser combinators in =Text.Parsec.Combinator=, which should be +enough in most use cases. Of course more combinators can be added if necessary! +Most of the parser combinators have the same behavior as their Haskell +counterparts. =parsec.el= also comes with a simple error handling mechanism so +that it can display an error message showing how the parser fails. + +So we can + +- use these parser combinators to write parsers easily from scratch in Emacs + Lisp like what we can do in Haskell +- port existing Haskell program using Parsec to its equivalent Emacs Lisp + program easily + +* Parsing Functions & Parser Combinators + + We compare the functions and macros defined in this library with their Haskell + counterparts, assuming you're already familiar with Haskell's Parsec. If you + don't have any experience with parser combinators, look at the docstrings of + these functions and macros and try them to see the results! They are really + easy to learn and use! + + The *Usage* column for each function/combinator in the following tables is + much simplified. Check the docstring of the function/combinator to see the + full description. + +** Basic Parsing Functions + These parsing functions are used as the basic building block for a parser. By + default, their return value is a *string*. + + | parsec.el | Haskell's Parsec | Usage | + |------------------------+------------------+-------------------------------------------------------| + | parsec-ch | char | parse a character | + | parsec-any-ch | anyChar | parse an arbitrary character | + | parsec-satisfy | satisfy | parse a character satisfying a predicate | + | parsec-newline | newline | parse '\n' | + | parsec-crlf | crlf | parse '\r\n' | + | parsec-eol | eol | parse newline or CRLF | + | parsec-eof, parsec-eob | eof | parse end of file | + | parsec-eol-or-eof | *N/A* | parse EOL or EOL | + | parsec-re | *N/A* | parse using a regular expression | + | parsec-one-of | oneOf | parse one of the characters | + | parsec-none-of | noneOf | parse any character other than the supplied ones | + | parsec-str | *N/A* | parse a string but consume input only when successful | + | parsec-string | string | parse a string and consume input for partial matches | + | parsec-num | *N/A* | parse a number | + | parsec-letter | letter | parse a letter | + | parsec-digit | digit | parse a digit | + + Note: + - =parsec-str= and =parsec-string= are different. =parsec-string= behaves the + same as =string= in Haskell, and =parsec-str= is more like combining + =string= and =try= in Haskell. Personally I found =parsec-str= easier to use + because =parsec-str= is "atomic", which is similar to =parsec-ch=. + - Use the power of regular expressions provided by =parsec-re= and simplify the parser! + +** Parser Combinators + These combinators can be used to combine different parsers. + + | parsec.el | Haskell's Parsec | Usage | + |---------------------------+------------------+--------------------------------------------------------------| + | parsec-or | choice | try the parsers until one succeeds | + | parsec-try | try | try parser and consume no input when an error occurs | + | parsec-lookahead | lookahead | try parser and consume no input when successful | + | parsec-peek | try && lookahead | try parser without comsuming any input | + | parsec-peek-p | try && lookahead | same as parsec-peek except the return value for failure | + | parsec-with-error-message | <?> (similar) | use the new error message when an error occurs | + | parsec-many | many | apply the parser zero or more times | + | parsec-many1 | many1 | apply the parser one or more times | + | parsec-many-till | manyTill | apply parser zero or more times until end succeeds | + | parsec-until | *N/A* | parse until end succeeds | + | parsec-not-followed-by | notFollowedBy | succeed when the parser fails | + | parsec-endby | endby | apply parser zero or more times, separated and ended by end | + | parsec-sepby | sepby | apply parser zero or more times, separated by sep | + | parsec-between | between | apply parser between open and close | + | parsec-count | count | apply parser n times | + | parsec-option | option | apply parser, if it fails, return opt | + | parsec-optional | *N/A* | apply parser zero or one time and return the result | + | parsec-optional* | optional | apply parser zero or one time and discard the result | + | parsec-optional-maybe | optionMaybe | apply parser zero or one time and return the result in Maybe | + + Note: + - =parsec-or= can also be used to replace =<|>=. + - =parsec-with-error-message= is slightly different from =<?>=. It will + replace the error message even when the input is consumed. + - By default, =parsec-many-till= behaves as Haskell's =manyTill=. However, + =parsec-many-till= and =parsec-until= can accept an optional argument to + specify which part(s) to be returned. You can use =:both= or =:end= as the + optional argument to change the default behavior. See the docstrings for + more information. + +** Parser Utilities + These utilities can be used together with parser combinators to build a + parser and ease the translation process if you're trying to port an existing + Haskell program. + + | parsec.el | Haskell's Parsec | Usage | + |----------------------------------+------------------+---------------------------------------------------------| + | parsec-and | do block | try all parsers and return the last result | + | parsec-return | do block | try all parsers and return the first result | + | parsec-ensure | *N/A* | quit the parsing when an error occurs | + | parsec-ensure-with-error-message | *N/A* | quit the parsing when an error occurs with new message | + | parsec-collect | sequence | try all parsers and collect the results into a list | + | parsec-collect* | *N/A* | try all parsers and collect non-nil results into a list | + | parsec-start | parse | entry point | + | parsec-parse | parse | entry point (same as parsec-start) | + | parsec-with-input | parse | perform parsers on input | + | parsec-from-maybe | fromMaybe | retrieve value from Maybe | + | parsec-maybe-p | *N/A* | is a Maybe value or not | + | parsec-query | *N/A* | change the parser's return value | + +** Variants that Return a String + + By default, the macros/functions that return multiple values will put the + values into a list. These macros/functions are: + - =parsec-many= + - =parsec-many1= + - =parsec-many-till= + - =parsec-until= + - =parsec-count= + - =parsec-collect= and =parsec-collect*= + + They all have a variant that returns a string by concatenating the results in + the list: + - =parsec-many-as-string= or =parsec-many-s= + - =parsec-many1-as-string= or =parsec-many1-s= + - =parsec-many-till-as-string= or =parsec-many-till-s= + - =parsec-until-as-string= or =parsec-until-s= + - =parsec-collect-as-string= or =parsec-collect-s= + - =parsec-count-as-string= or =parsec-count-s= + + The =*-s= and =*-as-string= variants are the same, except the =*-s= variants + have a shorter name. Using these =*-s= functions are recommended if you're + dealing with strings very frequently in your code. These variants accept the + same arguments and have the same behavior as their original counterpart that + returns a list. The only difference is the return value. +* Code Examples + Some very simple examples are given here. You can see many code examples in + the test files in this GitHub repo. + + The following code extract the "hello" from the comment: + #+BEGIN_SRC elisp + (parsec-with-input "/* hello */" + (parsec-string "/*") + (parsec-many-till-as-string (parsec-any-ch) + (parsec-try + (parsec-string "*/")))) + #+END_SRC + + The following Haskell program does a similar thing: + #+BEGIN_SRC haskell + import Text.Parsec + + main :: IO () + main = print $ parse p "" "/* hello */" + where + p = do string "/*" + manyTill anyChar (try (string "*/")) + #+END_SRC + + The following code returns the "aeiou" before "end": + #+BEGIN_SRC elisp + (parsec-with-input "if aeiou end" + (parsec-str "if ") + (parsec-return + (parsec-many-as-string (parsec-one-of ?a ?e ?i ?o ?u)) + (parsec-str " end"))) + #+END_SRC + +* Write a Parser: a Simple CSV Parser + You can find the code in =examples/simple-csv-parser.el=. The code is based + on the Haskell code in [[http://book.realworldhaskell.org/read/using-parsec.html][Using Parsec]]. + + An end-of-line should be a string =\n=. We use =(parsec-str "\n")= to parse it + (Note that since =\n= is also one character, =(parsec-ch ?\n)= also works). + Some files may not contain a newline at the end, but we can view end-of-file + as the end-of-line for the last line, and use =parsec-eof= (or =parsec-eob=) + to parse the end-of-file. We use =parsec-or= to combine these two combinators: + #+BEGIN_SRC elisp + (defun s-csv-eol () + (parsec-or (parsec-str "\n") + (parsec-eof))) + #+END_SRC + + A CSV file contains many lines and ends with an end-of-file. Use + =parsec-return= to return the result of the first parser as the result. + #+BEGIN_SRC elisp + (defun s-csv-file () + (parsec-return (parsec-many (s-csv-line)) + (parsec-eof))) + #+END_SRC + + A CSV line contains many CSV cells and ends with an end-of-line, and we + should return the cells as the results: + #+BEGIN_SRC elisp + (defun s-csv-line () + (parsec-return (s-csv-cells) + (s-csv-eol))) + #+END_SRC + + CSV cells is a list, containing the first cell and the remaining cells: + #+BEGIN_SRC elisp + (defun s-csv-cells () + (cons (s-csv-cell-content) (s-csv-remaining-cells))) + #+END_SRC + + A CSV cell consists any character that is not =,= or =\n=, and we use the + =parsec-many-as-string= variant to return the whole content as a string + instead of a list of single-character strings: + #+BEGIN_SRC elisp + (defun s-csv-cell-content () + (parsec-many-as-string (parsec-none-of ?, ?\n))) + #+END_SRC + + For the remaining cells: if followed by a comma =,=, we try to parse more csv + cells. Otherwise, we should return the =nil=: + #+BEGIN_SRC elisp + (defun s-csv-remaining-cells () + (parsec-or (parsec-and (parsec-ch ?,) (s-csv-cells)) nil)) + #+END_SRC + + OK. Our parser is almost done. To begin parsing the content in buffer =foo=, + you need to wrap the parser inside =parsec-start= (or =parsec-parse=): + #+BEGIN_SRC elisp + (with-current-buffer "foo" + (goto-char (point-min)) + (parsec-parse + (s-csv-file))) + #+END_SRC + + If you want to parse a string instead, we provide a simple wrapper macro + =parsec-with-input=, and you feed a string as the input and put arbitraty + parsers inside the macro body. =parsec-start= or =parsec-parse= is not needed. + #+BEGIN_SRC elisp + (parsec-with-input "a1,b1,c1\na2,b2,c2" + (s-csv-file)) + #+END_SRC + + The above code returns: + #+BEGIN_SRC elisp + (("a1" "b1" "c1") ("a2" "b2" "c2")) + #+END_SRC + + Note that if we replace =parsec-many-as-string= with =parsec-many= in + =s-csv-cell-content=: + #+BEGIN_SRC elisp + (defun s-csv-cell-content () + (parsec-many (parsec-none-of ?, ?\n))) + #+END_SRC + + The result would be: + #+BEGIN_SRC elisp + ((("a" "1") ("b" "1") ("c" "1")) (("a" "2") ("b" "2") ("c" "2"))) + #+END_SRC + +* More Parser Examples + I translate some Haskell Parsec examples into Emacs Lisp using =parsec.el=. + You can see from these examples that it is very easy to write parsers using + =parsec.el=, and if you know haskell, you can see that basically I just + translate the Haskell into Emacs Lisp one by one because most of them are just + the same! + + You can find five examples under the =examples/= directory. + + Three of the examples are taken from the chapter [[http://book.realworldhaskell.org/read/using-parsec.html][Using Parsec]] in the book of + [[http://book.realworldhaskell.org/read/][Real World Haskell]]: + - =simple-csv-parser.el=: a simple csv parser with no support for quoted + cells, as explained in previous section. + - =full-csv-parser.el=: a full csv parser + - =url-str-parser.el=: parser parameters in URL + + =pjson.el= is a translation of Haskell's [[https://hackage.haskell.org/package/json-0.9.1/docs/src/Text-JSON-Parsec.html][json library using Parsec]]. + + =scheme.el= is a much simplified Scheme parser based on [[https://en.wikibooks.org/wiki/Write_Yourself_a_Scheme_in_48_Hours/][Write Yourself a + Scheme in 48 Hours]]. + + They're really simple but you can see how this library works! + +* Change the Return Values using =parsec-query= + Parsing has side-effects such as forwarding the current point. In the original + [[https://github.com/jwiegley/emacs-pl][emacs-pl]], you can specify some optional arguments to some parsing functions + (=pl-ch=, =pl-re= etc.) to change the return values. In =parsec.el=, these + functions don't have such a behavior. Instead, we provide a unified interface + =parsec-query=, which accepts any parser, and changes the return value of the + parser. + + You can speicify following arguments: + #+BEGIN_EXAMPLE + :beg --> return the point before applying the PARSER + :end --> return the point after applying the PARSER + :nil --> return nil + :groups N --> return Nth group for `parsec-re'." + #+END_EXAMPLE + + So instead of returning "b" as the result, the following code returns 2: + #+BEGIN_SRC elisp + (parsec-with-input "ab" + (parsec-ch ?a) + (parsec-query (parsec-ch ?b) :beg)) + #+END_SRC + + Returning a point means that you can also incorporate =parsec.el= with Emacs + Lisp functions that can operate on points/regions, such as =goto-char= and + =kill-region=. + + =:group= can be specified when using =parsec-re=: + #+BEGIN_SRC elisp + (parsec-with-input "ab" + (parsec-query (parsec-re "\\(a\\)\\(b\\)") :group 2)) + #+END_SRC + + The above code will return "b" instead of "ab". +* Error Messages + + =parsec.el= implements a simple error handling mechanism. When an error + happens, it will show how the parser fails. + + For example, the following code fails: + #+BEGIN_SRC elisp + (parsec-with-input "aac" + (parsec-count 2 (parsec-ch ?a)) + (parsec-ch ?b)) + #+END_SRC + + The return value is: + #+BEGIN_SRC elisp + (parsec-error . "Found \"c\" -> Expected \"b\"") + #+END_SRC + + This also works when parser combinators fail: + #+BEGIN_SRC elisp + (parsec-with-input "a" + (parsec-or (parsec-ch ?b) + (parsec-ch ?c))) + #+END_SRC + + The return value is: + #+BEGIN_SRC elisp + (parsec-error . "None of the parsers succeeds: + Found \"a\" -> Expected \"c\" + Found \"a\" -> Expected \"b\"") + #+END_SRC + + If an error occurs, the return value is a cons cell that contains the error + message in its =cdr=. Compared to Haskell's Parsec, it's really simple, but at + least the error message could tell us some information. Yeah, not perfect but + usable. + + To test whether a parser returns an error, use =parsec-error-p=. If it returns + an error, you can use =parsec-error-str= to retrieve the error message as a + string. + + You can decide what to do based on the return value of a parser: + #+BEGIN_SRC elisp + (let ((res (parsec-with-input "hello" + (parsec-str "world")))) + (if (parsec-error-p res) + (message "Parser failed:\n%s" (parsec-error-str res)) + (message "Parser succeeded by returning %s" res))) + #+END_SRC + +* Acknowledgement + - Daan Leijen for Haskell's Parsec + - [[https://github.com/jwiegley/][John Wiegley]] for [[https://github.com/jwiegley/emacs-pl][emacs-pl]] diff --git a/packages/parsec/examples/.nosearch b/packages/parsec/examples/.nosearch new file mode 100644 index 0000000..e69de29 diff --git a/packages/parsec/examples/full-csv-parser-tests.el b/packages/parsec/examples/full-csv-parser-tests.el new file mode 100644 index 0000000..ace150f --- /dev/null +++ b/packages/parsec/examples/full-csv-parser-tests.el @@ -0,0 +1,51 @@ +;;; full-csv-parser-tests.el --- Tests for full-csv-parser -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'full-csv-parser) + +(ert-deftest test-full-csv () + (should + (equal + (parse-csv "\"a,1,s\",b,\r\nd,e,f") + '(("a,1,s" "b" "") + ("d" "e" "f")))) + (should + (equal + (parse-csv "\"e\"\",f") + (parsec-error-new-2 "\"" "`EOF'"))) + (should + (equal + (parse-csv "\"a,1,\r\n") + (parsec-error-new-2 "\"" "`EOF'"))) + (should + (equal + (parse-csv "\"a,1,\",b,\r\nd,,f") + '(("a,1," "b" "") + ("d" "" "f"))))) + +(provide 'full-csv-parser-tests) +;;; full-csv-parser-tests.el ends here diff --git a/packages/parsec/examples/full-csv-parser.el b/packages/parsec/examples/full-csv-parser.el new file mode 100644 index 0000000..8c76937 --- /dev/null +++ b/packages/parsec/examples/full-csv-parser.el @@ -0,0 +1,61 @@ +;;; full-csv-parser.el --- Sample csv parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: http://book.realworldhaskell.org/read/using-parsec.html + +;;; Code: + +(defun csv-file () + (parsec-start + (parsec-return (parsec-endby (csv-line) (csv-eol)) + (parsec-eob)))) + +(defun csv-line () + (parsec-sepby (csv-cell) (parsec-ch ?,))) + +(defun csv-cell () + (parsec-or (csv-quoted-cell) (parsec-many-as-string + (parsec-none-of ?, ?\r ?\n)))) + +(defun csv-quoted-cell () + (parsec-and (parsec-ch ?\") + (parsec-return (parsec-many-as-string (csv-quoted-char)) + (parsec-ch ?\")))) + +(defun csv-quoted-char () + (parsec-or (parsec-re "[^\"]") + (parsec-and (parsec-str "\"\"") + "\""))) + +(defun csv-eol () + (parsec-or (parsec-str "\n\r") + (parsec-str "\r\n") + (parsec-str "\n") + (parsec-str "\r") + (parsec-eob))) + +(defun parse-csv (input) + (parsec-with-input input + (csv-file))) + +(provide 'full-csv-parser) +;;; full-csv-parser.el ends here diff --git a/packages/parsec/examples/pjson-tests.el b/packages/parsec/examples/pjson-tests.el new file mode 100644 index 0000000..6360152 --- /dev/null +++ b/packages/parsec/examples/pjson-tests.el @@ -0,0 +1,102 @@ +;;; pjson-tests.el --- Test for parsec json parser -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'pjson) + +(ert-deftest test-pjson-number () + (should + (equal + (parsec-with-input "123" + (pjson-number)) + 123))) + +(ert-deftest test-pjson-boolean () + (should + (equal + (parsec-with-input "false" + (pjson-boolean)) + nil))) + +(ert-deftest test-pjson-null () + (should + (equal + (parsec-with-input "null" + (pjson-null)) + nil))) + +(ert-deftest test-pjson-array () + (should + (equal + (parsec-with-input "[1,true,1,\"abc\",[1],null)" + (pjson-array)) + (parsec-error-new-2 "]" ")"))) + (should + (equal + (parsec-with-input "[1,true,1,\"abc\",[1],null]" + (pjson-array)) + (vector 1 t 1 "abc" + (vector 1) + nil)))) +(ert-deftest test-pjson-string () + (should + (equal + (parsec-with-input "\"asdf\"" + (pjson-string)) + "asdf"))) + +(ert-deftest test-pjson-object () + (should + (equal + (parsec-with-input "{\"a\" :1, \"b\":2, \"c\":[1,true] }" + (pjson-object)) + '(("a" . 1) + ("b" . 2) + ("c" . + [1 t]))))) + +(ert-deftest test-pjson-jvalue () + (should + (equal + (parsec-with-input "[false]" (pjson-jvalue)) + (vector nil)))) + +(ert-deftest test-pjson-parse () + (should + (equal + (pjson-parse "{\"a\" :1, \"b\":2, \"c\":[1,{\"d\":null}]}") + '(("a" . 1) + ("b" . 2) + ("c" . + [1 + (("d"))])))) + (should + (equal + (pjson-parse "{\"a\" :1, \"b\":2, [{ \"c\":[1,true] }]}") + (parsec-error-new-2 "\"" "[")))) + +(provide 'pjson-tests) +;;; pjson-tests.el ends here diff --git a/packages/parsec/examples/pjson.el b/packages/parsec/examples/pjson.el new file mode 100644 index 0000000..be3e9c6 --- /dev/null +++ b/packages/parsec/examples/pjson.el @@ -0,0 +1,124 @@ +;;; pjson.el --- JSON parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: https://hackage.haskell.org/package/json-0.9.1/docs/src/Text-JSON-Parsec.html + +;;; Code: + +(defvar pjson-special-chars + '((?\" . ?\") + (?\\ . ?\\) + (?/ . ?/) + (?b . ?\b) + (?f . ?\f) + (?n . ?\n) + (?r . ?\r) + (?t . ?\t)) + "Characters which are escaped in JSON, with their elisp counterparts.") + +(defsubst pjson-spaces () + (parsec-many-as-string + (parsec-re "[[:space:]\r\n]"))) + +(defmacro pjson-tok (parser) + `(parsec-return ,parser + (pjson-spaces))) + +(defun pjson-value () + (parsec-and + (pjson-spaces) + (pjson-jvaule))) + +(defun pjson-jvalue () + (parsec-or (pjson-null) + (pjson-boolean) + (pjson-number) + (pjson-string) + (pjson-array) + (pjson-object))) + +(defsubst pjson-null () + (parsec-and + (pjson-tok (parsec-str "null")) + nil)) + +(defsubst pjson-boolean () + (parsec-or (parsec-and + (pjson-tok (parsec-str "true")) + t) + (parsec-and + (pjson-tok (parsec-str "false")) + nil))) + +(defsubst pjson-array () + (apply #'vector + (parsec-between (pjson-tok (parsec-ch ?\[)) + (pjson-tok (parsec-ch ?\])) + (parsec-sepby + (pjson-jvalue) + (pjson-tok (parsec-ch ?,)))))) + +(defun pjson-char () + (parsec-or + (parsec-and (parsec-ch ?\\) (pjson-esc)) + (parsec-none-of ?\" ?\\))) + +(defun pjson-esc () + (parsec-or + (assoc-default + (parsec-satisfy (lambda (x) (assq x pjson-special-chars))) + pjson-special-chars) + (parsec-and (parsec-ch ?u) + (pjson-uni)))) + +(defun pjson-uni () + (format "%c" (string-to-number + (parsec-re "[0-9a-zA-z]\\{4\\}") + 16))) + +(defsubst pjson-string () + (parsec-between (pjson-tok (parsec-ch ?\")) + (pjson-tok (parsec-ch ?\")) + (parsec-many-as-string (pjson-char)))) + +(defun pjson-field () + (cons (parsec-return (pjson-string) + (pjson-tok (parsec-ch ?:))) + (pjson-jvalue))) + +(defun pjson-object () + (parsec-between (pjson-tok (parsec-ch ?\{)) + (pjson-tok (parsec-ch ?\})) + (parsec-sepby + (pjson-field) + (pjson-tok (parsec-ch ?,))))) + +(defun pjson-number () + (pjson-tok (string-to-number + (parsec-re "\\+?\\([0-9]+\\)\\(\\.[0-9]+\\)?\\([Ee][+-]?[0-9]+\\)?")))) + +(defun pjson-parse (input) + (parsec-with-input input + (pjson-object))) + +(provide 'pjson) +;;; pjson.el ends here diff --git a/packages/parsec/examples/scheme-tests.el b/packages/parsec/examples/scheme-tests.el new file mode 100644 index 0000000..0e7e402 --- /dev/null +++ b/packages/parsec/examples/scheme-tests.el @@ -0,0 +1,88 @@ +;;; scheme-tests.el --- Tests for scheme parser -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'scheme) + +(ert-deftest test-scheme-number () + (should + (equal (scheme-read "25") + (scheme-number 25)))) + +(ert-deftest test-scheme-string () + (should + (equal + (scheme-read "\"This is a string\"") + "This is a string"))) + +(ert-deftest test-scheme-list () + (should + (equal + (scheme-read "(symbol)") + '(List + (Atom . "symbol")))) + (should + (equal + (scheme-read "(a test)") + '(List + (Atom . "a") + (Atom . "test"))))) + +(ert-deftest test-scheme-dotted-list () + (should + (equal + (scheme-read "(a . test)") + '(DottedList + ((Atom . "a")) + Atom . "test")))) + +(ert-deftest test-scheme-nested () + (should + (equal + (scheme-read "(a (nested) test)") + '(List + (Atom . "a") + (List + (Atom . "nested")) + (Atom . "test"))))) + +(ert-deftest test-scheme-quoted () + (should + (equal + (scheme-read "(a '(quoted (dotted . list)) test)") + '(List + (Atom . "a") + (List + (Atom . "quote") + (List + (Atom . "quoted") + (DottedList + ((Atom . "dotted")) + Atom . "list"))) + (Atom . "test"))))) + +(provide 'scheme-tests) +;;; scheme-tests.el ends here diff --git a/packages/parsec/examples/scheme.el b/packages/parsec/examples/scheme.el new file mode 100644 index 0000000..d750c54 --- /dev/null +++ b/packages/parsec/examples/scheme.el @@ -0,0 +1,108 @@ +;;; scheme.el --- Scheme parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: https://en.wikibooks.org/wiki/Write_Yourself_a_Scheme_in_48_Hours/ + +;;; Code: + +(defsubst scheme-bool (value) + (cons 'Bool value)) + +(defsubst scheme-true () + (scheme-bool 'True)) + +(defsubst scheme-false () + (scheme-bool 'False)) + +(defsubst scheme-atom (atom) + (cons 'Atom atom)) + +(defsubst scheme-number (number) + (cons 'Number number)) + +(defsubst scheme-list (&rest values) + (cons 'List values)) + +(defsubst scheme-dotted-list (head tail) + (cons 'DottedList (cons head tail))) + +(defsubst scheme-symbol () + (parsec-re "[$!#%&|*+/:<=>?@^_~-]")) + +(defsubst scheme-spaces () + (parsec-many (parsec-ch ? ))) + +(defun scheme-parse-string () + (parsec-and (parsec-ch ?\") + (parsec-return (parsec-many-as-string (parsec-none-of ?\")) + (parsec-ch ?\")))) + +(defun scheme-parse-atom () + (let (first rest atom) + (parsec-and (setq first (parsec-or (parsec-letter) (scheme-symbol))) + (setq rest (parsec-many (parsec-or (parsec-letter) + (parsec-digit) + (scheme-symbol))))) + (setq atom (parsec-list-to-string (cons first rest))) + (cond + ((string= atom "#t") (scheme-true)) + ((string= atom "#f") (scheme-false)) + (t (scheme-atom atom))))) + +(defun scheme-parse-number () + (scheme-number + (string-to-number (parsec-many1-as-string (parsec-digit))))) + +(defun scheme-parse-list () + (apply #'scheme-list (parsec-sepby (scheme-parse-expr) (scheme-spaces)))) + +(defun scheme-parse-dotted-list () + (scheme-dotted-list (parsec-endby (scheme-parse-expr) (scheme-spaces)) + (parsec-and + (parsec-ch ?.) + (scheme-spaces) + (scheme-parse-expr)))) + +(defun scheme-parse-quoted () + (parsec-and + (parsec-ch ?\') + (scheme-list (scheme-atom "quote") (scheme-parse-expr)))) + +(defun scheme-parse-expr () + (parsec-or (scheme-parse-atom) + (scheme-parse-string) + (scheme-parse-number) + (scheme-parse-quoted) + (parsec-between + (parsec-ch ?\() + (parsec-ch ?\)) + (parsec-or + (parsec-try + (scheme-parse-list)) + (scheme-parse-dotted-list))))) + +(defun scheme-read (expr) + (parsec-with-input expr + (scheme-parse-expr))) + +(provide 'scheme) +;;; scheme.el ends here diff --git a/packages/parsec/examples/simple-csv-parser-tests.el b/packages/parsec/examples/simple-csv-parser-tests.el new file mode 100644 index 0000000..d118c12 --- /dev/null +++ b/packages/parsec/examples/simple-csv-parser-tests.el @@ -0,0 +1,39 @@ +;;; simple-csv-parser-tests.el --- Tests for simple csv parser -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'simple-csv-parser) + +(ert-deftest test-simple-csv () + (should + (equal + (s-parse-csv "a1s,b,d,e,f\na,,c,d,\n") + '(("a1s" "b" "d" "e" "f") + ("a" "" "c" "d" ""))))) + + +(provide 'simple-csv-parser-tests) +;;; simple-csv-parser-tests.el ends here diff --git a/packages/parsec/examples/simple-csv-parser.el b/packages/parsec/examples/simple-csv-parser.el new file mode 100644 index 0000000..91feaed --- /dev/null +++ b/packages/parsec/examples/simple-csv-parser.el @@ -0,0 +1,55 @@ +;;; simple-csv-parser.el --- Simple CSV parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: http://book.realworldhaskell.org/read/using-parsec.html + +;;; Code: + +(require 'parsec) + +(defun s-csv-file () + (parsec-return (parsec-many (s-csv-line)) + (parsec-eof))) + +(defun s-csv-line () + (parsec-return (s-csv-cells) + (s-csv-eol))) + +(defun s-csv-eol () + (parsec-or (parsec-str "\n") + (parsec-eof))) + +(defun s-csv-cells () + (cons (s-csv-cell-content) (s-csv-remaining-cells))) + +(defun s-csv-cell-content () + (parsec-many-as-string (parsec-none-of ?, ?\n))) + +(defun s-csv-remaining-cells () + (parsec-or (parsec-and (parsec-ch ?,) (s-csv-cells)) nil)) + +(defun s-parse-csv (input) + (parsec-with-input input + (s-csv-file))) + +(provide 'simple-csv-parser) +;;; simple-csv-parser.el ends here diff --git a/packages/parsec/examples/url-str-parser-tests.el b/packages/parsec/examples/url-str-parser-tests.el new file mode 100644 index 0000000..92bca94 --- /dev/null +++ b/packages/parsec/examples/url-str-parser-tests.el @@ -0,0 +1,48 @@ +;;; url-str-parser-tests.el --- Tests for url-str-parser -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'url-str-parser) + +(ert-deftest test-url-str () + (should + (equal + (url-str-parse "foo=bar&a%21=b+c") + '(("foo" Just . "bar") + ("a!" Just . "b c")))) + (should + (equal + (url-str-parse "foo=&a%21=b+c") + '(("foo" Just . "") + ("a!" Just . "b c")))) + (should + (equal + (url-str-parse "foo&a%21=b+c") + '(("foo" . Nothing) + ("a!" Just . "b c"))))) + +(provide 'url-str-parser-tests) +;;; url-str-parser-tests.el ends here diff --git a/packages/parsec/examples/url-str-parser.el b/packages/parsec/examples/url-str-parser.el new file mode 100644 index 0000000..926c6df --- /dev/null +++ b/packages/parsec/examples/url-str-parser.el @@ -0,0 +1,56 @@ +;;; url-str-parser.el --- URL-encoded query string parser using parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Ref: http://book.realworldhaskell.org/read/using-parsec.html + +;;; Code: + + +(defun url-str-query () + (parsec-sepby (url-str-pair) (parsec-ch ?&))) + +(defun url-str-pair () + (cons + (parsec-many1-as-string (url-str-char)) + (parsec-optional-maybe + (parsec-and (parsec-ch ?=) (parsec-many-as-string (url-str-char)))))) + +(defun url-str-char () + (parsec-or (parsec-re "[a-zA-z0-9$_.!*'(),-]") + (parsec-and (parsec-ch ?+) " ") + (url-str-hex))) + +(defun url-str-hex () + (parsec-and + (parsec-ch ?%) + (format "%c" + (string-to-number (format "%s%s" + (parsec-re "[0-9a-zA-z]") + (parsec-re "[0-9a-zA-z]")) + 16)))) + +(defun url-str-parse (input) + (parsec-with-input input + (url-str-query))) + +(provide 'url-str-parser) +;;; url-str-parser.el ends here diff --git a/packages/parsec/parsec-tests.el b/packages/parsec/parsec-tests.el new file mode 100644 index 0000000..e9c2672 --- /dev/null +++ b/packages/parsec/parsec-tests.el @@ -0,0 +1,481 @@ +;;; parsec-tests.el --- Tests for parsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; + +;;; Code: + +(require 'ert) +(require 'parsec) + +(ert-deftest test-parsec-ch () + (should + (equal + (parsec-with-input "ab" + (parsec-ch ?a) + (parsec-ch ?b)) + "b")) + (should + (equal + (parsec-with-input "ab" + (parsec-query (parsec-ch ?a) :beg)) + 1))) + +(ert-deftest test-parsec-satisfy () + (should + (equal + (parsec-with-input "ab" + (parsec-ch ?a) + (parsec-satisfy (lambda (c) (char-equal c ?b)))) + "b")) + (should + (equal + (parsec-with-input "ab" + (parsec-ch ?a) + (parsec-query (parsec-satisfy (lambda (c) (char-equal c ?b))) :end)) + 3))) + +(ert-deftest test-parsec-eol () + (should + (equal + (parsec-with-input "\na" + (parsec-newline) + (parsec-ch ?a)) + "a")) + (should + (equal + (parsec-with-input "\r\na" + (parsec-crlf) + (parsec-ch ?a)) + "a")) + (should + (equal + (parsec-with-input "\r\na" + (parsec-eol) + (parsec-ch ?a)) + "a")) + (should + (equal + (parsec-with-input "\na" + (parsec-eol) + (parsec-ch ?a)) + "a")) + (should + (equal + (parsec-with-input "\ra" + (parsec-eol) + (parsech-ch ?a)) + (parsec-error-new-2 "\n" "a")))) + +(ert-deftest test-parsec-eof () + (should + (equal + (parsec-with-input "\r\na" + (parsec-eol) + (parsec-ch ?a) + (parsec-eof)) + nil))) + +(ert-deftest test-parsec-re () + (should + (equal + (parsec-with-input "abc" + (parsec-query + (parsec-re "\\(a\\)\\(bc\\)") + :group 2)) + "bc"))) + +(ert-deftest test-parsec-make-alternatives () + (should + (equal + (parsec-make-alternatives '(?-)) + "-")) + (should + (equal + (parsec-make-alternatives '(?- ?\] ?a ?^)) + "]a^-")) + (should + (equal + (parsec-make-alternatives '(?- ?^)) + "-^")) + (should + (equal + (parsec-make-alternatives '(?^ ?\")) + "\"^"))) + +(ert-deftest test-parsec-one-of () + (should + (equal + (parsec-with-input "^]-" + (parsec-many-as-string (parsec-one-of ?^ ?\] ?-))) + "^]-")) + (should + (equal + (parsec-with-input "^-" + (parsec-many-as-string (parsec-one-of ?^ ?-))) + "^-"))) + +(ert-deftest test-parsec-none-of () + (should + (equal + (parsec-with-input "-[]" + (parsec-none-of ?\] ?^) + (parsec-one-of ?\[ ?\]) + (parsec-none-of ?- ?^)) + "]"))) + +(ert-deftest test-parsec-str () + (should + (equal + (parsec-with-input "abc" + (parsec-str "abc")) + "abc")) + (should + (equal + (parsec-with-input "abc" + (parsec-or (parsec-str "ac") + (parsec-ch ?a))) + "a"))) + +(ert-deftest test-parsec-string () + (should + (equal + (parsec-with-input "abc" + (parsec-string "abc")) + "abc")) + (should + (equal + (parsec-with-input "abc" + (parsec-or (parsec-string "ac") + (parsec-ch ?a))) + (parsec-error-new-2 "c" "b"))) + (should + (equal + (parsec-with-input "abc" + (parsec-or (parsec-try (parsec-string "ac")) + (parsec-ch ?a))) + "a"))) + +(ert-deftest test-parsec-or () + (should + (equal + (parsec-with-input "1" + (parsec-or (parsec-letter) + (parsec-digit))) + "1")) + (should + (equal + (parsec-with-input "124" + (parsec-or (parsec-string "13") + (parsec-ch ?1))) + (parsec-error-new-2 "3" "2"))) + (should + (equal + (parsec-with-input "124" + (parsec-or (parsec-str "13") + (parsec-ch ?1))) + "1"))) + +(ert-deftest test-parsec-collect-optional () + (should + (equal + (parsec-with-input "abc-def" + (parsec-collect-as-string + (parsec-and + (parsec-ch ?a) + (parsec-str "bc")) + (parsec-optional (parsec-ch ?-)) + (parsec-and + (parsec-return (parsec-str "de") + (parsec-ch ?f))))) + "bc-de")) + (should + (equal + (parsec-with-input "abcdef" + (parsec-collect-as-string + (parsec-and + (parsec-ch ?a) + (parsec-str "bc")) + (parsec-optional (parsec-ch ?-)) + (parsec-and + (parsec-return (parsec-str "de") + (parsec-ch ?f))))) + "bcde"))) + +(ert-deftest test-parsec-try () + (should + (equal + (parsec-with-input "abc" + (parsec-or (parsec-try (parsec-string "abd")) + (parsec-str "abc"))) + "abc"))) + +(ert-deftest test-parsec-lookahead () + (should + (equal + (parsec-with-input "abc" + (parsec-lookahead (parsec-str "abc")) + (point)) + (point-min))) + (should + (equal + (parsec-with-input "abc" + (parsec-start + (parsec-lookahead + (parsec-and + (parsec-ch ?a) + (parsec-ch ?c)))) + (point)) + (1+ (point-min)))) + (should + (equal + (parsec-with-input "abc" + (parsec-start + (parsec-try + (parsec-lookahead + (parsec-and + (parsec-ch ?a) + (parsec-ch ?c))))) + (point)) + (point-min)))) + +(ert-deftest test-parsec-error-handles () + (should + (equal + (parsec-with-input "abc" + (parsec-with-error-message "foo" + (parsec-str "abd"))) + (parsec-error-new "foo"))) + (should + (equal + (parsec-with-input "abc" + (parsec-with-error-message "foo" + (parsec-str "abc"))) + "abc")) + (should + (equal + (condition-case err + (parsec-with-input "abc" + (parsec-ensure-with-error-message "foo" + (parsec-str "abd"))) + (error (cdr err))) + '("foo"))) + (should + (equal + (condition-case err + (parsec-with-input "abc" + (parsec-ensure-with-error-message "foo" + (parsec-str "abc"))) + (error (cdr err))) + "abc"))) + +(ert-deftest test-parsec-many () + (should + (equal + (parsec-with-input "aaaaab" + (parsec-collect-as-string + (parsec-many-as-string (parsec-ch ?a)) + (parsec-many-as-string (parsec-ch ?c)) + (parsec-many1-as-string (parsec-ch ?b)))) + "aaaaab")) + (should + (equal + (parsec-with-input "aaaaab" + (parsec-collect-as-string + (parsec-many-as-string (parsec-ch ?a)) + (parsec-many-as-string (parsec-ch ?c)) + (parsec-many1-as-string (parsec-ch ?b)) + (parsec-many1-as-string (parsec-ch ?c)))) + (parsec-error-new-2 "c" "`EOF'"))) + (should + (equal + (parsec-with-input "abababaa" + (parsec-many1-as-string (parsec-string "ab"))) + (parsec-error-new-2 "b" "a"))) + (should + (equal + (parsec-with-input "abababaa" + (parsec-many1-as-string (parsec-try (parsec-string "ab"))) + (parsec-str "aa")) + "aa")) + (should + (equal + (parsec-with-input "abababaa" + (parsec-many1-as-string (parsec-str "ab")) + (parsec-str "aa")) + "aa"))) + + +(ert-deftest test-parsec-till () + (should + (equal + (parsec-with-input "abcd" + (parsec-many-till-as-string (parsec-any-ch) (parsec-ch ?d))) + "abc")) + (should + (equal + (parsec-with-input "abcd" + (parsec-many-till-as-string (parsec-any-ch) (parsec-ch ?d) :both)) + '("abc" . "d"))) + (should + (equal + (parsec-with-input "abcd" + (parsec-many-till-as-string (parsec-any-ch) (parsec-ch ?d) :end)) + "d")) + (should + (equal + (parsec-with-input "abcd" + (parsec-with-error-message "eof" + (parsec-many-till-as-string (parsec-any-ch) (parsec-ch ?e)))) + (parsec-error-new "eof"))) + (should + (equal + (parsec-with-input "abc" + (parsec-until-as-string (parsec-ch ?c))) + "ab")) + (should + (equal + (parsec-with-input "abc" + (parsec-until-as-string (parsec-ch ?c) :end)) + "c")) + (should + (equal + (parsec-with-input "abc" + (parsec-query (parsec-until-as-string (parsec-ch ?c)) :beg)) + 1))) + +(ert-deftest test-parsec-not-followed-by () + (should + (equal + (parsec-with-input "abd" + (parsec-collect* + (parsec-str "ab") + (parsec-not-followed-by (parsec-ch ?c)) + (parsec-ch ?d))) + '("ab" "d"))) + (should + (equal + (parsec-with-input "abd" + (parsec-collect* + (parsec-str "ab") + (parsec-or (parsec-not-followed-by (parsec-ch ?d)) + (parsec-not-followed-by (parsec-ch ?c))) + (parsec-ch ?d))) + '("ab" "d")))) + +(ert-deftest test-parsec-endby () + (should + (equal + (parsec-with-input "abc\ndef" + (parsec-endby (parsec-many-as-string (parsec-letter)) + (parsec-eol-or-eof))) + '("abc" "def")))) + +(ert-deftest test-parsec-sepby () + (should + (equal + (parsec-with-input "ab,cd,ef" + (parsec-sepby (parsec-many-as-string (parsec-re "[^,]")) + (parsec-ch ?,))) + '("ab" "cd" "ef")))) + +(ert-deftest test-parsec-between () + (should + (equal + (parsec-with-input "{abc}" + (parsec-between + (parsec-ch ?\{) (parsec-ch ?\}) + (parsec-or + (parsec-str "ac") + (parsec-many-as-string (parsec-letter))))) + "abc")) + (should + (equal + (parsec-with-input "{abc}" + (parsec-between + (parsec-ch ?\{) (parsec-ch ?\}) + (parsec-or + (parsec-string "ac") + (parsec-many-as-string (parsec-letter))))) + (parsec-error-new-2 "c" "b")))) + +(ert-deftest test-parsec-count () + (should + (equal + (parsec-with-input "aaaab" + (parsec-return (parsec-count-as-string 3 (parsec-ch ?a)) + (parsec-many1 (parsec-one-of ?a ?b)))) + "aaa"))) + +(ert-deftest test-parsec-option () + (should + (equal + (parsec-with-input "ab" + (parsec-option "opt" (parsec-string "ac"))) + (parsec-error-new-2 "c" "b"))) + (should + (equal + (parsec-with-input "ab" + (parsec-option "opt" (parsec-str "ac"))) + "opt")) + (should + (equal + (parsec-with-input "ab" + (parsec-option "opt" (parsec-string "ab"))) + "ab"))) + +(ert-deftest test-parsec-optional () + (should + (equal + (parsec-with-input "abcdef" + (parsec-collect-as-string + (parsec-str "abc") + (parsec-optional (parsec-ch ?-)) + (parsec-str "def"))) + "abcdef")) + (should + (equal + (parsec-with-input "abc-def" + (parsec-collect-as-string + (parsec-str "abc") + (parsec-optional (parsec-ch ?-)) + (parsec-str "def"))) + "abc-def")) + (should + (equal + (parsec-with-input "abcdef" + (parsec-collect-as-string + (parsec-str "abc") + (parsec-from-maybe (parsec-optional-maybe (parsec-ch ?-))) + (parsec-str "def"))) + "abcdef")) + (should + (equal + (parsec-with-input "abc-def" + (parsec-collect-as-string + (parsec-str "abc") + (parsec-from-maybe (parsec-optional-maybe (parsec-ch ?-))) + (parsec-str "def"))) + "abc-def"))) + +(provide 'parsec-tests) +;;; parsec-tests.el ends here diff --git a/packages/parsec/parsec.el b/packages/parsec/parsec.el new file mode 100644 index 0000000..468ecda --- /dev/null +++ b/packages/parsec/parsec.el @@ -0,0 +1,1042 @@ +;;; parsec.el --- Parser combinator library -*- lexical-binding: t; -*- + +;; Copyright (C) 2016 Junpeng Qiu + +;; Author: Junpeng Qiu <qjpchm...@gmail.com> +;; Maintainer: Junpeng Qiu <qjpchm...@gmail.com> +;; URL: https://github.com/cute-jumper/parsec.el +;; Version: 0.1.3 +;; Package-Requires: ((emacs "24") (cl-lib "0.5")) +;; Keywords: extensions + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; _____________ + +;; PARSEC.EL + +;; Junpeng Qiu +;; _____________ + + +;; Table of Contents +;; _________________ + +;; 1 Overview +;; 2 Parsing Functions & Parser Combinators +;; .. 2.1 Basic Parsing Functions +;; .. 2.2 Parser Combinators +;; .. 2.3 Parser Utilities +;; .. 2.4 Variants that Return a String +;; 3 Code Examples +;; 4 Write a Parser: a Simple CSV Parser +;; 5 More Parser Examples +;; 6 Change the Return Values using `parsec-query' +;; 7 Error Messages +;; 8 Acknowledgement + + +;; A parser combinator library for Emacs Lisp similar to Haskell's Parsec +;; library. + + +;; 1 Overview +;; ========== + +;; This work is based on [John Wiegley]'s [emacs-pl]. The original +;; [emacs-pl] is awesome, but I found following problems when I tried to +;; use it: + +;; - It only contains a very limited set of combinators +;; - Some of its functions (combinators) have different behaviors than +;; their Haskell counterparts +;; - It can't show error messages when parsing fails + +;; So I decided to make a new library on top of it. This library, +;; however, contains most of the parser combinators in +;; `Text.Parsec.Combinator', which should be enough in most use cases. Of +;; course more combinators can be added if necessary! Most of the parser +;; combinators have the same behavior as their Haskell counterparts. +;; `parsec.el' also comes with a simple error handling mechanism so that +;; it can display an error message showing how the parser fails. + +;; So we can + +;; - use these parser combinators to write parsers easily from scratch in +;; Emacs Lisp like what we can do in Haskell +;; - port existing Haskell program using Parsec to its equivalent Emacs +;; Lisp program easily + + +;; [John Wiegley] https://github.com/jwiegley/ + +;; [emacs-pl] https://github.com/jwiegley/emacs-pl + + +;; 2 Parsing Functions & Parser Combinators +;; ======================================== + +;; We compare the functions and macros defined in this library with their +;; Haskell counterparts, assuming you're already familiar with Haskell's +;; Parsec. If you don't have any experience with parser combinators, look +;; at the docstrings of these functions and macros and try them to see +;; the results! They are really easy to learn and use! + +;; The *Usage* column for each function/combinator in the following +;; tables is much simplified. Check the docstring of the +;; function/combinator to see the full description. + + +;; 2.1 Basic Parsing Functions +;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +;; These parsing functions are used as the basic building block for a +;; parser. By default, their return value is a *string*. + +;; parsec.el Haskell's Parsec Usage +;; ------------------------------------------------------------------------------------------------- +;; parsec-ch char parse a character +;; parsec-any-ch anyChar parse an arbitrary character +;; parsec-satisfy satisfy parse a character satisfying a predicate +;; parsec-newline newline parse '\n' +;; parsec-crlf crlf parse '\r\n' +;; parsec-eol eol parse newline or CRLF +;; parsec-eof, parsec-eob eof parse end of file +;; parsec-eol-or-eof *N/A* parse EOL or EOL +;; parsec-re *N/A* parse using a regular expression +;; parsec-one-of oneOf parse one of the characters +;; parsec-none-of noneOf parse any character other than the supplied ones +;; parsec-str *N/A* parse a string but consume input only when successful +;; parsec-string string parse a string and consume input for partial matches +;; parsec-num *N/A* parse a number +;; parsec-letter letter parse a letter +;; parsec-digit digit parse a digit + +;; Note: +;; - `parsec-str' and `parsec-string' are different. `parsec-string' +;; behaves the same as `string' in Haskell, and `parsec-str' is more +;; like combining `string' and `try' in Haskell. Personally I found +;; `parsec-str' easier to use because `parsec-str' is "atomic", which +;; is similar to `parsec-ch'. +;; - Use the power of regular expressions provided by `parsec-re' and +;; simplify the parser! + + +;; 2.2 Parser Combinators +;; ~~~~~~~~~~~~~~~~~~~~~~ + +;; These combinators can be used to combine different parsers. + +;; parsec.el Haskell's Parsec Usage +;; ----------------------------------------------------------------------------------------------------------- +;; parsec-or choice try the parsers until one succeeds +;; parsec-try try try parser and consume no input when an error occurs +;; parsec-lookahead lookahead try parser and consume no input when successful +;; parsec-peek try && lookahead try parser without comsuming any input +;; parsec-peek-p try && lookahead same as parsec-peek except the return value for failure +;; parsec-with-error-message <?> (similar) use the new error message when an error occurs +;; parsec-many many apply the parser zero or more times +;; parsec-many1 many1 apply the parser one or more times +;; parsec-many-till manyTill apply parser zero or more times until end succeeds +;; parsec-until *N/A* parse until end succeeds +;; parsec-not-followed-by notFollowedBy succeed when the parser fails +;; parsec-endby endby apply parser zero or more times, separated and ended by end +;; parsec-sepby sepby apply parser zero or more times, separated by sep +;; parsec-between between apply parser between open and close +;; parsec-count count apply parser n times +;; parsec-option option apply parser, if it fails, return opt +;; parsec-optional *N/A* apply parser zero or one time and return the result +;; parsec-optional* optional apply parser zero or one time and discard the result +;; parsec-optional-maybe optionMaybe apply parser zero or one time and return the result in Maybe + +;; Note: +;; - `parsec-or' can also be used to replace `<|>'. +;; - `parsec-with-error-message' is slightly different from `<?>'. It +;; will replace the error message even when the input is consumed. +;; - By default, `parsec-many-till' behaves as Haskell's `manyTill'. +;; However, `parsec-many-till' and `parsec-until' can accept an +;; optional argument to specify which part(s) to be returned. You can +;; use `:both' or `:end' as the optional argument to change the default +;; behavior. See the docstrings for more information. + + +;; 2.3 Parser Utilities +;; ~~~~~~~~~~~~~~~~~~~~ + +;; These utilities can be used together with parser combinators to build +;; a parser and ease the translation process if you're trying to port an +;; existing Haskell program. + +;; parsec.el Haskell's Parsec Usage +;; ------------------------------------------------------------------------------------------------------------- +;; parsec-and do block try all parsers and return the last result +;; parsec-return do block try all parsers and return the first result +;; parsec-ensure *N/A* quit the parsing when an error occurs +;; parsec-ensure-with-error-message *N/A* quit the parsing when an error occurs with new message +;; parsec-collect sequence try all parsers and collect the results into a list +;; parsec-collect* *N/A* try all parsers and collect non-nil results into a list +;; parsec-start parse entry point +;; parsec-parse parse entry point (same as parsec-start) +;; parsec-with-input parse perform parsers on input +;; parsec-from-maybe fromMaybe retrieve value from Maybe +;; parsec-maybe-p *N/A* is a Maybe value or not +;; parsec-query *N/A* change the parser's return value + + +;; 2.4 Variants that Return a String +;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +;; By default, the macros/functions that return multiple values will put +;; the values into a list. These macros/functions are: +;; - `parsec-many' +;; - `parsec-many1' +;; - `parsec-many-till' +;; - `parsec-until' +;; - `parsec-count' +;; - `parsec-collect' and `parsec-collect*' + +;; They all have a variant that returns a string by concatenating the +;; results in the list: +;; - `parsec-many-as-string' or `parsec-many-s' +;; - `parsec-many1-as-string' or `parsec-many1-s' +;; - `parsec-many-till-as-string' or `parsec-many-till-s' +;; - `parsec-until-as-string' or `parsec-until-s' +;; - `parsec-collect-as-string' or `parsec-collect-s' +;; - `parsec-count-as-string' or `parsec-count-s' + +;; The `*-s' and `*-as-string' variants are the same, except the `*-s' +;; variants have a shorter name. Using these `*-s' functions are +;; recommended if you're dealing with strings very frequently in your +;; code. These variants accept the same arguments and have the same +;; behavior as their original counterpart that returns a list. The only +;; difference is the return value. + + +;; 3 Code Examples +;; =============== + +;; Some very simple examples are given here. You can see many code +;; examples in the test files in this GitHub repo. + +;; The following code extract the "hello" from the comment: +;; ,---- +;; | (parsec-with-input "/* hello */" +;; | (parsec-string "/*") +;; | (parsec-many-till-as-string (parsec-any-ch) +;; | (parsec-try +;; | (parsec-string "*/")))) +;; `---- + +;; The following Haskell program does a similar thing: +;; ,---- +;; | import Text.Parsec +;; | +;; | main :: IO () +;; | main = print $ parse p "" "/* hello */" +;; | where +;; | p = do string "/*" +;; | manyTill anyChar (try (string "*/")) +;; `---- + +;; The following code returns the "aeiou" before "end": +;; ,---- +;; | (parsec-with-input "if aeiou end" +;; | (parsec-str "if ") +;; | (parsec-return +;; | (parsec-many-as-string (parsec-one-of ?a ?e ?i ?o ?u)) +;; | (parsec-str " end"))) +;; `---- + + +;; 4 Write a Parser: a Simple CSV Parser +;; ===================================== + +;; You can find the code in `examples/simple-csv-parser.el'. The code is +;; based on the Haskell code in [Using Parsec]. + +;; An end-of-line should be a string `\n'. We use `(parsec-str "\n")' to +;; parse it (Note that since `\n' is also one character, `(parsec-ch +;; ?\n)' also works). Some files may not contain a newline at the end, +;; but we can view end-of-file as the end-of-line for the last line, and +;; use `parsec-eof' (or `parsec-eob') to parse the end-of-file. We use +;; `parsec-or' to combine these two combinators: +;; ,---- +;; | (defun s-csv-eol () +;; | (parsec-or (parsec-str "\n") +;; | (parsec-eof))) +;; `---- + +;; A CSV file contains many lines and ends with an end-of-file. Use +;; `parsec-return' to return the result of the first parser as the +;; result. +;; ,---- +;; | (defun s-csv-file () +;; | (parsec-return (parsec-many (s-csv-line)) +;; | (parsec-eof))) +;; `---- + +;; A CSV line contains many CSV cells and ends with an end-of-line, and +;; we should return the cells as the results: +;; ,---- +;; | (defun s-csv-line () +;; | (parsec-return (s-csv-cells) +;; | (s-csv-eol))) +;; `---- + +;; CSV cells is a list, containing the first cell and the remaining +;; cells: +;; ,---- +;; | (defun s-csv-cells () +;; | (cons (s-csv-cell-content) (s-csv-remaining-cells))) +;; `---- + +;; A CSV cell consists any character that is not =,= or `\n', and we use +;; the `parsec-many-as-string' variant to return the whole content as a +;; string instead of a list of single-character strings: +;; ,---- +;; | (defun s-csv-cell-content () +;; | (parsec-many-as-string (parsec-none-of ?, ?\n))) +;; `---- + +;; For the remaining cells: if followed by a comma =,=, we try to parse +;; more csv cells. Otherwise, we should return the `nil': +;; ,---- +;; | (defun s-csv-remaining-cells () +;; | (parsec-or (parsec-and (parsec-ch ?,) (s-csv-cells)) nil)) +;; `---- + +;; OK. Our parser is almost done. To begin parsing the content in buffer +;; `foo', you need to wrap the parser inside `parsec-start' (or +;; `parsec-parse'): +;; ,---- +;; | (with-current-buffer "foo" +;; | (goto-char (point-min)) +;; | (parsec-parse +;; | (s-csv-file))) +;; `---- + +;; If you want to parse a string instead, we provide a simple wrapper +;; macro `parsec-with-input', and you feed a string as the input and put +;; arbitraty parsers inside the macro body. `parsec-start' or +;; `parsec-parse' is not needed. +;; ,---- +;; | (parsec-with-input "a1,b1,c1\na2,b2,c2" +;; | (s-csv-file)) +;; `---- + +;; The above code returns: +;; ,---- +;; | (("a1" "b1" "c1") ("a2" "b2" "c2")) +;; `---- + +;; Note that if we replace `parsec-many-as-string' with `parsec-many' in +;; `s-csv-cell-content': +;; ,---- +;; | (defun s-csv-cell-content () +;; | (parsec-many (parsec-none-of ?, ?\n))) +;; `---- + +;; The result would be: +;; ,---- +;; | ((("a" "1") ("b" "1") ("c" "1")) (("a" "2") ("b" "2") ("c" "2"))) +;; `---- + + +;; [Using Parsec] http://book.realworldhaskell.org/read/using-parsec.html + + +;; 5 More Parser Examples +;; ====================== + +;; I translate some Haskell Parsec examples into Emacs Lisp using +;; `parsec.el'. You can see from these examples that it is very easy to +;; write parsers using `parsec.el', and if you know haskell, you can see +;; that basically I just translate the Haskell into Emacs Lisp one by one +;; because most of them are just the same! + +;; You can find five examples under the `examples/' directory. + +;; Three of the examples are taken from the chapter [Using Parsec] in the +;; book of [Real World Haskell]: +;; - `simple-csv-parser.el': a simple csv parser with no support for +;; quoted cells, as explained in previous section. +;; - `full-csv-parser.el': a full csv parser +;; - `url-str-parser.el': parser parameters in URL + +;; `pjson.el' is a translation of Haskell's [json library using Parsec]. + +;; `scheme.el' is a much simplified Scheme parser based on [Write +;; Yourself a Scheme in 48 Hours]. + +;; They're really simple but you can see how this library works! + + +;; [Using Parsec] http://book.realworldhaskell.org/read/using-parsec.html + +;; [Real World Haskell] http://book.realworldhaskell.org/read/ + +;; [json library using Parsec] +;; https://hackage.haskell.org/package/json-0.9.1/docs/src/Text-JSON-Parsec.html + +;; [Write Yourself a Scheme in 48 Hours] +;; https://en.wikibooks.org/wiki/Write_Yourself_a_Scheme_in_48_Hours/ + + +;; 6 Change the Return Values using `parsec-query' +;; =============================================== + +;; Parsing has side-effects such as forwarding the current point. In the +;; original [emacs-pl], you can specify some optional arguments to some +;; parsing functions (`pl-ch', `pl-re' etc.) to change the return values. +;; In `parsec.el', these functions don't have such a behavior. Instead, +;; we provide a unified interface `parsec-query', which accepts any +;; parser, and changes the return value of the parser. + +;; You can speicify following arguments: +;; ,---- +;; | :beg --> return the point before applying the PARSER +;; | :end --> return the point after applying the PARSER +;; | :nil --> return nil +;; | :groups N --> return Nth group for `parsec-re'." +;; `---- + +;; So instead of returning "b" as the result, the following code returns +;; 2: +;; ,---- +;; | (parsec-with-input "ab" +;; | (parsec-ch ?a) +;; | (parsec-query (parsec-ch ?b) :beg)) +;; `---- + +;; Returning a point means that you can also incorporate `parsec.el' with +;; Emacs Lisp functions that can operate on points/regions, such as +;; `goto-char' and `kill-region'. + +;; `:group' can be specified when using `parsec-re': +;; ,---- +;; | (parsec-with-input "ab" +;; | (parsec-query (parsec-re "\\(a\\)\\(b\\)") :group 2)) +;; `---- + +;; The above code will return "b" instead of "ab". + + +;; [emacs-pl] https://github.com/jwiegley/emacs-pl + + +;; 7 Error Messages +;; ================ + +;; `parsec.el' implements a simple error handling mechanism. When an +;; error happens, it will show how the parser fails. + +;; For example, the following code fails: +;; ,---- +;; | (parsec-with-input "aac" +;; | (parsec-count 2 (parsec-ch ?a)) +;; | (parsec-ch ?b)) +;; `---- + +;; The return value is: +;; ,---- +;; | (parsec-error . "Found \"c\" -> Expected \"b\"") +;; `---- + +;; This also works when parser combinators fail: +;; ,---- +;; | (parsec-with-input "a" +;; | (parsec-or (parsec-ch ?b) +;; | (parsec-ch ?c))) +;; `---- + +;; The return value is: +;; ,---- +;; | (parsec-error . "None of the parsers succeeds: +;; | Found \"a\" -> Expected \"c\" +;; | Found \"a\" -> Expected \"b\"") +;; `---- + +;; If an error occurs, the return value is a cons cell that contains the +;; error message in its `cdr'. Compared to Haskell's Parsec, it's really +;; simple, but at least the error message could tell us some information. +;; Yeah, not perfect but usable. + +;; To test whether a parser returns an error, use `parsec-error-p'. If it +;; returns an error, you can use `parsec-error-str' to retrieve the error +;; message as a string. + +;; You can decide what to do based on the return value of a parser: +;; ,---- +;; | (let ((res (parsec-with-input "hello" +;; | (parsec-str "world")))) +;; | (if (parsec-error-p res) +;; | (message "Parser failed:\n%s" (parsec-error-str res)) +;; | (message "Parser succeeded by returning %s" res))) +;; `---- + + +;; 8 Acknowledgement +;; ================= + +;; - Daan Leijen for Haskell's Parsec +;; - [John Wiegley] for [emacs-pl] + + +;; [John Wiegley] https://github.com/jwiegley/ + +;; [emacs-pl] https://github.com/jwiegley/emacs-pl + +;;; Code: + +(require 'cl-lib) + +(defgroup parsec nil + "Parser combinators for Emacs Lisp" + :group 'development) + +(defvar parsec-last-error-message nil) + +(defun parsec-eof-or-char-as-string () + (let ((c (char-after))) + (if c + (char-to-string c) + "`EOF'"))) + +(defun parsec-error-new (msg) + (cons 'parsec-error msg)) + +(defun parsec-error-new-2 (expected found) + (parsec-error-new (format "Found \"%s\" -> Expected \"%s\"" + found expected))) + +(defun parsec-error-p (obj) + (and (consp obj) + (eq (car obj) 'parsec-error))) + +(defalias 'parsec-error-str 'cdr) + +(defsubst parsec-throw (msg) + (throw 'parsec-failed msg)) + +(defun parsec-stop (&rest args) + (parsec-throw + (setq parsec-last-error-message + (let ((msg (plist-get args :message)) + (expected (plist-get args :expected)) + (found (plist-get args :found))) + (when (or (stringp msg) + (and (stringp expected) + (stringp found))) + (if (stringp msg) + (parsec-error-new msg) + (parsec-error-new-2 expected found))))))) + +(defun parsec-ch (ch) + "Parse a character CH." + (let ((next-char (char-after))) + (if (and (not (eobp)) + (char-equal next-char ch)) + (progn (forward-char 1) + (char-to-string ch)) + (parsec-stop :expected (char-to-string ch) + :found (parsec-eof-or-char-as-string))))) + +(defun parsec-any-ch () + "Parse any character." + (if (not (eobp)) + (prog1 (char-to-string (char-after)) + (forward-char)) + (parsec-stop :expected "any char" + :found (parsec-eof-or-char-as-string)))) + +(defun parsec-satisfy (pred) + "Parse any character that satisfies the predicate PRED." + (let ((next-char (char-after))) + (if (and (not (eobp)) + (funcall pred next-char)) + (progn (forward-char 1) + (char-to-string next-char)) + (parsec-stop :expected (format "%s" pred) + :found (parsec-eof-or-char-as-string))))) + +(defun parsec-newline () + "Parse a newline character \"\\n\"." + (parsec-ch ?\n)) + +(defun parsec-crlf () + "Parse a carriage return (\'\\r\') followed by a newline \"\\n\"." + (parsec-and (parsec-ch ?\r) (parsec-ch ?\n))) + +(defun parsec-eol () + "Parse a newline or a CRLF and return \"\\n\"." + (parsec-or (parsec-newline) (parsec-crlf))) + +(defun parsec-eob () + "Indicate the end of file (buffer)." + (unless (eobp) + (parsec-stop :expected "`EOF'" + :found (parsec-eof-or-char-as-string)))) + +(defalias 'parsec-eof 'parsec-eob) + +(defun parsec-eol-or-eof () + "Indicate either eol or eof." + (parsec-or (parsec-eol) (parsec-eof))) + +(defun parsec-re (regexp) + "Parse the input matching the regular expression REGEXP." + (if (looking-at regexp) + (progn (goto-char (match-end 0)) + (match-string 0)) + (parsec-stop :expected regexp + :found (parsec-eof-or-char-as-string)))) + +(defun parsec-make-alternatives (chars) + (let ((regex-head "") + (regex-str "") + (regex-end "") + contains-caret-p) + (dolist (c chars) + (cond + ((char-equal c ?\]) (setq regex-head "]")) + ((char-equal c ?-) (setq regex-end "-")) + ((char-equal c ?^) (setq contains-caret-p t)) + (t (setq regex-str (concat regex-str (char-to-string c)))))) + (when contains-caret-p + (if (and + (string-equal regex-end "-") + (string-equal regex-head "") + (string-equal regex-str "")) + (setq regex-end "-^") + (setq regex-str (concat regex-str "^")))) + (concat regex-head regex-str regex-end))) + +(defun parsec-one-of (&rest chars) + "Succeed if the current character is in the supplied list of CHARS. +Return the parsed character. + +> (parsec-one-of ?a ?e ?i ?o ?u) + +Note this function is just a wrapper of `parsec-re'. For complicated use cases, +consider using `parsec-re' instead." + (parsec-re (format "[%s]" (parsec-make-alternatives chars)))) + +(defun parsec-none-of (&rest chars) + "Succeed if the current character not in the supplied list of CHARS. +Return the parsed character. + +> (parsec-none-of ?a ?e ?i ?o ?u) + +Note this function is just a wrapper of `parsec-re'. For complicated use cases, +consider using `parsec-re' instead." + (parsec-re (format "[^%s]" (parsec-make-alternatives chars)))) + +(defsubst parsec-str (str) + "Parse STR and only consume the input for an exact match. +Return the parsed string. + +Note this function's behavior is different from the `string' +function of Haskll's Parsec. Use `parsec-string' if you want the +same behavior as in Haskell." + (parsec-re (regexp-quote str))) + +(defsubst parsec-string (str) + "Parse STR and consume the input even for a partial match. +Return the parsed string. + +It is equivalent to calling `parsec-ch' multiples times so the +input will be consumed if the parser fails in the middle of the +STR. This function has the same behavior as the `string' function +of Haskell's Parsec. See also `parsec-str'." + (mapc (lambda (c) (parsec-ch c)) str)) + +(defsubst parsec-num (num) + "Parse the number NUM and return the parsed number as a string." + (parsec-re (regexp-quote (number-to-string num)))) + +(defsubst parsec-letter () + "Parse any English letter." + (parsec-re "[a-zA-Z]")) + +(defsubst parsec-digit () + "Parse any digit." + (parsec-re "[0-9]")) + +(defmacro parsec-or (&rest parsers) + "Try the PARSERS one by one. +If the current parser succeeds, return its results. If the +current parser fails without consuming any input, try the next +parser if available. This combinator fails if the current parser +fails after consuming some input or there is no more parsers." + (let ((parser-sym (make-symbol "parser")) + (error-sym (make-symbol "err")) + (error-str-list-sym (make-symbol "err-list"))) + `(let (,error-str-list-sym ,parser-sym ,error-sym) + (catch 'parsec-failed-or + ,@(mapcar + (lambda (parser) + `(parsec-protect-atom parsec-or + (parsec-start + (throw 'parsec-failed-or + (parsec-eavesdrop-error ,error-sym + (parsec-make-atom parsec-or ,parser) + (push (parsec-error-str ,error-sym) ,error-str-list-sym)))))) + parsers) + (parsec-stop + :message + (replace-regexp-in-string + "\n" "\n\t" + (concat "None of the parsers succeeds:\n" + (mapconcat #'identity ,error-str-list-sym "\n")))))))) + +(defalias 'parsec-and 'progn + "Eval BODY sequentially and return the result of the last parser. +This combinator fails if one of the parsers fails.") + +(defalias 'parsec-return 'prog1 + "Eval FIRST and BODY sequentially and return the results of the first parser. +This combinator fails if one of the parsers fails.") + +(defalias 'parsec-collect 'list + "Collect the results of all the parsers OBJECTS into a list.") + +(defun parsec-collect* (&rest args) + "Collect the non-nil results of all the parsers ARGS into a list." + (delq nil (apply #'parsec-collect args))) + +(defmacro parsec-collect-as-string (&rest forms) + "Collect the results of all the parsers FORMS as a string." + `(parsec-list-to-string (parsec-collect ,@forms))) + +(defalias 'parsec-collect-s 'parsec-collect-as-string) + +(defmacro parsec-start (&rest forms) + "Eval the parsers FORMS and return the results or a `parsec-error'. +This combinator should be used at the top level as the entry +point of your parsing program." + `(catch 'parsec-failed ,@forms)) + +(defalias 'parsec-parse 'parsec-start) + +(defmacro parsec-try (parser) + "Try PARSER, and pretend that no input is consumed when an error occurs." + (let ((orig-pt-sym (make-symbol "orig-pt")) + (error-sym (make-symbol "err"))) + `(let ((,orig-pt-sym (point))) + (parsec-eavesdrop-error ,error-sym + (parsec-and ,parser) + (goto-char ,orig-pt-sym))))) + +(defmacro parsec-lookahead (parser) + "Try PARSER, and pretend that no input is consumed when it succeeds." + (let ((orig-pt-sym (make-symbol "orig-pt"))) + `(let ((,orig-pt-sym (point))) + (parsec-return ,parser + (goto-char ,orig-pt-sym))))) + +(defsubst parsec--atom-tag (name) + (intern (format "parsec-failed-at-half-%s" name))) + +(defmacro parsec-protect-atom (name parser) + "This must be used together with `parsec-make-atom'." + (declare (indent 1)) + (let ((tag (parsec--atom-tag name))) + `(catch 'parsec-failed-protect-atom + (parsec-throw (catch ',tag + (throw 'parsec-failed-protect-atom ,parser)))))) + +(defmacro parsec-make-atom (name parser) + (let ((orig-pt-sym (make-symbol "orig-pt")) + (error-sym (make-symbol "err")) + (tag (parsec--atom-tag name))) + `(let ((,orig-pt-sym (point))) + (parsec-eavesdrop-error ,error-sym + ,parser + (unless (= (point) ,orig-pt-sym) + (throw ',tag ,error-sym)))))) + +(defmacro parsec-eavesdrop-error (error-sym parser &rest handler) + (declare (indent 2)) + `(catch 'parsec-failed-eavesdrop-error + (let ((,error-sym (parsec-start + (throw 'parsec-failed-eavesdrop-error ,parser)))) + ,@handler + (parsec-throw ,error-sym)))) + +(defmacro parsec-with-error-message (msg &rest forms) + "Use MSG as the error message if an error occurs when Evaling the FORMS." + (declare (indent 1)) + `(parsec-eavesdrop-error _ + (parsec-and ,@forms) + (parsec-throw (parsec-error-new ,msg)))) + +(defmacro parsec-ensure (&rest forms) + "Exit the program immediately if FORMS fail." + (let ((error-sym (make-symbol "err"))) + `(parsec-eavesdrop-error ,error-sym + (parsec-and ,@forms) + (error "%s" (parsec-error-str ,error-sym))))) + +(defmacro parsec-ensure-with-error-message (msg &rest forms) + "Exit the program immediately with MSG if FORMS fail." + (declare (indent 1)) + `(parsec-ensure + (parsec-with-error-message ,msg + (parsec-and ,@forms)))) + +(defmacro parsec-many (parser) + "Apply the PARSER zero or more times and return a list of the results." + (let ((res-sym (make-symbol "results"))) + `(let (,res-sym) + (parsec-protect-atom parsec-many + (parsec-start + (while (not (eobp)) + (push (parsec-make-atom parsec-many ,parser) ,res-sym)))) + (nreverse ,res-sym)))) + +(defmacro parsec-many1 (parser) + "Apply the PARSER one or more times and return a list of the results." + `(cons ,parser (parsec-many ,parser))) + +(defsubst parsec-list-to-string (l) + (if (stringp l) + l + (mapconcat #'identity l ""))) + +(defmacro parsec-many-as-string (parser) + "Apply the PARSER zero or more times and return the results as a string." + `(mapconcat #'identity (parsec-many ,parser) "")) + +(defalias 'parsec-many-s 'parsec-many-as-string) + +(defmacro parsec-many1-as-string (parser) + "Apply the PARSER one or more times and return the results as a string." + `(mapconcat #'identity (parsec-many1 ,parser) "")) + +(defalias 'parsec-many1-s 'parsec-many1-as-string) + +(defmacro parsec-many-till (parser end &optional type) + "Apply PARSER zero or more times until END succeeds. +The return value is determined by TYPE. If TYPE is `:both', return +the cons `(many . end)'. If TYPE is `:end', return the result of END. +In other cases, return the result of PARSER. + +Used to scan comments: + +> (parsec-and +> (parsec-str \"<--\") +> (parsec-many-till (parsec-any-ch) (parsec-str \"-->\")))" + + (let ((res-sym (make-symbol "results")) + (end-res-sym (make-symbol "end-result"))) + `(let ((,res-sym nil) ,end-res-sym) + (setq ,end-res-sym + (catch 'parsec-failed-many-till + (while t + (parsec-or (throw 'parsec-failed-many-till ,end) + (push ,parser ,res-sym))))) + (setq ,res-sym (nreverse ,res-sym)) + ,(cond + ((eq type :both) `(cons ,res-sym ,end-res-sym)) + ((eq type :end) end-res-sym) + (t res-sym))))) + +(defmacro parsec-many-till-as-string (parser end &optional type) + "Apply PARSER zero or more times until END succeeds. +Return the result of PARSER or END as a string. TYPE has the same +meaning as `parsec-many-till'." + (let ((res-sym (make-symbol "results"))) + (cond + ((eq type :both) + `(let ((,res-sym (parsec-many-till ,parser ,end ,type))) + (cons (parsec-list-to-string (car ,res-sym)) + (parsec-list-to-string (cdr ,res-sym))))) + (t + `(parsec-list-to-string (parsec-many-till ,parser ,end ,type)))))) + +(defalias 'parsec-many-till-s 'parsec-many-till-as-string) + +(defmacro parsec-until (parser &optional type) + "Parse any characters until PARSER succeeds. +TYPE has the same meaning as `parsec-many-till'." + `(parsec-many-till (parsec-any-ch) ,parser ,type)) + +(defmacro parsec-until-as-string (parser &optional type) + "Parse any characters until PARSER succeeds. +Return the result of either part as a string. TYPE has the same +meaning as `parsec-many-till'." + `(parsec-many-till-as-string (parsec-any-ch) ,parser ,type)) + +(defalias 'parsec-until-s 'parsec-until-as-string) + +(defmacro parsec-not-followed-by (parser) + "Succeed only when PARSER fails. Consume no input." + (let ((res-sym (make-symbol "results"))) + `(catch 'parsec-failed-not-followed-by-out + (parsec-try + (let ((,res-sym + (catch 'parsec-failed-not-followed-by-in + (throw 'parsec-failed-not-followed-by-out + (parsec-or (throw 'parsec-failed-not-followed-by-in (parsec-try ,parser)) + nil))))) + (parsec-stop :message (format "Unexpected followed by: %s" ,res-sym))))))) + +(defmacro parsec-endby (parser end) + "Parse zero or more occurrences of PARSER, separated and ended by END. +Return a list of values returned by PARSER." + `(parsec-many (parsec-return ,parser + ,end))) + +(defmacro parsec-sepby (parser separator) + "Parse zero or more occurrences of PARSER, separated by SEPARATOR. +Return a list of values returned by PARSER." + `(parsec-or + (cons ,parser (parsec-many (parsec-and ,separator ,parser))) + nil)) + +(defmacro parsec-between (open close parser) + "Parse OPEN, followed by PARSER and CLOSE. +Return the value returned by PARSER." + `(parsec-and + ,open + (parsec-return ,parser + ,close))) + +(defmacro parsec-count (n parser) + "Parse N occurrences of PARSER. +Return a list of N values returned by PARSER." + (let ((res-sym (make-symbol "results"))) + `(let (,res-sym) + (dotimes (_ ,n ,res-sym) + (push ,parser ,res-sym))))) + +(defmacro parsec-count-as-string (n parser) + "Parse N occurrences of PARSER. +Return the N values returned by PARSER as a string." + `(parsec-list-to-string (parsec-count ,n ,parser))) + +(defalias 'parsec-count-s 'parsec-count-as-string) + +(defmacro parsec-option (opt parser) + "Try to apply PARSER and return OPT if PARSER fails without comsuming input." + `(parsec-or ,parser ,opt)) + +(defmacro parsec-optional (parser) + "Apply PARSER zero or one time. Fail if PARSER fails after consuming input. +Return the result of PARSER or nil. + +Note this combinator doesn't discard the result of PARSER so it is +different from the `optional' function of Haskell's Parsec. If +you want the Haskell's behavior, use `parsec-optional*'." + `(parsec-or ,parser nil)) + +(defmacro parsec-optional* (parser) + "Apply PARSER zero or one time and discard the result. +Fail if PARSER fails after consuming input. + +This combinator has the same behavior as the `optional' function of +Haskell's Parsec." + `(parsec-and ,parser nil)) + +(defmacro parsec-peek (parser) + "Apply PARSER without consuming any input. +When PARSER succeeds, the result of the PARSER is returned. +Otherwise, the return value is an error. Use `parsec-error-p' on +the return value to see whether the PARSER fails or not. Use +`parsec-peek-p' if you want nil to be returned when PARSER fails. + +This is a shortcut of combining `parsec-start', `parsec-try' and +`parsec-lookahead'. Since arbitrary parser is allowed, this +function can be viewed as a more powerful version of `looking-at' +in Emacs Lisp." + `(parsec-start + (parsec-try + (parsec-lookahead ,parser)))) + +(defmacro parsec-peek-p (parser) + "Same as `parsec-peek' except a nil is returned when the PARSER fails." + (let ((res-sym (make-symbol "res"))) + `(let ((,res-sym (parsec-peek ,parser))) + (unless (parsec-error-p ,res-sym) + ,res-sym)))) + +(defmacro parsec-query (parser &rest args) + "Get an alternative return value of the PARSER specified by the ARGS. + +The args can be in the following forms: + + :beg --> return the point before applying the PARSER + :end --> return the point after applying the PARSER + :nil --> return nil + :groups N --> return Nth group for `parsec-re'." + (let ((orig-pt-sym (make-symbol "orig-pt")) + (res-sym (make-symbol "results"))) + `(let ((,orig-pt-sym (point)) + (,res-sym ,parser)) + ,(cond + ((memq :beg args) orig-pt-sym) + ((memq :end args) '(point)) + ((memq :nil args) nil) + ((and (memq :group args) + (consp parser) + (eq (car parser) 'parsec-re)) + (let ((group + (cl-loop named outer for arg on args + when (eq (car arg) :group) do + (cl-return-from outer (cadr arg))))) + (if (and group (integerp group)) + `(match-string ,group) + (error "Invalid query :group %s" group)))) + (t res-sym))))) + +(defsubst parsec-just (x) (cons 'Just x)) + +(defconst parsec-nothing 'Nothing) + +(defun parsec-maybe-p (x) + (or (eq x parsec-nothing) + (and + (consp x) + (eq (car x) 'Just)))) + +(defun parsec-from-maybe (x) + "Retrieve the value from Maybe monad X. +If X is `(Just . p)', return p. Otherwise return nil." + (and (consp x) + (eq (car x) 'Just) + (cdr x))) + +(defmacro parsec-optional-maybe (parser) + "Apply PARSER zero or one time and return the value in a Maybe monad. +If PARSER fails without consuming any input, return `parsec-nothing'. +Otherwise, return `(Just . p)' where p is the result of PARSER." + (let ((res-sym (make-symbol "result"))) + `(let ((,res-sym (parsec-optional ,parser))) + (if ,res-sym + (parsec-just ,res-sym) + parsec-nothing)))) + +(defmacro parsec-with-input (input &rest parsers) + "With INPUT, start parsing by applying PARSERS sequentially." + (declare (indent 1)) + `(with-temp-buffer + (insert ,input) + (goto-char (point-min)) + (parsec-start + ,@parsers))) + +(provide 'parsec) +;;; parsec.el ends here