branch: elpa/clojure-ts-mode commit cb2cb18640d3f643ea5c87cda8e5caaaab1e4015 Author: Roman Rudakov <rruda...@fastmail.com> Commit: Bozhidar Batsov <bozhi...@batsov.dev>
Update the design documentation --- clojure-ts-mode.el | 54 +++++++++++++++++--------------------- doc/design.md | 76 +++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 51 deletions(-) diff --git a/clojure-ts-mode.el b/clojure-ts-mode.el index 03a93af85b..a5b504bcbb 100644 --- a/clojure-ts-mode.el +++ b/clojure-ts-mode.el @@ -306,7 +306,7 @@ Only intended for use at development time.") "Syntax table for `clojure-ts-mode'.") (defconst clojure-ts--builtin-dynamic-var-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("*1" "*2" "*3" "*agent*" @@ -323,7 +323,7 @@ Only intended for use at development time.") "$"))) (defconst clojure-ts--builtin-symbol-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("do" "if" "let*" "var" @@ -372,52 +372,46 @@ Only intended for use at development time.") (concat "^" (regexp-opt symbols) "$")) (defconst clojure-ts-function-docstring-symbols - (eval-and-compile - (rx line-start - (or "definline" - "defmulti" - "defmacro" - "defn" - "defn-" - "defprotocol" - "ns") - line-end)) + (rx line-start + (or "definline" + "defmulti" + "defmacro" + "defn" + "defn-" + "defprotocol" + "ns") + line-end) "Symbols that accept an optional docstring as their second argument.") (defconst clojure-ts-definition-docstring-symbols - (eval-and-compile - (rx line-start "def" line-end)) + (rx line-start "def" line-end) "Symbols that accept an optional docstring as their second argument. Any symbols added here should only treat their second argument as a docstring if a third argument (the value) is provided. 
\"def\" is the only builtin Clojure symbol that behaves like this.") (defconst clojure-ts--variable-definition-symbol-regexp - (eval-and-compile - (rx line-start (or "def" "defonce") line-end)) + (rx line-start (or "def" "defonce") line-end) "A regular expression matching a symbol used to define a variable.") (defconst clojure-ts--typedef-symbol-regexp - (eval-and-compile - (rx line-start - (or "defprotocol" "defmulti" "deftype" "defrecord" - "definterface" "defmethod" "defstruct") - line-end)) + (rx line-start + (or "defprotocol" "defmulti" "deftype" "defrecord" + "definterface" "defmethod" "defstruct") + line-end) "A regular expression matching a symbol used to define a type.") (defconst clojure-ts--type-symbol-regexp - (eval-and-compile - (rx line-start - (or "deftype" "defrecord" - ;; While not reifying, helps with doc strings - "defprotocol" "definterface" - "reify" "proxy" "extend-type" "extend-protocol") - line-end)) + (rx line-start + (or "deftype" "defrecord" + ;; While not reifying, helps with doc strings + "defprotocol" "definterface" + "reify" "proxy" "extend-type" "extend-protocol") + line-end) "A regular expression matching a symbol used to define or instantiate a type.") (defconst clojure-ts--interface-def-symbol-regexp - (eval-and-compile - (rx line-start (or "defprotocol" "definterface") line-end)) + (rx line-start (or "defprotocol" "definterface") line-end) "A regular expression matching a symbol used to define an interface.") (defun clojure-ts--docstring-query (capture-symbol) diff --git a/doc/design.md b/doc/design.md index 8afeaffee7..4c19319054 100644 --- a/doc/design.md +++ b/doc/design.md @@ -32,29 +32,43 @@ In short: ## tree-sitter-clojure -Clojure-ts-mode uses the tree-sitter-clojure grammar, which can be found at <https://github.com/sogaiu/tree-sitter-clojure> -The clojure-ts-mode grammar provides very basic, low level nodes that try to match Clojure's very light syntax. 
+`clojure-ts-mode` uses the experimental version of the tree-sitter-clojure grammar, which +can be found at +<https://github.com/sogaiu/tree-sitter-clojure/tree/unstable-20250526>. The +`clojure-ts-mode` grammar provides very basic, low level nodes that try to match +Clojure's very light syntax. There are nodes to represent: -- Symbols (sym_lit) - - Contain (sym_ns) and (sym_name) nodes -- Keywords (kwd_lit) - - Contain (kwd_ns) and (kw_name) nodes -- Strings (str_lit) -- Chars (char_lit) -- Nil (nil_lit) -- Booleans (bool_lit) -- Numbers (num_lit) -- Comments (comment, dis_expr) - - dis_expr is the `#_` discard expression -- Lists (list_list) -- Vectors (vec_lit) -- Maps (map_lit) -- Sets (set_lit) - -There are also nodes to represent metadata, which appear on `meta:` child fields of the nodes the metadata is defined on. -For example a simple vector with metadata defined on it like so +- Symbols `(sym_lit)` + - Contain `(sym_ns)` and `(sym_name)` nodes +- Keywords `(kwd_lit)` + - Contain `(kwd_ns)` and `(kw_name)` nodes +- Strings `(str_lit)` + - Contains `(str_content)` node +- Chars `(char_lit)` +- Nil `(nil_lit)` +- Booleans `(bool_lit)` +- Numbers `(num_lit)` +- Comments `(comment, dis_expr)` + - `dis_expr` is the `#_` discard expression +- Lists `(list_list)` +- Vectors `(vec_lit)` +- Maps `(map_lit)` +- Sets `(set_lit)` +- Metadata nodes `(meta_lit)` +- Regex content `(regex_content)` +- Function literals `(anon_fn_lit)` + +The best place to learn more about the tree-sitter-clojure grammar is to read +the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). + +### Difference between stable grammar and experimental + +#### Standalone metadata nodes + +Metadata nodes in stable grammar appear as child nodes of the nodes the metadata +is defined on. 
For example a simple vector with metadata defined on it like so: ```clojure ^:has-metadata [1] @@ -69,7 +83,27 @@ will produce a parse tree like so value: (num_lit)) ``` -The best place to learn more about the tree-sitter-clojure grammar is to read the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). +Although it's somewhat closer to how Clojure treats metadata itself, in the +context of a text editor it creates some problems, which were discussed [here](https://github.com/sogaiu/tree-sitter-clojure/issues/65). To +name a few: +- `forward-sexp` command would skip both, metadata and the node it's attached + to. Called from an opening paren it would signal an error "No more sexp to + move across". +- `kill-sexp` command would kill both, metadata and the node it's attached to. +- `backward-up-list` called from the inside of a list with metadata would move + point to the beginning of metadata node. +- Internally we had to introduce some workarounds to skip metadata nodes or + figure out where the actual node starts. + +#### Special nodes for string content and regex content + +To parse the content of certain strings with a separate grammar, it is necessary +to extract the string's content, excluding its opening and closing quotes. To +achieve this, Emacs 31 allows specifying offsets for `treesit-range-settings`. +However, in Emacs 30.1, this feature is broken due to bug [#77848](https://debbugs.gnu.org/cgi/bugreport.cgi?bug=77848) (a fix is +anticipated in Emacs 30.2). The presence of `str_content` and `regex_content` nodes +allows us to support this feature across all Emacs versions without relying on +offsets. ### Clojure Syntax, not Clojure Semantics