branch: elpa/adoc-mode commit 4ded97193ca8b1ae30d05d0c37b4dfe47059a4d4 Author: TobiasZawada <i...@tn-home.de> Commit: GitHub <nore...@github.com>
Implement fontification of source blocks (#21) The method is adapted from Org and consists of the following steps: 1. Create temp buffer 2. copy source code there 3. apply font-lock in the temp buffer 4. transfer text properties back to the adoc buffer --- README.adoc | 8 ++ adoc-mode.el | 314 ++++++++++++++++++++++++++++++++++++++++++------- test/adoc-mode-test.el | 45 +++++++ 3 files changed, 327 insertions(+), 40 deletions(-) diff --git a/README.adoc b/README.adoc index 999769e6c3..71fb83755d 100644 --- a/README.adoc +++ b/README.adoc @@ -34,6 +34,7 @@ be easily ignored. Here are some of the main features of `adoc-mode`: - sophisticated highlighting +- native fontification of code blocks - promote / demote title - toggle title type between one line title and two line title - adjust underline length of a two line title to match title text's length @@ -97,6 +98,13 @@ or if you're into `use-package`: `buffer-face-mode` is for you: `(add-hook 'adoc-mode-hook (lambda() (buffer-face-mode t)))` +* Settings regarding native fontification of source blocks: +** Native fontification of source blocks can be switched off by setting `adoc-fontify-code-blocks-natively` to `nil`. +** Native fontification of lengthy code blocks can cause performance problems. If the value of `adoc-fontify-code-blocks-natively` is an integer only those code blocks are fontified natively whose length is less or equal to that value. +** To avoid performance problems with code block beginnings that do not have a matching end yet the scanning for the code block end is delimited by `adoc-font-lock-extend-after-change-max`. +** All programming languages `XYZ` that have an Emacs major mode `XYZ-mode` and use `font-lock` are automatically supported. Some other languages not fitting into that name scheme are supported through the alist `adoc-code-lang-modes`. You can add your own languages and modes there if they work based on `font-lock` and are not automatically supported. +** The fall-back language mode is `prog-mode` without any fontification. You can set your own default by `adoc-fontify-code-block-default-mode`. + === Syntax Highlighting Customization It is possible to customize the way `adoc-mode` renders different text diff --git a/adoc-mode.el b/adoc-mode.el index df2c9d66d5..7734433876 100644 --- a/adoc-mode.el +++ b/adoc-mode.el @@ -7,7 +7,7 @@ ;; URL: https://github.com/bbatsov/adoc-mode ;; Maintainer: Bozhidar Batsov <bozhi...@batsov.dev> ;; Created: 2009 -;; Version: 0.7.0 +;; Version: 0.8.0-snapshot ;; Package-Requires: ((emacs "26")) ;; Keywords: docs, wp ;; @@ -45,8 +45,8 @@ (require 'cl-lib) (require 'tempo) -(defconst adoc-mode-version "0.7.0" - "Adoc-mode version number. +(defconst adoc-mode-version "0.8.0-snapshot" + "adoc mode version number. Based upon AsciiDoc version 8.5.2. I.e. regexeps and rules are taken from that version's asciidoc.conf / manual.") @@ -55,7 +55,7 @@ taken from that version's asciidoc.conf / manual.") (defgroup adoc nil "Support for editing AsciiDoc files in GNU Emacs." :group 'text :prefix "adoc-" - :version "0.7.0" + :version "0.8.0" :link '(url-link "https://github.com/bbatsov/adoc-mode")) (defcustom adoc-script-raise '(-0.3 0.3) @@ -78,13 +78,13 @@ You need to call `adoc-calc' after a change." (defcustom adoc-insert-replacement nil "When non-nil the character/string a replacement/entity stands for is displayed. -E.g. after '&' an '&' is displayed, after '(C)' the copy right -sign is displayed. It's only about display, neither the file nor +E.g. after \\='&\\=' an \\='&\\=' is displayed, after \\='(C)\\=' the copy right +sign is displayed. It is only about display, neither the file nor the buffer content is affected. You need to call `adoc-calc' after you change `adoc-insert-replacement'. For named character entities (e.g. -'&', in contrast to '' or '(C)' ) to be displayed you +\\='&\\=', in contrast to \\='\\=' or \\='(C)\\=' ) to be displayed you need to set `adoc-unichar-name-resolver'. Setting it to non-nil interacts very badly with minor-modes using @@ -238,6 +238,70 @@ See for example `tempo-template-adoc-title-1'." (const :tag "tempo-snippets" tempo-snippets)) :group 'adoc) +(defcustom adoc-fontify-code-blocks-natively 5000 + "When non-nil, fontify code in code blocks using the native major mode. +This only works for code blocks where the language is +specified where we can automatically determine the appropriate +mode to use. The language to mode mapping may be customized by +setting the variable `adoc-code-lang-modes'. + +The value can be a number that determines the size +up to which code blocks are fontified natively. +If the value is another non-nil value then code blocks +are fontified natively regardless of their size." + :group 'adoc + :type '(choice :tag "Fontify code blocks " :format "\n%{%t%}: %[Size%] %v" + (integer :tag "limited to") + (boolean :tag "unlimited")) + :safe '(lambda (x) (or (booleanp x) (numberp x))) + :package-version '(adoc-mode . "0.8.0")) + +;; This is based on `org-src-lang-modes' from org-src.el +(defcustom adoc-code-lang-modes + '( + ("asymptote" . asy-mode) + ("bash" . sh-mode) + ("C" . c-mode) + ("cpp" . c++-mode) + ("C++" . c++-mode) + ("calc" . fundamental-mode) + ("ditaa" . artist-mode) + ("dot" . fundamental-mode) + ("elisp" . emacs-lisp-mode) + ("ocaml" . tuareg-mode) + ("screen" . shell-script-mode) + ("shell" . sh-mode) + ("sqlite" . sql-mode) + ) + "Alist mapping languages to their major mode. +The key is the language name, the value is the major mode. For +many languages this is simple, but for language where this is not +the case, this variable provides a way to simplify things on the +user side. For example, there is no ocaml-mode in Emacs, but the +mode to use is `tuareg-mode'." + :group 'adoc + :type '(repeat + (cons + (string "Language name") + (symbol "Major mode"))) + :package-version '(adoc-mode . "0.8.0")) + +(defcustom adoc-fontify-code-block-default-mode 'prog-mode + "Default mode to use to fontify code blocks. +This mode is used when automatic detection fails, such as for +code blocks with no language specified." + :group 'adoc + :type '(choice function (const :tag "None" nil)) + :package-version '(adoc-mode . "0.8.0")) + +(defcustom adoc-font-lock-extend-after-change-max 5000 + "Number of chars scanned backwards for re-fontification of code block headers. +Also used to delimit the scan for the end delimiter." + :type 'integer + :group 'adoc + :package-version '(adoc-mode . "0.8.0")) + + ;;;; faces / font lock (define-obsolete-face-alias 'adoc-orig-default 'adoc-align-face "23.3") (defface adoc-align-face @@ -604,15 +668,15 @@ easier for major mode to write font lock regular expressions." '((default (:inherit adoc-meta-face)) (((background light)) :foreground "gray75") (((background dark)) :foreground "gray25")) - "For meta characters which can be \='hidden\='. -Hidden in the sense of *almost* not visible. They don't need to + "For meta characters which can be \\='hidden\\='. +Hidden in the sense of *almost* not visible. They does not need to be properly seen because one knows what these characters must be; deduced from the highlighting of the near context. E.g in -AsciiDocs \='_important_\=', the underlines would be highlighted with -adoc-hide-delimiter-face, and the text \='important\=' would be -highlighted with adoc-emphasis-face. Because 'important' is +AsciiDocs \\='_important_\\=', the underlines would be highlighted with +adoc-hide-delimiter-face, and the text \\='important\\=' would be +highlighted with adoc-emphasis-face. Because \\='important\\=' is highlighted, one knows that it must be surrounded with the meta -characters \='_\=', and thus the meta characters don't need to be +characters \\='_\\=', and thus the meta characters do not need to be properly seen. For example: AsciiDoc: *bold emphasis text* or _emphasis text_ @@ -691,12 +755,12 @@ AsciiDoc: *bold emphasis text* or _emphasis text_ "For verbatim text. Verbatim in a sense that all its characters are to be taken -literally. Note that doesn't necessarily mean that that it is in +literally. Note that does not necessarily mean that that it is in a typewritter font. -For example 'foo' in the following examples. In parantheses is a +For example \\='foo\\=' in the following examples. In parantheses is a summary what the command is for according to the given markup language. -`foo` (verbatim and typewriter font) +\\=`foo\\=` (verbatim and typewriter font) +++foo+++ (only verbatim)" :group 'adoc-faces) (defvar adoc-verbatim-face 'adoc-verbatim-face) @@ -726,16 +790,16 @@ language. "Meta characters that are replaced by text in the output. See also `adoc-complex-replacement-face'. For example -AsciiDoc: '->' is replaced by an Unicode arrow -It's difficult to say whether adoc-replacement-face is part of +AsciiDoc: \\='->\\=' is replaced by an Unicode arrow +It is difficult to say whether adoc-replacement-face is part of the group adoc-faces-meta or part of the group adoc-faces-text. Technically they are clearly meta characters. However they are just another representation of normal text and I -want to fontify them as such. E.g. in HTML '<b>foo & bar</b>', -the output 'foo & bar' is fontified bold, thus I also want 'foo -& bar' in the Emacs buffer be fontified with -markup-bold-face. Thus markup-replacement-face needs to be -something that is orthogonal to the markup-bold-face etc faces." +want to fontify them as such. E.g. in HTML \\='<b>foo & bar</b>\\=', +the output \\='foo & bar\\=' is fontified bold, thus I also want \\='foo +& bar\\=' in the Emacs buffer be fontified with +adoc-bold-face. Thus adoc-replacement-face needs to be +something that is orthogonal to the adoc-bold-face etc faces." :group 'adoc-faces) (defvar adoc-replacement-face 'adoc-replacement-face) @@ -766,9 +830,9 @@ something that is orthogonal to the markup-bold-face etc faces." (defface adoc-superscript-face '((t :inherit adoc-gen-face :height 0.8)) "For superscript text. -For example 'foo' in the ^foo^ +For example \\='foo\\=' in the ^foo^ Note that typically the major mode doing the font lock -additionaly raises the text; face customization doesn't provide +additionaly raises the text; face customization does not provide this feature." :group 'adoc-faces) (defvar adoc-superscript-face 'adoc-superscript-face) @@ -776,9 +840,9 @@ this feature." (defface adoc-subscript-face '((t :inherit adoc-gen-face :height 0.8)) "For subscript text. -For example 'foo' in the ~foo~ +For example \\='foo\\=' in the ~foo~ Note that typically the major mode doing the font lock -additionally lowers the text; face customization doesn't provide +additionally lowers the text; face customization does not provide this feature." :group 'adoc-faces) (defvar adoc-subscript-face 'adoc-subscript-face) @@ -829,9 +893,9 @@ this feature." '((t :inherit (fixed-pitch adoc-gen-face))) "For text in typewriter/monospaced font. - For example 'foo' in the following examples: + For example \\='foo\\=' in the following examples: +foo+ (only typewriter font) - `foo` (verbatim and typewriter font)" + \\=`foo\\=` (verbatim and typewriter font)" :group 'adoc-faces) (defvar adoc-typewriter-face 'adoc-typewriter-face) @@ -844,10 +908,19 @@ this feature." (defface adoc-secondary-text-face '((t :inherit adoc-gen-face :foreground "firebrick" :height 0.9)) "For text that is not part of the running text. - For example for captions of tables or images, or for footnotes, or for floating text." +For example for captions of tables or images, +or for footnotes, or for floating text." :group 'adoc-faces) (defvar adoc-secondary-text-face 'adoc-secondary-text-face) +(defface adoc-native-code-face + '((((background light)) + (:background "cornsilk" :extend t)) + (((background dark)) + (:background "saddlebrown" :extend t))) + "For code blocks that are highlighted natively." + :group 'adoc-faces) +(defvar adoc-native-code-face 'adoc-native-code-face) ;;;; regexps ;; from AsciiDoc manual: The attribute name/value syntax is a single line ... @@ -1365,10 +1438,10 @@ subgroups: Id CMD-NAME is nil, any command is matched. It maybe a regexp itself in order to match multiple commands. If TARGET is nil, any target is matched. When UNCONSTRAINED is nil, the returned regexp -begins with '\<', i.e. it will _not_ match when CMD-NAME is part -of a previous word. When ATTRIBUTE-LIST-CONSTRAINTS is 'empty, -only an empty attribute list is matched, if it's -'single-attribute, only an attribute list with exactly one +begins with \\='\\<\\=', i.e. it will _not_ match when CMD-NAME is part +of a previous word. When ATTRIBUTE-LIST-CONSTRAINTS is the symbol +`empty', only an empty attribute list is matched, if it is +`single-attribute', only an attribute list with exactly one attribute is matched. Subgroups of returned regexp: @@ -1491,7 +1564,7 @@ the limit of the search. REXEXP the regexp to be searched. MUST-FREE-GROUPS a list of regexp group numbers which may not match text that has an adoc-reserved text-property with a non-nil value. Likewise, groups in NO-BLOCK-DEL-GROUPS may not contain -text having adoc-reserved set to 'block-del." +text having adoc-reserved set to symbol `block-del'." (let ((found t) (prevented t) saved-point) (while (and found prevented (<= (point) end) (not (eobp))) (setq saved-point (point)) @@ -1864,6 +1937,7 @@ meta characters." ;; font (most probably), because then it also won't look aligned (text-property-not-all (match-beginning 1) (match-end 1) 'face 'adoc-typewriter-face) (text-property-not-all (match-beginning 1) (match-end 1) 'face 'adoc-code-face) + (text-property-not-all (match-beginning 1) (match-end 1) 'adoc-code-block t) (text-property-not-all (match-beginning 1) (match-end 1) 'face 'adoc-passthrough-face) (text-property-not-all (match-beginning 1) (match-end 1) 'face 'adoc-comment-face))) @@ -1897,6 +1971,163 @@ meta characters." nil) +;;; Natively highlite source code blocks. +;; The code is an adaption of the code in markdown-mode.el. + +(defun adoc-get-lang-mode (lang) + "Return major mode that should be used for LANG. +LANG is a string, and the returned major mode is a symbol." + (cl-find-if + 'fboundp + (list (cdr (assoc lang adoc-code-lang-modes)) + (cdr (assoc (downcase lang) adoc-code-lang-modes)) + (intern (concat lang "-mode")) + (intern (concat (downcase lang) "-mode"))))) + +;; Based on `org-src-font-lock-fontify-block' from org-src.el. +(defun adoc-fontify-code-block-natively (lang start-block end-block start-src end-src) + "Fontify source code block. +This function is called by Emacs for automatic fontification when +`adoc-fontify-code-blocks-natively' is non-nil. LANG is the +language used in the block. +START-BLOCK and END-BLOCK specify the limits of the full source block +with header lines and delimiters (but without header arguments). +START-SRC and END-SRC delimit the actual source code." + (let ((lang-mode (if lang (adoc-get-lang-mode lang) + adoc-fontify-code-block-default-mode))) + (when (fboundp lang-mode) + (let ((string (buffer-substring-no-properties start-src end-src)) + (modified (buffer-modified-p)) + (adoc-buffer (current-buffer)) int pos next) + (remove-text-properties start-block end-block '(face nil adoc-code-block nil font-lock-fontified nil font-lock-multiline nil)) + (with-current-buffer + (get-buffer-create + (concat " adoc-code-fontification:" (symbol-name lang-mode))) + ;; Make sure that modification hooks are not inhibited in + ;; the org-src-fontification buffer in case we're called + ;; from `jit-lock-function' (Bug#25132). + (let ((inhibit-modification-hooks nil)) + (erase-buffer) + (insert string)) + (unless (eq major-mode lang-mode) (funcall lang-mode)) + (font-lock-ensure) + (setq pos (point-min)) + (cl-loop for int being the intervals property 'face + for pos = (car int) + for next = (cdr int) + for val = (get-text-property pos 'face) + when val do + (put-text-property + (+ start-src (1- pos)) (1- (+ start-src next)) 'face + val adoc-buffer))) + (add-text-properties start-block start-src '(face adoc-meta-face)) + (add-text-properties end-src end-block '(face adoc-meta-face)) + (add-text-properties + start-block end-block + '(font-lock-fontified t fontified t font-lock-multiline t + adoc-code-block t adoc-reserved t)) + (set-buffer-modified-p modified))))) + +(defconst adoc-code-block-begin-regexp + (cl-flet ((rx-or (first second) (format "\\(?:%s\\|%s\\)" first second)) + (rx-optional (stuff) (format "\\(?:%s\\)?" stuff)) + (outer-brackets-and-delimiter (&rest stuff) + (format "^\\[%s\\]\n\\(?2:----+\\)\n" + (apply #'concat stuff))) + (lang () ",\\(?1:[^],]+\\)") + (optional-other-args () "\\(?:,[^]]+\\)?")) + (outer-brackets-and-delimiter + (rx-or + (concat + "source" + (rx-optional (lang)) + (optional-other-args)) + (concat + (lang) + (optional-other-args))) + )) + "Regexp matching the beginning of source blocks. +Group 1 contains the language attribute. +Group 2 contains the block delimiter.") + +(defun adoc-search-forward-code-block (last &optional noerror) + "Search for next adoc-code block up to LAST. +NOERROR is the same as for `search-forward'. + +Return the source block language and +set match data if a source block is found. +Otherwise return nil. + +The overall match data begins at the +header of the code block and ends at the end of the +end delimiter. +The first group of the match data delimits the +actual source code." + (let (start-header start-src end-src end-block lang) + (save-match-data + (and (setq start-src (re-search-forward adoc-code-block-begin-regexp last noerror)) + (setq lang (or (match-string 1) t) + start-header (match-beginning 0)) + (setq end-block (re-search-forward (format "\n%s$" (match-string 2)))) + (setq end-src (match-beginning 0))) + ) + (when end-block + (set-match-data (list start-header end-block start-src end-src (current-buffer))) + lang))) + +(defun adoc-font-lock-extend-after-change-region (beg end _old-len) + "Enlarge region for re-fontification after edit. +BEG is the beginning of the region and END its end. +The region is extended if it includes a part of a source block. +Returns a cons (BEG . END) with the updated limits of the region." + (save-match-data + (save-excursion + (goto-char beg) + ;; Maybe edits in header line: Skip to body + (cl-case (char-after (line-beginning-position)) + (?\[ (forward-line 2)) + (?- (forward-line 1))) + ;; Search backward for header: + (let ((beg-block (re-search-backward adoc-code-block-begin-regexp (max 0 (- (point) adoc-font-lock-extend-after-change-max)) t)) + end-block) + (when beg-block + (goto-char (match-end 0)) + (setq end-block (or (re-search-forward (format "\n%s$" (match-string 2)) (+ (point) adoc-font-lock-extend-after-change-max) t) end)) + (when (and end-block (> end-block beg)) ;; block reaches really into edited area + (cons (min beg beg-block) (max end end-block)))))))) + +(defun adoc-fontify-code-blocks (last) + "Add text properties to next code block from point to LAST. +Use this function as matching function MATCHER in `font-lock-keywords'." + (let ((lang (adoc-search-forward-code-block last 'noError))) + (when lang + (save-excursion + (save-match-data + (let* ((start-block (match-beginning 0)) + (end-block (match-end 0)) + (start-src (match-beginning 1)) + (end-src (match-end 1)) + (end-src+nl (if (eq (char-after end-src) ?\n) (1+ end-src) end-src)) + (size (1+ (- end-src start-src))) + (bol-prev (progn (goto-char start-block) + (if (bolp) (line-beginning-position 0) (line-beginning-position)))) + (eol-next (progn (goto-char end-block) + (if (bolp) (line-beginning-position 2) (line-beginning-position 3))))) + (if (if (numberp adoc-fontify-code-blocks-natively) + (<= size adoc-fontify-code-blocks-natively) + adoc-fontify-code-blocks-natively) + (adoc-fontify-code-block-natively lang start-block end-block start-src end-src) + (add-text-properties + start-src + end-src + '(font-lock-face adoc-verbatim-face))) + ;; Set background for block as well as opening and closing lines. + (font-lock-append-text-property + start-src end-src+nl 'face 'adoc-native-code-face) + ))) + t))) + + ;;;; font lock (defun adoc-unfontify-region-function (beg end) (font-lock-default-unfontify-region beg end) @@ -1926,6 +2157,8 @@ meta characters." (defun adoc-get-font-lock-keywords () "Return list of keywords for `adoc-mode'." (list + ;; Fontify code blocks first to mark these regions as fontified. + '(adoc-fontify-code-blocks) ;; Asciidoc BUG: Lex.next has a different order than the following extract ;; from the documentation states. @@ -3205,12 +3438,13 @@ Turning on Adoc mode runs the normal hook `adoc-mode-hook'." ;; font lock (setq-local font-lock-defaults - '(adoc-font-lock-keywords - nil nil nil nil - (font-lock-multiline . t) - (font-lock-mark-block-function . adoc-font-lock-mark-block-function))) - (setq-local font-lock-extra-managed-props '(adoc-reserved adoc-attribute-list)) + '(adoc-font-lock-keywords + nil nil nil nil + (font-lock-multiline . t) + (font-lock-mark-block-function . adoc-font-lock-mark-block-function))) + (setq-local font-lock-extra-managed-props '(adoc-reserved adoc-attribute-list adoc-code-block)) (setq-local font-lock-unfontify-region-function 'adoc-unfontify-region-function) + (setq-local font-lock-extend-after-change-region-function #'adoc-font-lock-extend-after-change-region) ;; outline mode ;; BUG: if there are many spaces\tabs after =, level becomes wrong diff --git a/test/adoc-mode-test.el b/test/adoc-mode-test.el index 82b06d4dc3..c6b1d3e272 100644 --- a/test/adoc-mode-test.el +++ b/test/adoc-mode-test.el @@ -13,6 +13,8 @@ ;; ;;; Code: + +;;;; Helpers (require 'ert) (require 'adoc-mode) @@ -122,6 +124,30 @@ removed before TRANSFORM is evaluated. ;; verify (should (string-equal (buffer-substring (point-min) (point-max)) expected-text))))) +;; We define our own generic mode for testing code blocks. +;; All other languages except adoc can change fontification without us noticing. +;; Adoc in a code block is a good test case, but it should not be used for the +;; simplest test case. Use `adoctest-lang-mode' instead. +(define-generic-mode adoctest-lang-mode + '(("//" . nil) ("/*" . "*/")) ;; cpp-like comment syntax + '("if" "else" "for" "while" "do" "break" "continue" "throw" "catch") ;; some keywords from c/cpp + nil ;; no additional entries for font-lock-keywords + nil ;; no entries for auto-mode-alist + nil ;; no additional actions + "Mode for testing code blocks in `adoc-mode'. +Don't use it for anything real.") + +(defmacro adoctest-with-uncustomized-vars (vars &rest body) + "Run BODY without customization of VARS." + (declare (debug (list body)) (indent 1)) + `(let ,(mapcar + (lambda (var) + (cons var (get var 'standard-value))) + vars) + ,@body)) + + +;;;; Actual Tests (ert-deftest adoctest-test-titles-simple-one-line-before () (adoctest-faces "titles-simple-one-line-before" "= " adoc-meta-hide-face "document title" adoc-title-0-face "\n" nil @@ -268,6 +294,25 @@ removed before TRANSFORM is evaluated. ;; as delimited block it's tested in delimited-blocks-simple )) +(ert-deftest adoctest-test-code-blocks () + (adoctest-with-uncustomized-vars + (adoc-fontify-code-blocks-natively + adoc-code-lang-modes + adoc-fontify-code-block-default-mode + adoc-font-lock-extend-after-change-max) + (adoctest-faces "code-block-natively" + "\n" nil + "[source,adoctest-lang]\n----\n" 'adoc-meta-face + "if" '(font-lock-keyword-face adoc-native-code-face) + "\n" '(adoc-native-code-face) + "//" '(font-lock-comment-delimiter-face adoc-native-code-face) + "comment" '(font-lock-comment-face adoc-native-code-face) + "\n" '(adoc-meta-face adoc-native-code-face) + "----" 'adoc-meta-face + "\n" nil + ) + )) + (ert-deftest adoctest-test-anchors () (adoctest-faces "anchors" ;; block id