branch: elpa/extmap commit 83d5c74adc4994b6662b994e41da85b3052a64b0 Author: Paul Pogonyshev <pogonys...@gmail.com> Commit: Paul Pogonyshev <pogonys...@gmail.com>
Implement `:compress-values' flag; this doesn't affect database structure. --- extmap.el | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- test/extmap-test.el | 26 ++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/extmap.el b/extmap.el index 7d09061a59..18910d16d7 100644 --- a/extmap.el +++ b/extmap.el @@ -333,6 +333,14 @@ OPTIONS can be a list of the following keyword arguments: must be prepared that `extmap-get' can return `eq' values for different keys (for this reason, this is not the default). + :compress-values + + Replace equal parts within values with the same object. This + can decrease database size, but you must be prepared that + values returned `extmap-get' can contain `eq' elements (in + lists, vectors, etc.). It also makes map creation noticeably + slower. For these reason, this is not the default. + :max-inline-bytes Inline values for which `print' results in this many bytes. @@ -367,6 +375,7 @@ Only available on Emacs 25, as this requires `generator' package." (let ((print-level nil) (print-length nil) (shared-values (when (plist-get options :share-values) (make-hash-table :test #'extmap--equal-including-properties))) + (canonical-subvalues (when (plist-get options :compress-values) (make-hash-table :test #'extmap--equal-including-properties))) (max-inline-bytes (or (plist-get options :max-inline-bytes) 16)) (offset (bindat-length extmap--header-bindat-spec nil)) (buffer (current-buffer)) @@ -389,7 +398,14 @@ Only available on Emacs 25, as this requires `generator' package." (puthash key t used-keys) (insert (encode-coding-string (symbol-name key) 'utf-8 t)) (insert 0) - (let ((serialized (if (extmap--plain-string-p value) value (prin1-to-string value)))) + (let ((serialized (if (extmap--plain-string-p value) + value + (let ((print-circle t) + (print-continuous-numbering nil)) + (when canonical-subvalues + (clrhash canonical-subvalues) + (setq value (extmap--compress-value value canonical-subvalues))) + (prin1-to-string value))))) (unless (or (extmap--plain-string-p value) (condition-case _ (equal (read serialized) value) (error nil))) (error "Value for key `%s' cannot be saved in database: it cannot be read back or is different after reading" key)) ;; The whole point of this buffer is to be used for @@ -428,6 +444,38 @@ Only available on Emacs 25, as this requires `generator' package." (null (text-properties-at 0 object)) (null (next-property-change 0 object)))) +(defun extmap--compress-value (value canonical-subvalues) + (cond ((stringp value) + (if (and (<= (length value) 4) (extmap--plain-string-p value)) + ;; Don't try to compress very short strings without text properties. + value + (or (gethash value canonical-subvalues) + (puthash value value canonical-subvalues)))) + ((consp value) + (let ((original-value value) + canonical-head + canonical-tail) + (while (unless (setq canonical-tail (gethash value canonical-subvalues)) + (push (extmap--compress-value (car value) canonical-subvalues) canonical-head) + (consp (setq value (cdr value))))) + (setq canonical-head (nreverse canonical-head)) + (puthash original-value + (if canonical-tail + (nconc canonical-head canonical-tail) + (when value + (setcdr (last canonical-head) (extmap--compress-value value canonical-subvalues))) + canonical-head) + canonical-subvalues))) + ((or (vectorp value) (with-no-warnings (when (fboundp #'recordp) (recordp value)))) + (or (gethash value canonical-subvalues) + (let* ((length (length value)) + (result (if (vectorp value) (make-vector length nil) (with-no-warnings (make-record nil (1- length) nil))))) + (dotimes (k length) + (aset result k (extmap--compress-value (aref value k) canonical-subvalues))) + (puthash value result canonical-subvalues)))) + (t + value))) + ;; This is like built-in `equal-including-properties', except that ;; property values are compared with the same function, not with `eq'. ;; Slow, but is used only during extmap creation and testing, both of diff --git a/test/extmap-test.el b/test/extmap-test.el index 2ff1de1197..b1d570609b 100644 --- a/test/extmap-test.el +++ b/test/extmap-test.el @@ -39,6 +39,11 @@ (defun extmap--test-sort-keys (keys) (sort keys (lambda (a b) (string< (symbol-name a) (symbol-name b))))) +(defun extmap--test-compress-value (value) + (let ((compressed (extmap--compress-value value (make-hash-table :test #'extmap--equal-including-properties)))) + (should (equal compressed value)) + compressed)) + (ert-deftest extmap-1 () (extmap--test-alist `((foo . 1) @@ -73,6 +78,18 @@ (bar . (value with different ,(propertize "string properties" 'face 'italic) must not be shared))) :share-values t :max-inline-bytes 0)) +(ert-deftest extmap-compressed-values-1 () + (let* ((extmap (extmap--test-alist `((foo . (compress-this: (1 2 3) (1 2 3) (0 1 2 3))) + (bar . (compress-this: (1 2 3) (1 2 3) (0 1 2 3)))) + :compress-values t :max-inline-bytes 0)) + (foo (extmap-get extmap 'foo)) + (bar (extmap-get extmap 'bar))) + (should (eq (nth 1 foo) (nth 2 foo))) + (should (eq (nth 2 foo) (cdr (nth 3 foo)))) + (should (eq (nth 1 bar) (nth 2 bar))) + (should (eq (nth 2 bar) (cdr (nth 3 bar)))) + (should-not (eq foo bar)))) + (ert-deftest extmap-plain-string-p () (should (extmap--plain-string-p "foo")) @@ -97,3 +114,12 @@ (should (extmap--equal-including-properties (vector 1 2 3) (vector 1 2 3))) (should (extmap--equal-including-properties "foo" "foo")) (should (extmap--equal-including-properties (propertize "foo" 'face (list 'bold 'italic)) (propertize "foo" 'face (list 'bold 'italic))))) + +(ert-deftest extmap-internal-compress-value () + (extmap--test-compress-value '(nothing to compress here)) + (let ((compressed (extmap--test-compress-value '((1 2 3) (4 5 6) (1 2 3))))) + (should (eq (nth 0 compressed) (nth 2 compressed)))) + (let ((compressed (extmap--test-compress-value '((1 2 3) (4 5 6) . (1 2 3))))) + (should (eq (car compressed) (cddr compressed)))) + (let ((compressed (extmap--test-compress-value '[[1 2 3] [4 5 6] [1 2 3]]))) + (should (eq (aref compressed 0) (aref compressed 2)))))