branch: elpa/extmap
commit 83d5c74adc4994b6662b994e41da85b3052a64b0
Author: Paul Pogonyshev <pogonys...@gmail.com>
Commit: Paul Pogonyshev <pogonys...@gmail.com>

    Implement `:compress-values' flag; this doesn't affect database structure.
---
 extmap.el           | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 test/extmap-test.el | 26 ++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/extmap.el b/extmap.el
index 7d09061a59..18910d16d7 100644
--- a/extmap.el
+++ b/extmap.el
@@ -333,6 +333,14 @@ OPTIONS can be a list of the following keyword arguments:
     must be prepared that `extmap-get' can return `eq' values for
     different keys (for this reason, this is not the default).
 
+  :compress-values
+
+    Replace equal parts within values with the same object.  This
+    can decrease database size, but you must be prepared that
+    values returned `extmap-get' can contain `eq' elements (in
+    lists, vectors, etc.).  It also makes map creation noticeably
+    slower.  For these reason, this is not the default.
+
   :max-inline-bytes
 
     Inline values for which `print' results in this many bytes.
@@ -367,6 +375,7 @@ Only available on Emacs 25, as this requires `generator' 
package."
     (let ((print-level                nil)
           (print-length               nil)
           (shared-values              (when (plist-get options :share-values) 
(make-hash-table :test #'extmap--equal-including-properties)))
+          (canonical-subvalues        (when (plist-get options 
:compress-values) (make-hash-table :test #'extmap--equal-including-properties)))
           (max-inline-bytes           (or (plist-get options 
:max-inline-bytes) 16))
           (offset                     (bindat-length 
extmap--header-bindat-spec nil))
           (buffer                     (current-buffer))
@@ -389,7 +398,14 @@ Only available on Emacs 25, as this requires `generator' 
package."
             (puthash key t used-keys)
             (insert (encode-coding-string (symbol-name key) 'utf-8 t))
             (insert 0)
-            (let ((serialized (if (extmap--plain-string-p value) value 
(prin1-to-string value))))
+            (let ((serialized (if (extmap--plain-string-p value)
+                                  value
+                                (let ((print-circle               t)
+                                      (print-continuous-numbering nil))
+                                  (when canonical-subvalues
+                                    (clrhash canonical-subvalues)
+                                    (setq value (extmap--compress-value value 
canonical-subvalues)))
+                                  (prin1-to-string value)))))
               (unless (or (extmap--plain-string-p value) (condition-case _ 
(equal (read serialized) value) (error nil)))
                 (error "Value for key `%s' cannot be saved in database: it 
cannot be read back or is different after reading" key))
               ;; The whole point of this buffer is to be used for
@@ -428,6 +444,38 @@ Only available on Emacs 25, as this requires `generator' 
package."
        (null (text-properties-at 0 object))
        (null (next-property-change 0 object))))
 
+(defun extmap--compress-value (value canonical-subvalues)
+  (cond ((stringp value)
+         (if (and (<= (length value) 4) (extmap--plain-string-p value))
+             ;; Don't try to compress very short strings without text 
properties.
+             value
+           (or (gethash value canonical-subvalues)
+               (puthash value value canonical-subvalues))))
+        ((consp value)
+         (let ((original-value value)
+               canonical-head
+               canonical-tail)
+           (while (unless (setq canonical-tail (gethash value 
canonical-subvalues))
+                    (push (extmap--compress-value (car value) 
canonical-subvalues) canonical-head)
+                    (consp (setq value (cdr value)))))
+           (setq canonical-head (nreverse canonical-head))
+           (puthash original-value
+                    (if canonical-tail
+                        (nconc canonical-head canonical-tail)
+                      (when value
+                        (setcdr (last canonical-head) (extmap--compress-value 
value canonical-subvalues)))
+                      canonical-head)
+                    canonical-subvalues)))
+        ((or (vectorp value) (with-no-warnings (when (fboundp #'recordp) 
(recordp value))))
+         (or (gethash value canonical-subvalues)
+             (let* ((length (length value))
+                    (result (if (vectorp value) (make-vector length nil) 
(with-no-warnings (make-record nil (1- length) nil)))))
+               (dotimes (k length)
+                 (aset result k (extmap--compress-value (aref value k) 
canonical-subvalues)))
+               (puthash value result canonical-subvalues))))
+        (t
+         value)))
+
 ;; This is like built-in `equal-including-properties', except that
 ;; property values are compared with the same function, not with `eq'.
 ;; Slow, but is used only during extmap creation and testing, both of
diff --git a/test/extmap-test.el b/test/extmap-test.el
index 2ff1de1197..b1d570609b 100644
--- a/test/extmap-test.el
+++ b/test/extmap-test.el
@@ -39,6 +39,11 @@
 (defun extmap--test-sort-keys (keys)
   (sort keys (lambda (a b) (string< (symbol-name a) (symbol-name b)))))
 
+(defun extmap--test-compress-value (value)
+  (let ((compressed (extmap--compress-value value (make-hash-table :test 
#'extmap--equal-including-properties))))
+    (should (equal compressed value))
+    compressed))
+
 
 (ert-deftest extmap-1 ()
   (extmap--test-alist `((foo  . 1)
@@ -73,6 +78,18 @@
                         (bar . (value with different ,(propertize "string 
properties" 'face 'italic) must not be shared)))
                       :share-values t :max-inline-bytes 0))
 
+(ert-deftest extmap-compressed-values-1 ()
+  (let* ((extmap (extmap--test-alist `((foo . (compress-this: (1 2 3) (1 2 3) 
(0 1 2 3)))
+                                       (bar . (compress-this: (1 2 3) (1 2 3) 
(0 1 2 3))))
+                                     :compress-values t :max-inline-bytes 0))
+         (foo    (extmap-get extmap 'foo))
+         (bar    (extmap-get extmap 'bar)))
+    (should     (eq (nth 1 foo) (nth 2 foo)))
+    (should     (eq (nth 2 foo) (cdr (nth 3 foo))))
+    (should     (eq (nth 1 bar) (nth 2 bar)))
+    (should     (eq (nth 2 bar) (cdr (nth 3 bar))))
+    (should-not (eq foo bar))))
+
 
 (ert-deftest extmap-plain-string-p ()
   (should (extmap--plain-string-p "foo"))
@@ -97,3 +114,12 @@
   (should (extmap--equal-including-properties (vector 1 2 3) (vector 1 2 3)))
   (should (extmap--equal-including-properties "foo" "foo"))
   (should (extmap--equal-including-properties (propertize "foo" 'face (list 
'bold 'italic)) (propertize "foo" 'face (list 'bold 'italic)))))
+
+(ert-deftest extmap-internal-compress-value ()
+  (extmap--test-compress-value '(nothing to compress here))
+  (let ((compressed (extmap--test-compress-value '((1 2 3) (4 5 6) (1 2 3)))))
+    (should (eq (nth 0 compressed) (nth 2 compressed))))
+  (let ((compressed (extmap--test-compress-value '((1 2 3) (4 5 6) . (1 2 
3)))))
+    (should (eq (car compressed) (cddr compressed))))
+  (let ((compressed (extmap--test-compress-value '[[1 2 3] [4 5 6] [1 2 3]])))
+    (should (eq (aref compressed 0) (aref compressed 2)))))

Reply via email to