branch: elpa/extmap
commit 070cc972a919dd016f1a6950251fb31d5a44aaa7
Author: Paul Pogonyshev <pogonys...@gmail.com>
Commit: Paul Pogonyshev <pogonys...@gmail.com>

    Implement `:share-values' flag; bump generated file version accordingly.
---
 extmap.el           | 180 +++++++++++++++++++++++++++++++++++++++-------------
 test/extmap-test.el |  67 +++++--------------
 2 files changed, 153 insertions(+), 94 deletions(-)

diff --git a/extmap.el b/extmap.el
index b29575c736..7d09061a59 100644
--- a/extmap.el
+++ b/extmap.el
@@ -71,7 +71,8 @@
 ;;   0 -- inlined string (short specification is used);
 ;;   1 -- inlined Lisp object (short specification is used);
 ;;   2 -- string;
-;;   3 -- Lisp object.
+;;   3 -- Lisp object;
+;;   4 -- shared value (short specification is used).
 (defconst extmap--item-short-bindat-spec '((type      u8)
                                            (length    u24)))
 (defconst extmap--item-bindat-spec       '((type      u8)
@@ -125,7 +126,7 @@ just stop using it."
       (erase-buffer)
       (unless (= (bindat-get-field header 'magic) #x91f7)
         (error "Wrong or corrupted extmap in file `%s'" filename))
-      (unless (= (bindat-get-field header 'version) 0)
+      (unless (<= (bindat-get-field header 'version) 1)
         (error "Future version of extmap in file `%s', upgrade your `extmap' 
package" filename))
       (setq items (make-hash-table :test #'eq :size (bindat-get-field header 
'num-items)))
       (insert-file-contents-literally filename nil (bindat-get-field header 
'offset))
@@ -134,17 +135,22 @@ just stop using it."
                (item-header (bindat-unpack extmap--item-short-bindat-spec 
(encode-coding-string (buffer-substring-no-properties (point) (+ (point) 
item-short-header-length)) 'no-conversion)))
                (type        (bindat-get-field item-header 'type))
                (length      (bindat-get-field item-header 'length)))
-          (if (<= type 1)
-              ;; Inlined item.
-              (let ((value (decode-coding-string 
(buffer-substring-no-properties (+ (point) item-short-header-length) (+ (point) 
item-short-header-length length)) 'utf-8 t)))
-                (goto-char (+ (point) item-short-header-length length))
-                (when (= type 1)
-                  (setq value (car (read-from-string value))))
-                (puthash key (cons t value) items))
-            ;; Non-inlined item.
-            (let ((item-header (bindat-unpack extmap--item-bindat-spec 
(encode-coding-string (buffer-substring-no-properties (point) (+ (point) 
item-header-length)) 'no-conversion))))
-              (goto-char (+ (point) item-header-length))
-              (puthash key (cons nil (cons type (cons (bindat-get-field 
item-header 'offset) length))) items)))))
+          (if (or (= type 2) (= type 3))
+              ;; Non-inlined item.
+              (let ((item-header (bindat-unpack extmap--item-bindat-spec 
(encode-coding-string (buffer-substring-no-properties (point) (+ (point) 
item-header-length)) 'no-conversion))))
+                (goto-char (+ (point) item-header-length))
+                (puthash key (cons nil (cons type (cons (bindat-get-field 
item-header 'offset) length))) items))
+            (let ((value (decode-coding-string (buffer-substring-no-properties 
(+ (point) item-short-header-length) (+ (point) item-short-header-length 
length)) 'utf-8 t)))
+              (goto-char (+ (point) item-short-header-length length))
+              (if (= type 4)
+                  ;; Shared-value item.
+                  (puthash key (cons nil (cons type (intern value))) items)
+                ;; Inlined item.
+                (if (= type 1)
+                    (setq value (car (read-from-string value)))
+                  (unless (= type 0)
+                    (error "Corrupted extmap file")))
+                (puthash key (cons t value) items))))))
       ;; Fifth element of `file-attributes' result is the modification date.
       ;; `file-attribute-modification-time' doesn't exist in Emacs 25.
       (list (cons filename (when (plist-get options :auto-reload) (nth 5 
(file-attributes filename))))
@@ -189,16 +195,20 @@ returns nil."
                    (if (car value)
                        ;; Already loaded.
                        (cdr value)
-                     ;; Load now.
-                     (let ((coding-system-for-read 'utf-8)
-                           (offset                 (cadr (cdr value))))
-                       (with-temp-buffer
-                         (insert-file-contents (car (nth 0 extmap)) nil offset 
(+ offset (cddr (cdr value))))
-                         (let ((new-value (if (= (cadr value) 2) 
(buffer-string) (read (current-buffer)))))
-                           (if weak-data
-                               (puthash key new-value weak-data)
-                             (prog1 (setcdr value new-value)
-                               (setcar value t)))))))
+                     (let (new-value)
+                       (if (= (cadr value) 4)
+                           ;; Value is shared with a different key.
+                           (setq new-value (extmap-get extmap (cddr value)))
+                         ;; Load now.
+                         (let ((coding-system-for-read 'utf-8)
+                               (offset                 (cadr (cdr value))))
+                           (with-temp-buffer
+                             (insert-file-contents (car (nth 0 extmap)) nil 
offset (+ offset (cddr (cdr value))))
+                             (setq new-value (if (= (cadr value) 2) 
(buffer-string) (read (current-buffer)))))))
+                       (if weak-data
+                           (puthash key new-value weak-data)
+                         (prog1 (setcdr value new-value)
+                           (setcar value t)))))
                  (unless no-error
                    (error "No value for key `%s'" key))))
       value)))
@@ -217,10 +227,16 @@ In case the map has been initialized with `:weak-data' 
option, it
 may happen that this function returns t, but value for the KEY
 has to be loaded again in the future."
   (extmap--reload-if-needed extmap)
-  (or (car-safe (gethash key (nth 1 extmap)))
-      (let ((weak-data (nth 2 extmap)))
-        (when weak-data
-          (not (eq (gethash key weak-data weak-data) weak-data))))))
+  (let* ((items (nth 1 extmap))
+         (value (gethash key items)))
+    (or (car value)
+        (when value
+          (let ((synonym-of (when (= (cadr value) 4) (cddr value))))
+            (or (and synonym-of (car (gethash synonym-of items)))
+                (let ((weak-data (nth 2 extmap)))
+                  (when weak-data
+                    (or (not (eq (gethash key weak-data weak-data) weak-data))
+                        (and synonym-of (not (eq (gethash synonym-of weak-data 
weak-data) weak-data))))))))))))
 
 (defun extmap-keys (extmap)
   "Return a list of all the keys in the map.
@@ -310,6 +326,13 @@ OPTIONS can be a list of the following keyword arguments:
     file already exists.  However, you can order it to
     overwrite (not merge!) the file.
 
+  :share-values
+
+    When values for different keys are equal, store only one copy
+    in the database.  This can decrease database size, but you
+    must be prepared that `extmap-get' can return `eq' values for
+    different keys (for this reason, this is not the default).
+
   :max-inline-bytes
 
     Inline values for which `print' results in this many bytes.
@@ -343,6 +366,7 @@ Only available on Emacs 25, as this requires `generator' 
package."
   (with-temp-buffer
     (let ((print-level                nil)
           (print-length               nil)
+          (shared-values              (when (plist-get options :share-values) 
(make-hash-table :test #'extmap--equal-including-properties)))
           (max-inline-bytes           (or (plist-get options 
:max-inline-bytes) 16))
           (offset                     (bindat-length 
extmap--header-bindat-spec nil))
           (buffer                     (current-buffer))
@@ -365,26 +389,36 @@ Only available on Emacs 25, as this requires `generator' 
package."
             (puthash key t used-keys)
             (insert (encode-coding-string (symbol-name key) 'utf-8 t))
             (insert 0)
-            (with-temp-buffer
-              (let ((serialized (if (extmap--plain-string-p value) value 
(prin1-to-string value))))
-                (unless (or (extmap--plain-string-p value) (condition-case _ 
(equal (read serialized) value) (error nil)))
-                  (error "Value for key `%s' cannot be saved in database: it 
cannot be read back or is different after reading" key))
-                (insert (encode-coding-string serialized 'utf-8 t))
-                (let ((num-bytes (buffer-size)))
-                  (if (<= num-bytes max-inline-bytes)
-                      (let ((serialized-in (current-buffer)))
-                        (with-current-buffer buffer
-                          (insert (bindat-pack extmap--item-short-bindat-spec 
`((type . ,(if (extmap--plain-string-p value) 0 1)) (length . ,num-bytes))))
-                          (insert-buffer-substring serialized-in)))
-                    (write-region (point-min) (point-max) filename t)
-                    (with-current-buffer buffer
-                      (insert (bindat-pack extmap--item-bindat-spec `((type . 
,(if (extmap--plain-string-p value) 2 3)) (length . ,num-bytes) (offset . 
,offset))))
-                      (setq offset (+ offset num-bytes))))))))))
+            (let ((serialized (if (extmap--plain-string-p value) value 
(prin1-to-string value))))
+              (unless (or (extmap--plain-string-p value) (condition-case _ 
(equal (read serialized) value) (error nil)))
+                (error "Value for key `%s' cannot be saved in database: it 
cannot be read back or is different after reading" key))
+              ;; The whole point of this buffer is to be used for
+              ;; `write-region' below (in the most common case).
+              (with-temp-buffer
+                (let* ((serialized-in (current-buffer))
+                       (num-bytes     (encode-coding-string serialized 'utf-8 
t serialized-in))
+                       canonical-key)
+                  (cond ((<= num-bytes max-inline-bytes)
+                         (with-current-buffer buffer
+                           (insert (bindat-pack extmap--item-short-bindat-spec 
`((type . ,(if (extmap--plain-string-p value) 0 1)) (length . ,num-bytes))))
+                           (insert-buffer-substring serialized-in)))
+                        ((and shared-values (setq canonical-key (gethash value 
shared-values)))
+                         (with-current-buffer buffer
+                           (let ((encoded (encode-coding-string (symbol-name 
canonical-key) 'utf-8 t)))
+                             (insert (bindat-pack 
extmap--item-short-bindat-spec `((type . 4) (length . ,(length encoded))))
+                                     encoded))))
+                        (t
+                         (write-region (point-min) (point-max) filename t)
+                         (with-current-buffer buffer
+                           (insert (bindat-pack extmap--item-bindat-spec 
`((type . ,(if (extmap--plain-string-p value) 2 3)) (length . ,num-bytes) 
(offset . ,offset))))
+                           (setq offset (+ offset num-bytes))
+                           (when shared-values
+                             (puthash value key shared-values)))))))))))
       (write-region (point-min) (point-max) filename t)
       ;; Update the header.
       (erase-buffer)
       (insert (bindat-pack extmap--header-bindat-spec `((magic     . #x91f7)
-                                                        (version   . 0)
+                                                        (version   . 1)
                                                         (num-items . 
,(hash-table-count used-keys))
                                                         (offset    . 
,offset))))
       (write-region (point-min) (point-max) filename 0))))
@@ -394,6 +428,66 @@ Only available on Emacs 25, as this requires `generator' 
package."
        (null (text-properties-at 0 object))
        (null (next-property-change 0 object))))
 
+;; This is like built-in `equal-including-properties', except that
+;; property values are compared with the same function, not with `eq'.
+;; Slow, but is used only during extmap creation and testing, both of
+;; which are not performance-critical.
+(defun extmap--equal-including-properties (a b)
+  (cond ((stringp a)
+         (and (stringp b)
+              (string= a b)
+              (let ((at    0)
+                    (equal t))
+                (while (and at equal)
+                  (let ((next (next-property-change at a)))
+                    (setq equal (and (equal next (next-property-change at b))
+                                     (let ((a-properties    
(text-properties-at at a))
+                                           (b-properties    
(text-properties-at at b))
+                                           (a-property-hash (make-hash-table))
+                                           (b-property-hash (make-hash-table)))
+                                       (while a-properties
+                                         (puthash (pop a-properties) (pop 
a-properties) a-property-hash))
+                                       (while b-properties
+                                         (puthash (pop b-properties) (pop 
b-properties) b-property-hash))
+                                       (extmap--equal-including-properties 
a-property-hash b-property-hash)))
+                          at    next)))
+                equal)))
+        ((consp a)
+         (let ((equal t))
+           (while (if (and (consp b) (extmap--equal-including-properties (car 
a) (car b)))
+                      (consp (setq b (cdr b)
+                                   a (cdr a)))
+                    (setq equal nil)))
+           (and equal (extmap--equal-including-properties a b))))
+        ((or (vectorp a) (with-no-warnings (when (fboundp #'recordp) (recordp 
a))))
+         (and (if (vectorp a) (vectorp b) (with-no-warnings (recordp b)))
+              (let ((length (length a)))
+                (and (= length (length b))
+                     (let ((equal t)
+                           (k     0))
+                       (while (< k length)
+                         (if (extmap--equal-including-properties (aref a k) 
(aref b k))
+                             (setq k (1+ k))
+                           (setq k     length
+                                 equal nil)))
+                       equal)))))
+        ((hash-table-p a)
+         (and (hash-table-p b)
+              (= (hash-table-count a) (hash-table-count b))
+              (catch 'equal
+                (maphash (lambda (key value)
+                           (unless (extmap--equal-including-properties value 
(gethash key b (not a)))
+                             (throw 'equal nil)))
+                         a)
+                t)))
+        ;; Works for all the primitive types, as well as for bool vectors.
+        (t
+         (equal a b))))
+
+;; No special hashing function: `sxhash' ignores text properties, but
+;; it is not required that hashes of different values are different.
+(define-hash-table-test 'extmap--equal-including-properties 
#'extmap--equal-including-properties #'sxhash)
+
 
 (provide 'extmap)
 
diff --git a/test/extmap-test.el b/test/extmap-test.el
index 2a27e63772..2ff1de1197 100644
--- a/test/extmap-test.el
+++ b/test/extmap-test.el
@@ -25,56 +25,6 @@
 (defvar extmap--test-filename nil)
 
 
-;; This is like built-in `equal-including-properties', except that
-;; property values are compared with the same function, not with `eq'.
-;; Probably not complete.  Slow.
-(defun extmap--equal-including-properties (a b)
-  (cond ((stringp a)
-         (and (stringp b)
-              (string= a b)
-              (let ((at    0)
-                    (equal t))
-                (while (and at equal)
-                  (let ((next (next-property-change at a)))
-                    (setq equal (and (equal next (next-property-change at b))
-                                     (let ((a-properties    
(text-properties-at at a))
-                                           (b-properties    
(text-properties-at at b))
-                                           (a-property-hash (make-hash-table))
-                                           (b-property-hash (make-hash-table)))
-                                       (while a-properties
-                                         (puthash (pop a-properties) (pop 
a-properties) a-property-hash))
-                                       (while b-properties
-                                         (puthash (pop b-properties) (pop 
b-properties) b-property-hash))
-                                       (extmap--equal-including-properties 
a-property-hash b-property-hash)))
-                          at    next)))
-                equal)))
-        ((consp a)
-         ;; Recursive for lists, but that's not important for testing.
-         (and (consp b)
-              (extmap--equal-including-properties (car a) (car b))
-              (extmap--equal-including-properties (cdr a) (cdr b))))
-        ((vectorp a)
-         (and (vectorp b)
-              (let ((length (length a)))
-                (and (= length (length b))
-                     (let ((equal t)
-                           (k     0))
-                       (while (and equal (< k length))
-                         (setq equal (extmap--equal-including-properties (aref 
a k) (aref b k))
-                               k     (1+ k)))
-                       equal)))))
-        ((hash-table-p a)
-         (and (hash-table-p b)
-              (= (hash-table-count a) (hash-table-count b))
-              (catch 'equal
-                (maphash (lambda (key value)
-                           (unless (extmap--equal-including-properties value 
(gethash key b (not a)))
-                             (throw 'equal nil)))
-                         a)
-                t)))
-        (t
-         (equal a b))))
-
 (defun extmap--test-alist (data &rest options)
   (let ((filename (concat extmap--test-directory (or extmap--test-filename 
"test.extmap"))))
     (apply #'extmap-from-alist filename data :overwrite t options)
@@ -83,7 +33,8 @@
       (dolist (entry data)
         (should (extmap-contains-key extmap (car entry)))
         (should (extmap--equal-including-properties (extmap-get extmap (car 
entry)) (cdr entry)))
-        (should (extmap-value-loaded extmap (car entry)))))))
+        (should (extmap-value-loaded extmap (car entry))))
+      extmap)))
 
 (defun extmap--test-sort-keys (keys)
   (sort keys (lambda (a b) (string< (symbol-name a) (symbol-name b)))))
@@ -108,6 +59,20 @@
                         (spam . ,(propertize "lalala lalala lalala lalala 
lalala lalala lalala lalala lalala lalala lalala" 'face '(bold italic)))
                         (ham  . ,(list (propertize "string" 'face '(bold 
italic)))))))
 
+(ert-deftest extmap-shared-values-1 ()
+  (let ((extmap (extmap--test-alist `((foo . (this value is supposed to be 
shared))
+                                      (bar . (this value is supposed to be 
shared)))
+                                    :share-values t :max-inline-bytes 0)))
+    (should (eq (extmap-get extmap 'foo) (extmap-get extmap 'bar))))
+  (let ((extmap (extmap--test-alist `((foo . (this value will not be shared 
even if equal))
+                                      (bar . (this value will not be shared 
even if equal))))))
+    (should-not (eq (extmap-get extmap 'foo) (extmap-get extmap 'bar)))))
+
+(ert-deftest extmap-shared-values-2 ()
+  (extmap--test-alist `((foo . (value with different ,(propertize "string 
properties" 'face 'bold)   must not be shared))
+                        (bar . (value with different ,(propertize "string 
properties" 'face 'italic) must not be shared)))
+                      :share-values t :max-inline-bytes 0))
+
 
 (ert-deftest extmap-plain-string-p ()
   (should (extmap--plain-string-p "foo"))

Reply via email to