branch: elpa/datetime
commit 462015a50d412396e29320eb78ab3c2b39fb9aa9
Author: Paul Pogonyshev <pogonys...@gmail.com>
Commit: Paul Pogonyshev <pogonys...@gmail.com>

    Implement timezone offset parsing for all the various formats at once.
---
 datetime.el                | 124 ++++++++++++++++++++++++++++++++++++++-------
 test/ProcessTimestamp.java |  14 ++---
 test/base.el               |  32 ++++++++++++
 test/format.el             |  17 -------
 test/parse.el              |  53 ++++++++++++-------
 5 files changed, 182 insertions(+), 58 deletions(-)

diff --git a/datetime.el b/datetime.el
index a208857919..f3ec46c03e 100644
--- a/datetime.el
+++ b/datetime.el
@@ -621,6 +621,23 @@ form:
           (format "GMT%c%02d:%02d" sign (/ offset (* 60 60)) (/ (% offset (* 
60 60)) 60))
         (format "GMT%c%02d:%02d:%02d" sign (/ offset (* 60 60)) (/ (% offset 
(* 60 60)) 60) seconds)))))
 
+(defvar datetime--timezone-offset-matching-regexps
+  '((offset-hhmm            . "[-+][01][0-9][0-5][0-9]")
+    (offset-hh?mm           . "[-+][01][0-9]\\(?:[0-5][0-9]\\)?")
+    (offset-hhmm?ss         . "[-+][01][0-9][0-5][0-9]\\(?:[0-5][0-9]\\)?")
+    (offset-hh:mm           . "[-+][01][0-9]:[0-5][0-9]")
+    (offset-hh:mm?:ss       . "[-+][01][0-9]:[0-5][0-9]\\(?::[0-5][0-9]\\)?")
+    (offset-hhmm-or-z       . "[-+][01][0-9][0-5][0-9]\\|Z")
+    (offset-hh?mm-or-z      . "[-+][01][0-9]\\(?:[0-5][0-9]\\)?\\|Z")
+    (offset-hhmm?ss-or-z    . "[-+][01][0-9][0-5][0-9]\\(?:[0-5][0-9]\\)?\\|Z")
+    (offset-hh:mm-or-z      . "[-+][01][0-9]:[0-5][0-9]\\|Z")
+    (offset-hh:mm?:ss-or-z  . 
"[-+][01][0-9]:[0-5][0-9]\\(?::[0-5][0-9]\\)?\\|Z")
+    (offset-localized-short . 
"GMT\\(?:[-+]\\([0-9]\\|1[0-9]\\)\\(?::[0-5][0-9]\\(?::[0-5][0-9]\\)?\\)?\\)?")
+    (offset-localized-full  . 
"GMT\\(?:[-+][01][0-9]:[0-5][0-9]\\(?::[0-5][0-9]\\)?\\)?")))
+
+(defun datetime--timezone-offset-matching-regexp (details)
+  (cdr (assq details datetime--timezone-offset-matching-regexps)))
+
 
 (defsubst datetime--digits-format (num-repetitions)
   (if (> num-repetitions 1) (format "%%0%dd" num-repetitions) "%d"))
@@ -787,8 +804,8 @@ to this function.
                      (push "%s" format-parts)
                      (push `(,formatter-function (round 
datetime--last-conversion-offset)) format-arguments)))))
                (_
-                (signal 'datetime-unsupported-timezone details))))
-            (_ (error "Unexpected value %s" type))))))
+                (error "Unexpected timezone details `%s'" details))))
+            (_ (error "Unexpected value `%s'" type))))))
     ;; 400 is the size of Gregorian calendar leap year loop.
     (let* ((days-in-400-years datetime--gregorian-days-in-400-years)
            (formatter `(lambda (date-time)
@@ -841,9 +858,10 @@ to this function.
                                 ,@(when need-hour
                                     `((hour (/ (mod (floor time) ,(* 24 60 
60)) ,(* 60 60))))))
                            (format ,(apply #'concat (nreverse format-parts)) 
,@(nreverse format-arguments))))))
-      (if (plist-get options :debug)
-          formatter
-        (byte-compile formatter)))))
+      (unless (plist-get options :debug)
+        (unless (setf formatter (byte-compile formatter))
+          (error "Internal error: unable to byte-compile generated 
formatter")))
+      formatter)))
 
 ;; Not available on older Emacs versions.  Copied from recent Emacs source.
 (defun datetime--macroexp-quote (v)
@@ -1096,6 +1114,7 @@ unless specified otherwise.
          minute-part-indices
          second-part-indices
          second-fractional-part-indices
+         timezone-offset-part-indices
          have-case-sensitive-parts)
     ;; Doing this in two loops, so that the second can look ahead and easily 
find out if
     ;; the next regexp part is going to be a numeric value.
@@ -1171,7 +1190,20 @@ unless specified otherwise.
                         (`second-fractional    (push (cons part-index (expt 
10.0 details)) second-fractional-part-indices)
                                                (apply #'concat (make-list 
details (rx (any "0-9")))))
                         (`timezone
-                         (signal 'datetime-unsupported-timezone nil))
+                         (pcase details
+                           ((or `abbreviated `full)
+                            (signal 'datetime-unsupported-timezone nil))
+                           ((or `offset-localized-short `offset-localized-full
+                                `offset-hh?mm `offset-hhmm `offset-hh:mm 
`offset-hhmm?ss `offset-hh:mm?:ss
+                                `offset-hh?mm-or-z `offset-hhmm-or-z 
`offset-hh:mm-or-z `offset-hhmm?ss-or-z `offset-hh:mm?:ss-or-z
+                                `offset-hhmm)
+                            ;; FIXME: Use the most specific (not just the 
first) offset if there are several.
+                            (when (or validating (null 
timezone-offset-part-indices))
+                              (push (cons part-index details) 
timezone-offset-part-indices))
+                            ;; t means that this is supposed to match numbers.
+                            (cons (datetime--timezone-offset-matching-regexp 
details) t))
+                           (_
+                            (error "Unexpected timezone details `%s'" 
details))))
                         (_ (error "Unexpected value %s" type)))
                       details)))
             regexp-part-sources)
@@ -1204,7 +1236,10 @@ unless specified otherwise.
                                (format (if run-together-numeric-groups 
"[0-%d][0-9]" "0*[1-%d]?[0-9]") (/ regexp 10)))
                               (t
                                (if run-together-numeric-groups "[01][0-9]" 
"0*1?[0-9]"))))))
-                (setf last-part-was-numeric nil)
+                (if (consp regexp)
+                    (setf last-part-was-numeric (cdr regexp)
+                          regexp                (car regexp))
+                  (setf last-part-was-numeric nil))
                 (cond ((vectorp regexp)
                        ;; A vector of options returned by 
`datetime-locale-field'.
                        (setq have-case-sensitive-parts t)
@@ -1226,7 +1261,8 @@ unless specified otherwise.
           regexp-parts                   (nreverse regexp-parts)
           minute-part-indices            (nreverse minute-part-indices)
           second-part-indices            (nreverse second-part-indices)
-          second-fractional-part-indices (nreverse 
second-fractional-part-indices))
+          second-fractional-part-indices (nreverse 
second-fractional-part-indices)
+          timezone-offset-part-indices   (nreverse 
timezone-offset-part-indices))
     (unless validating
       (when month-number-part-indices
         (setq month-name-part-indices nil))
@@ -1244,7 +1280,7 @@ unless specified otherwise.
                                           year-part-indices 
month-number-part-indices month-name-part-indices day-of-month-part-indices
                                           am-pm-part-indices 
day-period-part-indices
                                           hour-0-23-part-indices 
hour-1-24-part-indices hour-am-pm-1-12-part-indices hour-am-pm-0-11-part-indices
-                                          minute-part-indices 
second-part-indices second-fractional-part-indices))
+                                          minute-part-indices 
second-part-indices second-fractional-part-indices 
timezone-offset-part-indices))
            (part-index              0)
            (group-index             1))
       (while regexp-parts
@@ -1297,6 +1333,9 @@ unless specified otherwise.
            (second-computation (or (datetime--parser-computation pattern 
"second" validating nil 59
                                                                  
(second-part-indices datetime--parser-int-computation))
                                    (plist-get 'second defaults) 0))
+           (timezone-offset-computation
+                               (datetime--parser-computation pattern "timezone 
offset" validating nil nil
+                                                             
(timezone-offset-part-indices datetime--parser-timezone-offset-computation)))
            (parser            `(+ ,@(when (or year-computation
                                               (not (memq constant-year     
'(nil 1970)))
                                               (not (memq month-computation 
'(nil 0)))
@@ -1325,15 +1364,19 @@ unless specified otherwise.
                                   (* ,hour-computation ,(* 60 60))
                                   (* ,minute-computation 60)
                                   ,second-computation
+                                  ,@(when timezone-offset-computation
+                                      `((- ,timezone-offset-computation)))
                                   ,@(when second-fractional-part-indices
                                       `((/ (string-to-number (match-string 
,(caar second-fractional-part-indices) string))
                                            ,(cdar 
second-fractional-part-indices)))))))
-      (pcase timezone-data
-        (`(,constant-offset)
-         (unless (= constant-offset 0)
-           (setq parser `(- ,parser ,constant-offset))))
-        (_
-         (setq parser `(datetime--convert-from-utc-float ,parser 
,(datetime--macroexp-quote timezone-data)))))
+      ;; Apply the timezone from `options', but only if one is not specified 
in the argument.
+      (unless timezone-offset-part-indices
+        (pcase timezone-data
+          (`(,constant-offset)
+           (unless (= constant-offset 0)
+             (setq parser `(- ,parser ,constant-offset))))
+          (_
+           (setq parser `(datetime--convert-from-utc-float ,parser 
,(datetime--macroexp-quote timezone-data))))))
       (setq parser `(save-match-data
                       (if (string-match ,(concat "^" (apply #'concat 
regexp-parts) "$") ,(if downcased `(downcase string) 'string))
                           ,parser
@@ -1342,9 +1385,10 @@ unless specified otherwise.
         (setq parser `(let ((case-fold-search ,case-insensitive))
                         ,parser)))
       (setq parser `(lambda (string) ,parser))
-      (if (plist-get options :debug)
-          parser
-        (byte-compile parser)))))
+      (unless (plist-get options :debug)
+        (unless (setf parser (byte-compile parser))
+          (error "Internal error: unable to byte-compile generated parser")))
+      parser)))
 
 (defun datetime--parser-year-computation (argument)
   (pcase (cdr argument)
@@ -1393,6 +1437,50 @@ unless specified otherwise.
          ,if-first-form
        ,if-second-form)))
 
+;; Where ARGUMENT is expected to be `(STRING-INDEX . OFFSET-TYPE)'.
+(defun datetime--parser-timezone-offset-computation (argument)
+  (let* ((type             (cdr argument))
+         (localized        (memq type '(offset-localized-short 
offset-localized-full)))
+         ;; For "localized" offset types the prefix is "GMT".
+         (prefix-length    (if localized 3 0))
+         (separator-length (if (or localized (memq type '(offset-hh:mm 
offset-hh:mm?:ss offset-hh:mm-or-z offset-hh:mm?:ss-or-z))) 1 0))
+         (has-seconds      (or localized (memq type '(offset-hhmm?ss 
offset-hh:mm?:ss offset-hhmm?ss-or-z offset-hh:mm?:ss-or-z))))
+         (hour-form        `(* (string-to-number (substring offset-string
+                                                            ,(+ prefix-length 
1)
+                                                            ,@(if (eq type 
'offset-localized-short) '(hour-end) `(,(+ prefix-length 1 2)))))
+                               ,(* 60 60)))
+         (minutes-form     `(* (string-to-number (substring offset-string
+                                                            ,(if (eq type 
'offset-localized-short)
+                                                                 '(1+ hour-end)
+                                                               (+ 
prefix-length 1 2 separator-length))
+                                                            ,@(when has-seconds
+                                                                `(,(if (eq 
type 'offset-localized-short)
+                                                                       '(+ 
hour-end 3)
+                                                                     (+ 
prefix-length 1 2 separator-length 2))))))
+                               60))
+         (seconds-form     (when has-seconds
+                             `(string-to-number (substring offset-string
+                                                           ,(if (eq type 
'offset-localized-short)
+                                                                `(+ hour-end 4)
+                                                              (+ prefix-length 
1 2 separator-length 2 separator-length))))))
+         (form             `(+ ,hour-form
+                               ,(if (eq type 'offset-localized-short)
+                                    `(if (> (length offset-string) 6) 
,minutes-form 0)
+                                  minutes-form)
+                               ,@(when has-seconds
+                                   ;; Condition works for 
`offset-localized-short' too.
+                                   `((if (> (length offset-string) ,(+ 
prefix-length 1 2 separator-length 2)) ,seconds-form 0))))))
+    ;; For all other types `hour-end' is constant.
+    (when (eq type 'offset-localized-short)
+      (setf form `(let ((hour-end (if (or (= (length offset-string) 5) (eq 
(aref offset-string 5) ?:)) 5 6))) ,form)))
+    (setf form `(let ((offset ,form)) (if (= (aref offset-string 
,prefix-length) ?+) offset (- offset))))
+    (unless (memq type '(offset-hhmm offset-hh?mm offset-hhmm?ss offset-hh:mm 
offset-hh:mm?:ss))
+      ;; For all types, there is a fixed length of the match that implies zero 
offset
+      ;; (either for "Z" or "GMT") and cannot happen in any other case.
+      (setf form `(if (= (length offset-string) ,(if localized 3 1)) 0 ,form)))
+    `(let ((offset-string (match-string ,(car argument) string)))
+       ,form)))
+
 ;; Pretty similar to `datetime--convert-to-utc-float', but not quite.
 (defun datetime--convert-from-utc-float (date-time timezone-data)
   (let ((year-offset          (floor (/ (- date-time (car timezone-data)) 
datetime--average-seconds-in-year)))
diff --git a/test/ProcessTimestamp.java b/test/ProcessTimestamp.java
index 200ccd194b..a00c978b26 100644
--- a/test/ProcessTimestamp.java
+++ b/test/ProcessTimestamp.java
@@ -17,7 +17,8 @@ public class ProcessTimestamp
      *      epoch time UTC;
      *    FORMATTED is (only for command `parse'): a timestamp formatted 
according to
      *      the parameters that follow; read until linefeed;
-     *    TIMEZONE and LOCALE are string identifiers;
+     *    TIMEZONE and LOCALE are string identifiers; when parsing, TIMEZONE 
may also
+     *      be "nil";
      *    PATTERN is according to SimpleDateFormat documentation and is taken 
until
      *      the end of line with starting and ending whitespace removed.
      *
@@ -34,11 +35,12 @@ public class ProcessTimestamp
             if (!"format".equals (command) && !"parse".equals (command))
                 throw new IllegalArgumentException (String.format ("unknown 
command '%s'", command));
 
-            double  timestamp = ("format".equals (command) ? input.nextDouble 
()       : 0.0);
-            String  formatted = ("parse" .equals (command) ? input.nextLine 
().trim () : null);
-            ZoneId  timezone  = ZoneId.of (input.next ());
-            Locale  locale    = Locale.forLanguageTag (input.next ());
-            String  pattern   = input.nextLine ().trim ();
+            double  timestamp     = ("format".equals (command) ? 
input.nextDouble ()       : 0.0);
+            String  formatted     = ("parse" .equals (command) ? 
input.nextLine ().trim () : null);
+            String  timezone_lisp = input.next ();
+            ZoneId  timezone      = (!"nil".equals (timezone_lisp) ? ZoneId.of 
(timezone_lisp) : null);
+            Locale  locale        = Locale.forLanguageTag (input.next ());
+            String  pattern       = input.nextLine ().trim ();
 
             switch (command) {
             case "format":
diff --git a/test/base.el b/test/base.el
index 59c9fdad6c..8be2f1d098 100644
--- a/test/base.el
+++ b/test/base.el
@@ -28,6 +28,14 @@
 
 (defvar datetime--test-directory (file-name-directory (or load-file-name 
(buffer-file-name))))
 
+;; Spaces are included only for readability where needed.  They don't affect 
anything otherwise (or,
+;; rather, should affect the library and the Java benchmark in the same way).
+(defvar datetime--test-offset-format-specifiers
+  '("Z" "ZZ" "ZZZ" " ZZZZ" "ZZZZZ"
+    " O" " OOOO"
+    "x" "xx" "xxx" "xxxx" "xxxxx"
+    "X" "XX" "XXX" "XXXX" "XXXXX"))
+
 
 (defmacro datetime--test-set-up (timezone locale pattern &rest body)
   (declare (debug (form form form body))
@@ -37,6 +45,30 @@
          (datetime--test-pattern   ,pattern))
      ,@body))
 
+(defvar datetime--test-formatter nil)
+(defvar datetime--test-parser    nil)
+
+(defmacro datetime--test-set-up-formatter (timezone locale pattern &rest body)
+  (declare (debug (form form form body))
+           (indent 3))
+  `(datetime--test-set-up ,timezone ,locale ,pattern
+     (let ((datetime--test-formatter (datetime-float-formatter 'java 
datetime--test-pattern :timezone datetime--test-timezone :locale 
datetime--test-locale)))
+       ,@body)))
+
+(defmacro datetime--test-set-up-parser (timezone locale pattern &rest body)
+  (declare (debug (form form form body))
+           (indent 3))
+  `(datetime--test-set-up ,timezone ,locale ,pattern
+     (let ((datetime--test-parser (datetime-parser-to-float 'java 
datetime--test-pattern :timezone datetime--test-timezone :locale 
datetime--test-locale)))
+       ,@body)))
+
+(defmacro datetime--test-set-up-formatter-and-parser (timezone locale pattern 
&rest body)
+  (declare (debug (form form form body))
+           (indent 3))
+  `(datetime--test-set-up-formatter ,timezone ,locale ,pattern
+     (datetime--test-set-up-parser datetime--test-timezone 
datetime--test-locale datetime--test-pattern
+       ,@body)))
+
 (defun datetime--test (command times)
   (unless (listp times)
     (setq times (list times)))
diff --git a/test/format.el b/test/format.el
index 89ee7b3a20..e9e479967f 100644
--- a/test/format.el
+++ b/test/format.el
@@ -18,15 +18,6 @@
 
 (require 'test/base)
 
-(defvar datetime--test-formatter nil)
-
-
-(defmacro datetime--test-set-up-formatter (timezone locale pattern &rest body)
-  (declare (debug (form form form body))
-           (indent 3))
-  `(datetime--test-set-up ,timezone ,locale ,pattern
-     (let ((datetime--test-formatter (datetime-float-formatter 'java 
datetime--test-pattern :timezone datetime--test-timezone :locale 
datetime--test-locale)))
-       ,@body)))
 
 (defun datetime--test-formatter (times)
   (unless (listp times)
@@ -143,14 +134,6 @@
       (datetime--test-formatter (mapcar (lambda (k) (* k 7000000)) 
(number-sequence -300 400))))))
 
 
-;; Spaces are included only for readability where needed.  They don't affect 
anything otherwise (or,
-;; rather, should affect the library and Java benchmark in the same way).
-(defvar datetime--test-offset-format-specifiers
-  '("Z" "ZZ" "ZZZ" " ZZZZ" "ZZZZZ"
-    " O" " OOOO"
-    "x" "xx" "xxx" "xxxx" "xxxxx"
-    "X" "XX" "XXX" "XXXX" "XXXXX"))
-
 (ert-deftest datetime-formatting-with-timezone-offset-1 ()
   (dolist (timezone (datetime-list-timezones))
     (dolist (offset-format-specifier datetime--test-offset-format-specifiers)
diff --git a/test/parse.el b/test/parse.el
index 66c0169581..d944a1ebea 100644
--- a/test/parse.el
+++ b/test/parse.el
@@ -18,23 +18,28 @@
 
 (require 'test/base)
 
-(defvar datetime--test-parser nil)
 
-
-(defun datetime--~= (a b &optional epsilon)
-  (when (and a b)
-    (unless (<= (abs (- a b)) (or epsilon 0.0000001))
-      (message "Error of %s" (funcall (datetime-float-formatter 'java 
"HH:mm:ss.SSSSSS") (abs (- a b))))))
-  (if (and a b)
-      (<= (abs (- a b)) (or epsilon 0.0000001))
-    (not (or a b))))
-
-(defmacro datetime--test-set-up-parser (timezone locale pattern &rest body)
-  (declare (debug (form form form body))
-           (indent 3))
-  `(datetime--test-set-up ,timezone ,locale ,pattern
-     (let ((datetime--test-parser (datetime-parser-to-float 'java 
datetime--test-pattern :timezone datetime--test-timezone :locale 
datetime--test-locale)))
-       ,@body)))
+(defun datetime--~= (our-result java-result &optional epsilon as-string)
+  (cond ((and our-result java-result)
+         (let ((error (abs (- our-result java-result))))
+           (if (<= error (or epsilon 0.0000001))
+               t
+             (message "Error of %s" (funcall (datetime-float-formatter 'java 
"HH:mm:ss.SSSSSS") error))
+             nil)))
+        (our-result
+         ;; Apparently there is a bug in Java in that `x' cannot parse offsets 
between
+         ;; +0001 and +0059 (all other offsets are fine).  Tried reporting it, 
only for
+         ;; `bugreport.java.com' to die on me, so fuck it.
+         (if (and (string-match-p "[^x]x$" datetime--test-pattern)
+                  as-string (string-match-p "\\+00\\(0[1-9]\\|[1-5][0-9]\\)$" 
as-string))
+             t
+           (message "Successfully parsed by us, but not by Java")
+           nil))
+        (java-result
+         (message "Successfully parsed by Java, but not by us")
+         nil)
+        (t
+         t)))
 
 (defun datetime--test-parser (as-strings)
   (unless (listp as-strings)
@@ -44,7 +49,7 @@
       (let ((as-string (pop as-strings))
             (time      (pop parsed)))
         (eval `(should (progn ',datetime--test-timezone 
',datetime--test-locale ,datetime--test-pattern ,as-string
-                              (datetime--~= ,(funcall datetime--test-parser 
as-string) ,time))))))))
+                              (datetime--~= ,(funcall datetime--test-parser 
as-string) ,time nil ,as-string))))))))
 
 (defun datetime--test-parser-around-transition (time)
   (datetime--test-parser (datetime--test 'format (list time
@@ -161,4 +166,18 @@
       (datetime--test-parser '("20220506123000")))))
 
 
+(ert-deftest datetime-parsing-timezone-offset-1 ()
+  (dolist (offset-format-specifier datetime--test-offset-format-specifiers)
+    (let ((pattern (format "yyyy-MM-dd HH:mm:ss%s" offset-format-specifier)))
+      ;; Parser should not need a fixed timezone, instead it will get the 
offset from the
+      ;; argument upon call.
+      (datetime--test-set-up-parser nil 'en pattern
+        (dolist (timezone (datetime-list-timezones))
+          (datetime--test-set-up-formatter timezone 'en pattern
+            (datetime--test-parser (list (funcall datetime--test-formatter 
1694863500)
+                                         ;; For ancient times many timezones 
yield offsets that
+                                         ;; include seconds.  Make sure we can 
parse those too.
+                                         (funcall datetime--test-formatter 
-3000000000)))))))))
+
+
 (provide 'test/parse)

Reply via email to