branch: elpa/logview
commit 0d1c20c9e5b7b61a2e40e95180a10c2d29ca97f7
Author: Paul Pogonyshev <pogonys...@gmail.com>
Commit: Paul Pogonyshev <pogonys...@gmail.com>

    Use 'datetime' library to support many more timestamp formats and also 
simplify timestamp customization.
---
 logview.el | 206 ++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 136 insertions(+), 70 deletions(-)

diff --git a/logview.el b/logview.el
index cdfcdb0bb7..b2c60c5731 100644
--- a/logview.el
+++ b/logview.el
@@ -1,13 +1,13 @@
 ;;; logview.el --- Major mode for viewing log files  -*- lexical-binding: t -*-
 
-;; Copyright (C) 2015 Paul Pogonyshev
+;; Copyright (C) 2015, 2016 Paul Pogonyshev
 
 ;; Author:     Paul Pogonyshev <pogonys...@gmail.com>
 ;; Maintainer: Paul Pogonyshev <pogonys...@gmail.com>
 ;; Version:    0.4.2
 ;; Keywords:   files, tools
 ;; Homepage:   https://github.com/doublep/logview
-;; Package-Requires: ((emacs "24.1"))
+;; Package-Requires: ((emacs "24.1") (datetime "0.1"))
 
 ;; This program is free software; you can redistribute it and/or
 ;; modify it under the terms of the GNU General Public License as
@@ -36,6 +36,7 @@
 ;;; Code:
 
 (eval-when-compile (require 'cl-lib))
+(require 'datetime)
 
 ;; We _append_ self to the list of mode rules so as to not clobber
 ;; other rules, as '.log' is a common file extension.  This also gives
@@ -79,36 +80,20 @@ This alist value is used as the fallback for customizable
   ;; General notices: we silently handle both common decimal
   ;; separators (dot and comma).  In several cases there is optional
   ;; space if the day/hour number is single-digit.
-  (let ((HH:mm:ss          "[012][0-9]:[0-5][0-9]:[0-5][0-9]")
-        (h:mm:ss           "[ 01]?[0-9]:[0-5][0-9]:[0-5][0-9]")
-        (.SSS              "[.,][0-9]\\{3\\}")
-        (.UUUUUU           "[.,][0-9]\\{6\\}")
-        (a                 " [AP]M")
-        (yyyy-MM-dd        "[0-9]\\{4\\}-[01][0-9]-[0-3][0-9]")
-        (MMM               (regexp-opt '("Jan" "Feb" "Mar" "Apr" "May" "Jun" 
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")))
-        (d                 "[ 1-3]?[0-9]")
-        )
-    (list (list "ISO 8601 datetime + millis"
-                (cons 'regexp  (concat yyyy-MM-dd " " HH:mm:ss .SSS))
-                (list 'aliases "yyyy-MM-dd HH:mm:ss.SSS"))
-          (list "ISO 8601 datetime + micros"
-                (cons 'regexp  (concat yyyy-MM-dd " " HH:mm:ss .UUUUUU))
-                (list 'aliases "yyyy-MM-dd HH:mm:ss.UUUUUU"))
-          (list "ISO 8601 datetime"
-                (cons 'regexp  (concat yyyy-MM-dd " " HH:mm:ss))
-                (list 'aliases "yyyy-MM-dd HH:mm:ss"))
-          (list "ISO 8601 time only + millis"
-                (cons 'regexp  (concat HH:mm:ss .SSS))
-                (list 'aliases "HH:mm:ss.SSS"))
-          (list "ISO 8601 time only"
-                (cons 'regexp  HH:mm:ss)
-                (list 'aliases "HH:mm:ss"))
-          (list "MMM d HH:mm:ss"
-                (cons 'regexp  (concat MMM " " d " " HH:mm:ss)))
-          (list "MMM d h:mm:ss a"
-                (cons 'regexp  (concat MMM " " d " " h:mm:ss a)))
-          (list "h:mm:ss a"
-                (cons 'regexp  (concat h:mm:ss a)))))
+  (let (formats)
+    (dolist (data '(("ISO 8601 datetime + millis"  "yyyy-MM-dd HH:mm:ss.SSS")
+                    ("ISO 8601 datetime + micros"  "yyyy-MM-dd 
HH:mm:ss.SSSSSS")
+                    ("ISO 8601 datetime"           "yyyy-MM-dd HH:mm:ss")
+                    ("ISO 8601 time only + millis" "HH:mm:ss.SSS")
+                    ("ISO 8601 time only + micros" "HH:mm:ss.SSSSSS")
+                    ("ISO 8601 time only"          "HH:mm:ss")
+                    (nil                           "MMM d HH:mm:ss")
+                    (nil                           "MMM d h:mm:ss a")
+                    (nil                           "h:mm:ss a")))
+      (push (list (or (car data) (cadr data)) (cons 'java-pattern (cadr 
data))) formats)
+      (when (car data)
+        (nconc (car formats) (list (list 'aliases (cadr data))))))
+    (nreverse formats))
   "Alist of standard timestamp formats.
 This value is used as the fallback for customizable
 `logview-additional-timestamp-formats'.")
@@ -263,20 +248,32 @@ A few common formats are already defined by the mode in 
variable
 take precedence.
 
 Each format has a name, by which it can be referred from submode
-definition.  A format is defined simply by a regular expression
-timestamp must match.  It is strongly recommended to make the
-expression as strict as possible to avoid false positives.  For
-example, if you entered something like \"\\w+\" as an expression,
-this would often lead to Logview mode autoselecting wrong submode
-and thus parsing log files incorrectly.
+definition.  A format is defined by Java-like pattern.  If the
+pattern contains text strings, e.g. month names, you can specify
+the locale to use (defaults to English).
+
+See `datetime' library for the help about patterns, or read
+
+    https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html
+
+A more complicated and mostly obsolete way to specify format is
+by using regular expression timestamp must match.  It is strongly
+recommended to make the expression as strict as possible to avoid
+false positives.  For example, if you entered something like
+\"\\w+\" as an expression, this would often lead to Logview mode
+autoselecting wrong submode and thus parsing log files
+incorrectly.  Regular expression is ignored if Java pattern is
+also specified.
 
 Timestamp format can have any number of optional aliases, which
 work just as the name."
   :group 'logview
   :type  '(repeat (cons (string :tag "Name")
                         (list :tag "Definition"
-                              (cons :tag "" (const :tag "Format:"  regexp)  
regexp)
                               (set :inline t
+                                   (cons :tag "" (const :tag "Java pattern:"   
    java-pattern) string)
+                                   (cons :tag "" (const :tag "Locale:"         
    locale)       symbol)
+                                   (cons :tag "" (const :tag "Regular 
expression:" regexp)       regexp)
                                    (cons :tag "" (const :tag "Aliases:" 
aliases) (repeat string))))))
   :set   'logview--set-submode-affecting-variable)
 
@@ -444,6 +441,14 @@ To temporarily change this on per-buffer basis type 
\\<logview-mode-map>\\[logvi
                                              (group bow "NAME" eow)
                                              (group bow "THREAD" eow))))
 
+(defvar logview--datetime-options '(:second-fractional-extension t
+                                    :only-4-digit-years t
+                                    :accept-leading-space t
+                                    :require-leading-zeros t
+                                    :forbid-unnecessary-zeros t))
+
+(defvar logview--all-timestamp-formats-cache nil)
+
 (defconst logview--valid-filter-prefixes '("a+" "a-" "t+" "t-" "m+" "m-"))
 
 
@@ -1487,38 +1492,43 @@ returns non-nil."
       (widen)
       (goto-char 1)
       (end-of-line)
-      (let ((first-line (buffer-substring 1 (point))))
+      (let ((first-line (buffer-substring 1 (point)))
+            standard-timestamps)
+        (logview--iterate-split-alists (lambda (_timestamp-name timestamp) 
(push timestamp standard-timestamps))
+                                       logview-additional-timestamp-formats 
logview-std-timestamp-formats)
+        (maphash (lambda (regexp _keys)
+                   (push (list (cons 'regexp regexp)) standard-timestamps))
+                 (logview--all-timestamp-formats))
+        (setq standard-timestamps (nreverse standard-timestamps))
         (catch 'success
           (logview--iterate-split-alists (lambda (name definition)
                                            (condition-case error
-                                               (logview--initialize-submode 
name definition first-line)
+                                               (logview--initialize-submode 
name definition standard-timestamps first-line)
                                              (error (warn 
(error-message-string error)))))
                                          logview-additional-submodes 
logview-std-submodes))))))
 
-(defun logview--initialize-submode (name definition test-line)
-  (let* ((format    (cdr (assq 'format    definition)))
-         (timestamp (cdr (assq 'timestamp definition))))
+(defun logview--initialize-submode (name definition standard-timestamps 
test-line)
+  (let* ((format            (cdr (assq 'format    definition)))
+         (timestamp-names   (cdr (assq 'timestamp definition)))
+         (timestamp-options (if timestamp-names
+                                (mapcar (lambda (name)
+                                          (logview--get-split-alists name 
"timestamp format"
+                                                                     
logview-additional-timestamp-formats logview-std-timestamp-formats))
+                                        timestamp-names)
+                              standard-timestamps)))
     (unless (and (stringp format) (> (length format) 0))
       (user-error "Invalid submode '%s': no format string" name))
-    (catch 'failed
-      (if timestamp
-          (dolist (name timestamp)
-            (logview--try-initialize-submode name definition format
-                                             (logview--get-split-alists name 
"timestamp format"
-                                                                        
logview-additional-timestamp-formats logview-std-timestamp-formats)
-                                             test-line))
-        (logview--iterate-split-alists (lambda (_timestamp-name timestamp)
-                                         (logview--try-initialize-submode name 
definition format timestamp test-line))
-                                       logview-additional-timestamp-formats 
logview-std-timestamp-formats)))))
-
-(defun logview--try-initialize-submode (name submode format timestamp 
test-line)
+    (logview--try-initialize-submode name definition format timestamp-options 
test-line)))
+
+(defun logview--try-initialize-submode (name submode format timestamp-options 
test-line)
   (let* ((search-from 0)
-         (next)
-         (end)
+         next
+         end
          starter terminator
-         (levels)
-         (parts '("^"))
-         (features)
+         levels
+         parts
+         timestamp-at
+         features
          (add-text-part (lambda (from to)
                           (push (replace-regexp-in-string "[ \t]+" "[ \t]+" 
(regexp-quote (substring format from to))) parts))))
     (while (setq next (string-match logview--entry-part-regexp format 
search-from))
@@ -1530,8 +1540,9 @@ returns non-nil."
             terminator (when (< end (length format))
                          (aref format end)))
       (cond ((match-beginning logview--timestamp-group)
-             (push (format "\\(?%d:%s\\)" logview--timestamp-group (cdr (assq 
'regexp timestamp))) parts)
-             (push 'timestamp features))
+             (push nil parts)
+             (push 'timestamp features)
+             (setq timestamp-at parts))
             ((match-beginning logview--level-group)
              (setq levels (logview--get-split-alists (cdr (assq 'levels 
submode)) "level mapping"
                                                      
logview-additional-level-mappings logview-std-level-mappings))
@@ -1573,9 +1584,20 @@ returns non-nil."
     ;; Always behave as if format string ends with whitespace.
     (unless (string-match "[ \t]$" format)
       (push "[ \t]+" parts))
-    (let ((regexp (apply 'concat (reverse parts))))
-      (if (string-match regexp test-line)
-          (progn
+    (setq parts (nreverse parts))
+    (push "^" parts)
+    (dolist (timestamp-option (if timestamp-at timestamp-options '(nil)))
+      (let* ((timestamp-pattern (assq 'java-pattern timestamp-option))
+             (timestamp-regexp  (if timestamp-pattern
+                                    (apply #'datetime-matching-regexp 'java 
(cdr timestamp-pattern)
+                                           :locale (cdr (assq 'locale 
timestamp-option)) logview--datetime-options)
+                                  (cdr (assq 'regexp timestamp-option)))))
+        (when timestamp-at
+          ;;(message "+++ %s" timestamp-regexp)
+          (setcar timestamp-at (format "\\(?%d:%s\\)" logview--timestamp-group 
timestamp-regexp)))
+        (let ((regexp (apply #'concat parts)))
+          ;;(message "    %s :: %s" format (replace-regexp-in-string "\n" "" 
regexp))
+          (when (string-match regexp test-line)
             (setq logview--process-buffer-changes t
                   logview--entry-regexp           regexp
                   logview--submode-features       features
@@ -1596,10 +1618,54 @@ returns non-nil."
               (pcase logview-auto-revert-mode
                 (`auto-revert-mode      (auto-revert-mode      1))
                 (`auto-revert-tail-mode (auto-revert-tail-mode 1))))
-            (throw 'success nil))
-        (when (not (memq 'timestamp features))
-          ;; Else we will maybe retry with different timestamp formats.
-          (throw 'failed nil))))))
+            (throw 'success nil)))))))
+
+(defun logview--all-timestamp-formats ()
+  (unless logview--all-timestamp-formats-cache
+    (let ((start-time (float-time))
+          (patterns (make-hash-table :test 'equal :size 1000))
+          (uniques  (make-hash-table :test 'equal :size 1000)))
+      (dolist (locale (datetime-list-locales t))
+        (let ((decimal-separator (char-to-string (datetime-locale-field locale 
:decimal-separator)))
+              last-time-pattern)
+          (dolist (time-variant '(:short :medium :long :full))
+            (let ((time-pattern (datetime-locale-time-pattern locale 
time-variant)))
+              (unless (string= time-pattern last-time-pattern)
+                (setq last-time-pattern time-pattern)
+                (when (and (datetime-pattern-includes-second-p 'java 
time-pattern)
+                           (not (datetime-pattern-includes-timezone-p 'java 
time-pattern)))
+                  (let (variants)
+                    (dolist (pattern (cons time-pattern
+                                           (mapcar (lambda (date-variant) 
(datetime-locale-date-time-pattern locale date-variant time-variant))
+                                                   '(:short :medium :long 
:full))))
+                      (push pattern                                            
                                       variants)
+                      (push (replace-regexp-in-string "\\<s+\\>" (concat "\\&" 
decimal-separator "SSS")    pattern t) variants)
+                      (push (replace-regexp-in-string "\\<s+\\>" (concat "\\&" 
decimal-separator "SSSSSS") pattern t) variants))
+                    (dolist (pattern variants)
+                      (let* ((parts            (datetime-recode-pattern 'java 
'parsed pattern))
+                             (locale-dependent 
(datetime-pattern-locale-dependent-p 'parsed parts))
+                             (key              (cons pattern (when 
locale-dependent locale))))
+                        (when (or locale-dependent (null (gethash key 
patterns)))
+                          (puthash key
+                                   (datetime-matching-regexp 'parsed parts
+                                                             
:second-fractional-extension t
+                                                             :locale locale
+                                                             
:only-4-digit-years t
+                                                             
:accept-leading-space t
+                                                             
:require-leading-zeros t
+                                                             
:forbid-unnecessary-zeros t)
+                                   patterns)))))))))))
+      (maphash (lambda (key regexp)
+                 (let ((existing (gethash regexp uniques)))
+                   (if existing
+                       (unless (memq (cdr key) (cdr existing))
+                         (push (cdr key) (cdr existing)))
+                     (puthash regexp (cons (car key) (list (cdr key))) 
uniques))))
+               patterns)
+      (setq logview--all-timestamp-formats-cache uniques)
+      (let ((inhibit-message t))
+        (message "Logview/datetime: built list of %d timestamp regexps in 
%.3f" (hash-table-count uniques) (- (float-time) start-time)))))
+  logview--all-timestamp-formats-cache)
 
 
 (defun logview--assert (&rest assertions)

Reply via email to