branch: elpa/logview commit fb7762b0a9cc52f786f15230c171e1b88e1e1a03 Author: Paul Pogonyshev <pogonys...@gmail.com> Commit: Paul Pogonyshev <pogonys...@gmail.com>
Add support for "really special" log entry parts, where format string specifies full regexp for a subpart (only name, thread or "ignored"); issue #48. --- logview.el | 102 +++++++++++++++++++++++++++++------------------------- test/custom/2.log | 3 ++ test/logview.el | 10 ++++++ 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/logview.el b/logview.el index 3bdd33323c..b036d95d42 100644 --- a/logview.el +++ b/logview.el @@ -640,13 +640,11 @@ settings) with this face.") (defconst logview--final-levels '(error warning information debug trace) "List of final (submode-independent) levels, most to least severe.") -(defconst logview--entry-part-regexp (rx bow (or (group "TIMESTAMP") - (group "LEVEL") - (group "NAME") - (group "THREAD") - (group "IGNORED") - (group "MESSAGE")) - eow)) +(defconst logview--entry-part-regexp (rx (or (seq bow (or (group "TIMESTAMP") (group "LEVEL") (group "NAME") + (group "THREAD") (group "IGNORED") (group "MESSAGE")) ;; 1--6, see above + eow) + (seq "<<RX:" (or (group "NAME") (group "THREAD") (group "IGNORED")) ;; 7--9 + ":" (group (+? anychar)) ">>")))) ;; 10 (defconst logview--timestamp-entry-part-regexp (rx bow "TIMESTAMP" eow)) (defvar logview--datetime-matching-options '(:second-fractional-extension t @@ -2931,48 +2929,56 @@ returns non-nil." (setq have-explicit-message t)) (t (dolist (k (list logview--name-group logview--thread-group logview--ignored-group)) - (when (match-beginning k) - (push (format "\\(?%s:%s\\)" - (if (/= k logview--ignored-group) - (number-to-string k) - "") - (cond ((and starter terminator - (or (and (= starter ?\() (= terminator ?\))) - (and (= starter ?\[) (= terminator ?\])))) - ;; See https://github.com/doublep/logview/issues/2 - ;; We allow _one_ level of nested parens inside - ;; parenthesized THREAD or NAME. Allowing more would - ;; complicate regexp even further. Unlimited nesting - ;; level is not possible with regexps at all. - ;; - ;; 'rx-to-string' is used to avoid escaping things - ;; ourselves. - (rx-to-string `(seq (* (not (any ,starter ,terminator ?\n))) - (* ,starter (* (not (any ?\n))) ,terminator - (* (not (any ,starter ,terminator ?\n))))) - t)) - ((and terminator (/= terminator ? )) - (format "[^%c\n]*" terminator)) - (terminator - "[^ \t\n]+") - (t - ".+"))) - parts) - (push (if (= k logview--name-group) 'name 'thread) features))))) + ;; See definition of `logview--entry-part-regexp' for the meaning of 4 and 10. + (let ((special-regexp (match-beginning (+ k 4)))) + (when (or (match-beginning k) special-regexp) + (push (format "\\(?%s:%s\\)" + (if (/= k logview--ignored-group) + (number-to-string k) + "") + (cond (special-regexp + (let ((forced-regexp (match-string 10 format))) + (unless (logview--valid-regexp-p forced-regexp) + ;; Ideally would also ensure that there are no catching groups, + ;; but for this we'd need `xr' as dependency. Not now. + (warn "In format specifier `%s': `%s' is not a valid regexp" format forced-regexp) + (setf cannot-match t)) + forced-regexp)) + ((and starter terminator + (or (and (= starter ?\() (= terminator ?\))) + (and (= starter ?\[) (= terminator ?\])))) + ;; See https://github.com/doublep/logview/issues/2 We allow _one_ + ;; level of nested parens inside parenthesized THREAD or NAME. + ;; Allowing more would complicate regexp even further. Unlimited + ;; nesting level is not possible with regexps at all. + ;; + ;; 'rx-to-string' is used to avoid escaping things ourselves. + (rx-to-string `(seq (* (not (any ,starter ,terminator ?\n))) + (* ,starter (* (not (any ?\n))) ,terminator + (* (not (any ,starter ,terminator ?\n))))) + t)) + ((and terminator (/= terminator ? )) + (format "[^%c\n]*" terminator)) + (terminator + "[^ \t\n]+") + (t + ".+"))) + parts) + (push (if (= k logview--name-group) 'name 'thread) features)))))) (setq search-from end)) - (when (< search-from (length format)) - (funcall add-text-part search-from nil)) - ;; Unless `MESSAGE' field is used explicitly, behave as if format string ends with whitespace. - (unless (or have-explicit-message (string-match-p "[ \t]$" format)) - (push "\\(?:[ \t]+\\|$\\)" parts)) - (setq parts (nreverse parts)) - (when timestamp-at - ;; Speed optimization: if the submode includes a timestamp, but - ;; the test line doesn't have even two digits at the expected - ;; place, don't even loop through all the timestamp options. - (setcar timestamp-at ".*[0-9][0-9].*") - (when (and test-line (not (string-match-p (apply #'concat parts) test-line))) - (setq cannot-match t))) + (unless cannot-match + (when (< search-from (length format)) + (funcall add-text-part search-from nil)) + ;; Unless `MESSAGE' field is used explicitly, behave as if format string ends with whitespace. + (unless (or have-explicit-message (string-match-p "[ \t]$" format)) + (push "\\(?:[ \t]+\\|$\\)" parts)) + (setq parts (nreverse parts)) + (when timestamp-at + ;; Speed optimization: if the submode includes a timestamp, but the test line doesn't have even two + ;; digits at the expected place, don't even loop through all the timestamp options. + (setcar timestamp-at ".*[0-9][0-9].*") + (when (and test-line (not (string-match-p (apply #'concat parts) test-line))) + (setq cannot-match t)))) (unless cannot-match (dolist (timestamp-option (if timestamp-at timestamp-options '(nil))) (let* ((timestamp-pattern (assq 'java-pattern timestamp-option)) diff --git a/test/custom/2.log b/test/custom/2.log new file mode 100644 index 0000000000..3f9c091450 --- /dev/null +++ b/test/custom/2.log @@ -0,0 +1,3 @@ +2023-07-19 14:10:02.736 GMT+08:00 INFO T: Fake Thread WhateverName - see https://github.com/doublep/logview/issues/48 +2023-07-19 14:10:02.736 GMT+08:00 DEBUG T: Subscription Manager Consumer Thread LottieLockView - loading from filesystem +2023-07-19 14:10:02.739 GMT+08:00 DEBUG T: pool-40-thread-1 LottieLockView - starting to play t4.json diff --git a/test/logview.el b/test/logview.el index 9e4549a4fc..7bea5df334 100644 --- a/test/logview.el +++ b/test/logview.el @@ -148,6 +148,16 @@ (logview-difference-to-current-entry) (logview-go-to-difference-base-entry))) +;; See https://github.com/doublep/logview/issues/48 for rationale to have this at all. +(ert-deftest logview-test-custom-submode-with-special-regexp () + (logview--test-with-file "custom/2.log" + :extra-customizations '((logview-additional-submodes + '(("custom" (format . "TIMESTAMP IGNORED LEVEL T: <<RX:THREAD:[^-]+>> NAME - MESSAGE") (levels . "SLF4J"))))) + (should (equal logview--submode-name "custom")) + (logview--locate-current-entry entry start + (should (and entry (equal start 1))) + (should (equal (logview--entry-group entry start logview--name-group) "WhateverName"))))) + ;; Bug: Logview would ignore entry lines if they didn't contain a space at the end. This ;; would e.g. happen if you had code like 'log.info ("\n...");' in your program. (ert-deftest logview-test-multiline-entries ()