branch: externals/el-job
commit e290f1249ced1d29bac83d64e33738c52e27fb6c
Author: Martin Edström <[email protected]>
Commit: Martin Edström <[email protected]>
.
---
.dir-locals.el | 1 +
README.org | 19 ++++++++++-
el-job-child.el | 14 +++-----
el-job.el | 102 ++++++++++++++++++++++++++++++--------------------------
4 files changed, 78 insertions(+), 58 deletions(-)
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 0000000000..53e215e228
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1 @@
+((emacs-lisp-mode . ((emacs-lisp-docstring-fill-column . 72)))) ;; Emacs 31
diff --git a/README.org b/README.org
index 22434f51b7..8f539cbc01 100644
--- a/README.org
+++ b/README.org
@@ -1,3 +1,4 @@
+* el-job
Imagine you have a function you'd like to run on a long list of inputs. You
could run =(mapcar #'FN INPUTS)=, but that hangs Emacs until done.
@@ -8,4 +9,20 @@ This library gives you the tools to split up the inputs and
run the function in
For real-world usage, search for =el-job-launch= in the source of
[[https://github.com/meedstrom/org-node/blob/use-el-job/org-node.el][org-node.el]].
-For now, some limitations on FN's return value, which must always be a list of
a fixed length, where the elements are themselves lists.
+
+** Limitations
+
+1. Still in a development honeymoon, so argument names are not set in stone.
Check back often!
+
+2. For now, some limitations on FN's return value, which must always be a list
of a fixed length, where the elements are themselves lists. For example, at
the end of org-node-parser.el, it returns:
+
+ #+begin_src elisp
+ (list (if missing-file (list missing-file))
+ (if file-mtime (list file-mtime))
+ found-nodes ;; always a list
+ org-node-parser--paths-types ;; always a list
+ org-node-parser--found-links ;; always a list
+ (if problem (list problem))))
+ #+end_src
+
+ May seem clunky when you return lists of only one item, but at least it is
easy to extend.
diff --git a/el-job-child.el b/el-job-child.el
index 48ed4bd168..50d8cac8af 100644
--- a/el-job-child.el
+++ b/el-job-child.el
@@ -47,21 +47,17 @@ information to the final return value."
(setq item (pop items))
(setq start (time-convert nil t))
(setq output (funcall func item))
- ;; Afraid that looping `time-add' would be slower than summing floats.
Benchmark that?
- ;; Actually--
- ;; (push (float-time (time-since start)) meta)
- ;; now this is still (TIME . HZ):
(push (time-since start) meta)
(setq results (el-job-child--zip output results)))
;; Now the durations are in same order that ITEMS came in
(setq meta (nreverse meta))
;; This will be the very `car' of the metadata
(push (time-convert nil t) meta)
- (prin1 (cons meta results)
- nil
- ;; TODO: Consider print-circle to shrink data transmitted
- ;; TODO: Consider print-symbols-bare
- '((length) (level) (circle . t)))))
+ (let (print-length
+ print-level
+ (print-circle t)
+ (print-symbols-bare t))
+ (prin1 (cons meta results)))))
(provide 'el-job-child)
diff --git a/el-job.el b/el-job.el
index 0d0cb54f30..a7d7247e89 100644
--- a/el-job.el
+++ b/el-job.el
@@ -258,7 +258,7 @@ See `el-job-child--zip' for details."
;;; Main logic:
(defvar el-job--batches (make-hash-table :test #'eq))
-(cl-defstruct (el-job-batch (:constructor el-job-make-batch)
+(cl-defstruct (el-job-batch (:constructor el-job-batch-make)
(:copier nil)
(:conc-name el-job-))
lock
@@ -270,17 +270,19 @@ See `el-job-child--zip' for details."
(timestamps (list :accept-launch-request (time-convert nil t)))
(elapsed-table (make-hash-table :test #'equal)))
-(cl-defun el-job-launch (&key early-eval
- load
+;; TODO: How to share the same elapsed-table, without locking?
+(cl-defun el-job-launch (&key load
inject-vars
+ eval-once
funcall
inputs
wrapup
await-max
lock
- ;; use-file-handlers
- debug ;; TODO
- max-children)
+ max-children ;; will deprecate
+ ;; TODO
+ ;; use-file-handlers
+ debug)
"Run FUNCALL in one or more headless Elisp processes.
Then merge the return values \(lists of N lists) into one list
\(of N lists) and pass it to WRAPUP.
@@ -346,12 +348,14 @@ which you can get from this form:
\(el-job-timestamps JOB)
-LOCK is a symbol or integer (anything suitable for `eq')
-identifying this batch of jobs, and prevents launching another batch
-with the same LOCK if the previous batch has not completed.
+LOCK is a symbol identifying this batch of jobs, and prevents launching
+another batch with the same LOCK if the previous batch has not
+completed. It can also be a keyword or an integer below 536,870,911
+(suitable for `eq').
-EARLY-EVAL is a string containing a Lisp form. It is evaluated in the
-child before it loads anything else."
+EVAL-ONCE is a string containing a Lisp form. It is evaluated in the
+child just before FUNCALL, but only once, even though FUNCALL may be
+evaluated many times."
(unless (symbolp funcall)
(error "Argument :funcall only takes a symbol"))
(setq load (ensure-list load))
@@ -365,23 +369,18 @@ child before it loads anything else."
(if (seq-some #'process-live-p (el-job-processes batch))
(setq stop (message "el-job: Batch %s still at work"))
(mapc #'delete-process (el-job-processes batch))
- (setf (el-job-processes batch) nil)
- (setf (el-job-inputs batch) nil)
- (setf (el-job-results batch) nil)
+ (setf (el-job-processes batch) nil)
+ (setf (el-job-inputs batch) nil)
+ (setf (el-job-results batch) nil)
(setf (el-job-inhibit-wrapup batch) nil)
+ (setf (el-job-lock batch) lock)
(setf (el-job-timestamps batch)
(list :accept-launch-request (time-convert nil t))))
(setq batch
- (puthash lock
- (el-job-make-batch
- :lock lock
- :stderr (format " *el-job-%s:err*" lock))
+ (puthash lock (el-job-batch-make :lock lock)
el-job--batches)))
- ;; Anonymous batch needs buffer names that will never be reused.
- (setq lock (intern (format-time-string "%FT%H%M%S%N")))
- (setq batch (el-job-make-batch
- :lock lock
- :stderr (format " *el-job-%s:err*" lock))))
+ ;; TODO: Do not benchmark inputs for anonymous job
+ (setq batch (el-job-batch-make)))
(cond
(stop)
@@ -392,12 +391,21 @@ child before it loads anything else."
(debug)
(t
- (with-current-buffer (get-buffer-create (el-job-stderr batch) t)
- (erase-buffer))
+
(let* ((splits (el-job--split-optimally inputs
(or max-children el-job--cores)
(el-job-elapsed-table batch)))
(n (if splits (length splits) 1))
+ ;; Anonymous batch needs buffer names that will never be reused
+ (name (or lock (format-time-string "%FT%H%M%S%N")))
+ (shared-stderr
+ (setf (el-job-stderr batch)
+ (with-current-buffer
+ (get-buffer-create (format " *el-job-%s:err*" name) t)
+ (erase-buffer)
+ (current-buffer))))
+ print-length
+ print-level
(inject-vars-alist
(cons (cons 'current-time-list current-time-list)
;; TODO: Reuse allocated memory instead of building a new
@@ -406,6 +414,9 @@ child before it loads anything else."
for var in inject-vars
if (symbolp var) collect (cons var (symbol-value var))
else collect var)))
+ (inject-vars-expr (prin1-to-string
+ `(dolist (var ',inject-vars-alist)
+ (set (car var) (cdr var)))))
;; Ensure the working directory is not remote (messes things up)
(default-directory invocation-directory)
items proc)
@@ -413,14 +424,14 @@ child before it loads anything else."
(setq items (pop splits))
(setq proc
(make-process
- :name (format "el-job-%s:%d" lock i)
+ :name (format "el-job-%s:%d" name i)
:noquery t
;; Pipe is the fallback on environments that don't support
;; PTY, so I'll force pipe for now to reveal any footguns
:connection-type 'pipe
- :stderr (get-buffer (el-job-stderr batch))
+ :stderr shared-stderr
:buffer (with-current-buffer (get-buffer-create
- (format " *el-job-%s:%d*" lock
i)
+ (format " *el-job-%s:%d*" name
i)
t)
(erase-buffer)
(current-buffer))
@@ -430,23 +441,16 @@ child before it loads anything else."
(file-name-concat invocation-directory invocation-name)
"--quick"
"--batch")
- (if early-eval (list "--eval" early-eval))
(cl-loop
for file in (mapcar #'el-job--loaded-lib load)
nconc (list "--load" file))
- (if inject-vars
- (list "--eval"
- (prin1-to-string
- `(dolist (var ',inject-vars-alist)
- (set (car var) (cdr var)))
- nil
- '((length) (level)))))
+ (if inject-vars (list "--eval" inject-vars-expr))
+ (if eval-once (list "--eval" eval-once))
(list
"--load" (el-job--loaded-lib 'el-job-child)
"--eval" (format "(el-job-child--work #'%S '%s)"
funcall
- (prin1-to-string
- items nil '((length) (level))))))
+ (prin1-to-string items))))
:sentinel
(lambda (proc event)
(pcase event
@@ -492,21 +496,23 @@ the Nth call, then call function WRAPUP and pass it the
merged outputs."
(( error )
(error "Problems reading el-job child output: %S" err))
(:success
- (let ((durations (cdar output))
- (input (alist-get proc (el-job-inputs batch))))
+ (when (el-job-lock batch)
;; Record the time spent by FUNCALL on each item in
- ;; SPLITABLE-DATA. Big deal with `el-job--split-optimally'.
- (dolist (item input)
- (puthash item
- (pop durations)
- (el-job-elapsed-table batch))))
- ;; The `car' was just metadata we slipped in
+ ;; INPUTS, for next time with `el-job--split-optimally'.
+ (let ((durations (cdar output))
+ (input (alist-get proc (el-job-inputs batch))))
+ (dolist (item input)
+ (puthash item (pop durations) (el-job-elapsed-table batch)))))
+ ;; The `car' was just our own metadata
(push (cdr output) (el-job-results batch)))))
(when (= (length (el-job-results batch)) n)
- ;; We are in the last process sentinel, so this child's exit-timestamp
- ;; is the latest one
+ ;; We are in the last process sentinel
(plist-put (el-job-timestamps batch)
:children-done (caar output))
+ (unless (el-job-lock batch)
+ (kill-buffer (el-job-stderr batch))
+ (dolist (proc (el-job-processes batch))
+ (kill-buffer proc)))
;; Would be nice if we could timestamp the moment where we /begin/
;; accepting results, i.e. the first sentinel, but this may occur
;; before the last child has exited, so it would be confusing. At