branch: externals/llm
commit ef3c55e9c8b7fee127a21fe72e35adae4a7b00bd
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>

    Add the ability to return multiple outputs via a plist in llm calls (#160)
    
    This adds the `multi-output` optional argument to the llm interface. When
    set, the return value can contain several kinds of values: `text`, the
    normal textual output of the model; `tool-results`, the results of tool
    calls; `tool-uses`, the tool calls the LLM decided on, which we execute
    to get those results; and `reasoning`, the model's reasoning output.
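
    As an illustrative sketch (not part of this change), a call with the new
    argument might look like the following; `my-provider' stands for any
    configured provider, and the exact keys present depend on the model and
    on whether tools were supplied:

        (llm-chat my-provider
                  (llm-make-chat-prompt "What is the capital of France?")
                  t)
        ;; Possible value:
        ;; (:reasoning "The user asks for a capital ..." :text "Paris")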
    
    This should enable us to solve https://github.com/ahyatt/llm/issues/143,
    but it may require more work with the r1 server and any other reasoning
    server. Other reasoning models didn't appear to have a different output
    stream, but that may change.
---
 NEWS.org                   |   2 +
 README.org                 |  16 +++-
 llm-claude.el              |   4 +-
 llm-fake.el                |  18 +++--
 llm-integration-test.el    |  71 ++++++++++++++++
 llm-ollama.el              |  45 +++++++++--
 llm-openai.el              |   7 +-
 llm-provider-utils-test.el |  10 +++
 llm-provider-utils.el      | 196 ++++++++++++++++++++++++++++++++-------------
 llm-vertex.el              |   7 +-
 llm.el                     |  69 ++++++++++------
 11 files changed, 341 insertions(+), 104 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index f770f48287..bd882d71f0 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,5 @@
+* Version 0.24.0
+- Add =multi-output= as an option, allowing all llm chat calls to return, call 
back with, or stream multiple kinds of data via a plist.  This allows separating 
out reasoning, as well as optionally returning both text and tool uses at the 
same time.
 * Version 0.23.1
 - Add Gemini 2.0 pro experimental model, default to 2.0 flash
 - Add Open AI's o3 mini model
diff --git a/README.org b/README.org
index 4a3014fd15..420d34bbd7 100644
--- a/README.org
+++ b/README.org
@@ -139,9 +139,9 @@ Client applications should require the =llm= package, and 
code against it.  Most
 
 For all callbacks, the callback will be executed in the buffer the function 
was first called from.  If the buffer has been killed, it will be executed in a 
temporary buffer instead.
 ** Main functions
-- ~llm-chat provider prompt~:  With user-chosen ~provider~ , and a 
~llm-chat-prompt~ structure (created by ~llm-make-chat-prompt~), send that 
prompt to the LLM and wait for the string output.
-- ~llm-chat-async provider prompt response-callback error-callback~: Same as 
~llm-chat~, but executes in the background.  Takes a ~response-callback~ which 
will be called with the text response.  The ~error-callback~ will be called in 
case of error, with the error symbol and an error message.
-- ~llm-chat-streaming provider prompt partial-callback response-callback 
error-callback~:  Similar to ~llm-chat-async~, but request a streaming 
response.  As the response is built up, ~partial-callback~ is called with the 
all the text retrieved up to the current point.  Finally, ~reponse-callback~ is 
called with the complete text.
+- ~llm-chat provider prompt multi-output~:  With user-chosen ~provider~, and 
an ~llm-chat-prompt~ structure (created by ~llm-make-chat-prompt~), send that 
prompt to the LLM and wait for the output (a string, or a plist when 
~multi-output~ is non-nil).
+- ~llm-chat-async provider prompt response-callback error-callback 
multi-output~: Same as ~llm-chat~, but executes in the background.  Takes a 
~response-callback~ which will be called with the text response.  The 
~error-callback~ will be called in case of error, with the error symbol and an 
error message.
+- ~llm-chat-streaming provider prompt partial-callback response-callback 
error-callback multi-output~:  Similar to ~llm-chat-async~, but requests a 
streaming response.  As the response is built up, ~partial-callback~ is called 
with all the text retrieved up to the current point.  Finally, 
~response-callback~ is called with the complete text.
 - ~llm-embedding provider string~: With the user-chosen ~provider~, send a 
string and get an embedding, which is a large vector of floating point values.  
The embedding represents the semantic meaning of the string, and the vector can 
be compared against other vectors, where smaller distances between the vectors 
represent greater semantic similarity.
 - ~llm-embedding-async provider string vector-callback error-callback~: Same 
as ~llm-embedding~ but this is processed asynchronously. ~vector-callback~ is 
called with the vector embedding, and, in case of error, ~error-callback~ is 
called with the same arguments as in ~llm-chat-async~.
 - ~llm-batch-embedding provider strings~: same as ~llm-embedding~, but takes 
in a list of strings, and returns a list of vectors whose order corresponds to 
the ordering of the strings.
@@ -156,6 +156,16 @@ For all callbacks, the callback will be executed in the 
buffer the function was
   - ~llm-chat-prompt-to-text prompt~: From a prompt, return a string 
representation.  This is not usually suitable for passing to LLMs, but for 
debugging purposes.
   - ~llm-chat-streaming-to-point provider prompt buffer point 
finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream 
to ~point~ in ~buffer~.
   - ~llm-chat-prompt-append-response prompt response role~: Append a new 
response (from the user, usually) to the prompt.  The ~role~ is optional, and 
defaults to ~'user~.
+*** Return and multi-output
+The default return value is text, except when tools are called, in which case 
it is an alist of the tools called and their return values.
+
+Models can potentially return many types of information, though, so the 
~multi-output~ option was added to the ~llm-chat~ calls so that the single 
return value can instead be a plist representing the various possible values.  
In the case of ~llm-chat~, this plist is returned; in ~llm-chat-async~, it is 
passed to the success function.  In ~llm-chat-streaming~, it is passed to the 
success function, and each partial update will be a plist, with no guarantee 
that the same keys will always be present.
+
+The possible plist keys are:
+   - ~:text~, for the main textual output.
+   - ~:reasoning~, for reasoning output, when the model separates it.
+   - ~:tool-uses~, the tool calls the LLM has requested, as a list of plists 
with ~:name~ and ~:args~ values.
+   - ~:tool-results~, the results of calling the tools.
 *** JSON schema
 By using the ~response-format~ argument to ~llm-make-chat-prompt~, you can ask 
the LLM to return items according to a specified JSON schema, based on the 
[[https://json-schema.org][JSON Schema Spec]].  Not everything is supported, 
but the most commonly used parts are.  To specify the JSON schema, we use a 
plist-based approach.  JSON objects are defined with ~(:type object :properties 
(:<var1> <schema1> :<var2> <schema2> ... :<varn> <scheman>) :required (<req 
var1> ... <req varn>))~.  Arr [...]
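
To complement the new "Return and multi-output" section above, a minimal sketch 
of consuming the plist, assuming `my-provider' is an already-configured provider 
and that the model emits separate reasoning (keys the model does not produce are 
simply absent):

  (llm-chat-async
   my-provider
   (llm-make-chat-prompt "Why is the sky blue?")
   (lambda (response)
     ;; RESPONSE is a plist such as (:reasoning "..." :text "...").
     (when-let ((reasoning (plist-get response :reasoning)))
       (message "Reasoning: %s" reasoning))
     (message "Answer: %s" (plist-get response :text)))
   (lambda (err msg) (message "Error %s: %s" err msg))
   t)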
 
diff --git a/llm-claude.el b/llm-claude.el
index b708c1f9a0..0ea71a3efe 100644
--- a/llm-claude.el
+++ b/llm-claude.el
@@ -135,7 +135,7 @@
       (format "Unsupported non-text response: %s" content))))
 
 (cl-defmethod llm-provider-streaming-media-handler ((_ llm-claude)
-                                                    msg-receiver _ 
err-receiver)
+                                                    receiver err-receiver)
   (cons 'text/event-stream
         (plz-event-source:text/event-stream
          :events `((message_start . ignore)
@@ -153,7 +153,7 @@
                               (delta (assoc-default 'delta json))
                               (type (assoc-default 'type delta)))
                          (when (equal type "text_delta")
-                           (funcall msg-receiver (assoc-default 'text 
delta))))))))))
+                           (funcall receiver `(:text ,(assoc-default 'text 
delta)))))))))))
 
 (cl-defmethod llm-provider-collect-streaming-tool-uses ((_ llm-claude) data)
   (llm-provider-utils-openai-collect-streaming-tool-uses data))
diff --git a/llm-fake.el b/llm-fake.el
index fa2ea3e15f..0c81319da1 100644
--- a/llm-fake.el
+++ b/llm-fake.el
@@ -46,13 +46,15 @@ either a vector response for the chat, or a signal symbol 
and
 message cons.  If nil, the response will be a simple vector."
   output-to-buffer chat-action-func embedding-action-func)
 
-(cl-defmethod llm-chat-async ((provider llm-fake) prompt response-callback 
error-callback)
+(cl-defmethod llm-chat-async ((provider llm-fake) prompt response-callback 
error-callback &optional multi-output)
   (condition-case err
-      (funcall response-callback (llm-chat provider prompt))
+      ;; We use `apply' here in case `llm-chat' is older and doesn't support
+      ;; the multi-output argument.
+      (funcall response-callback
+               (apply #'llm-chat provider prompt
+                      (when multi-output (list multi-output))))
     (t (funcall error-callback (car err) (cdr err))))
   nil)
 
-(cl-defmethod llm-chat ((provider llm-fake) prompt)
+(cl-defmethod llm-chat ((provider llm-fake) prompt &optional multi-output)
   (when (llm-fake-output-to-buffer provider)
     (with-current-buffer (get-buffer-create (llm-fake-output-to-buffer 
provider))
       (goto-char (point-max))
@@ -69,9 +71,11 @@ message cons.  If nil, the response will be a simple vector."
     (setf (llm-chat-prompt-interactions prompt)
           (append (llm-chat-prompt-interactions prompt)
                   (list (make-llm-chat-prompt-interaction :role 'assistant 
:content result))))
-    result))
+    (if multi-output
+        `(:text ,result)
+      result)))
 
-(cl-defmethod llm-chat-streaming ((provider llm-fake) prompt partial-callback 
response-callback _error-callback)
+(cl-defmethod llm-chat-streaming ((provider llm-fake) prompt partial-callback 
response-callback _error-callback &optional multi-output)
   (when (llm-fake-output-to-buffer provider)
     (with-current-buffer (get-buffer-create (llm-fake-output-to-buffer 
provider))
       (goto-char (point-max))
@@ -87,13 +91,13 @@ message cons.  If nil, the response will be a simple 
vector."
     (let ((accum ""))
       (mapc (lambda (word)
               (setq accum (concat accum word " "))
-              (funcall partial-callback accum)
+              (funcall partial-callback (if multi-output `(:text ,accum) 
accum))
               (sleep-for 0 100))
             (split-string text))
       (setf (llm-chat-prompt-interactions prompt)
             (append (llm-chat-prompt-interactions prompt)
                     (list (make-llm-chat-prompt-interaction :role 'assistant 
:content text))))
-      (funcall response-callback text))))
+      (funcall response-callback (if multi-output `(:text ,text) text)))))
 
 (cl-defmethod llm-embedding ((provider llm-fake) string)
   (when (llm-fake-output-to-buffer provider)
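
Since the fake provider now honors the flag as well, it can be used to exercise 
multi-output handling without any network round trip.  A sketch, assuming 
`chat-action-func' is called with no arguments and returns the canned response 
string:

  (llm-chat (make-llm-fake :chat-action-func (lambda () "canned reply"))
            (llm-make-chat-prompt "anything")
            t)
  ;; => (:text "canned reply")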
diff --git a/llm-integration-test.el b/llm-integration-test.el
index 3fe8470473..71ee008e3b 100644
--- a/llm-integration-test.el
+++ b/llm-integration-test.el
@@ -213,6 +213,15 @@ else.  We really just want to see if it's in the right 
ballpark."
                          (llm-make-chat-prompt 
llm-integration-test-chat-prompt)))
            llm-integration-test-chat-answer)))
 
+(llm-def-integration-test llm-chat-multi-output (provider)
+  (should (equal
+           (string-trim (plist-get
+                         (llm-chat
+                          provider
+                          (llm-make-chat-prompt 
llm-integration-test-chat-prompt)
+                          t) :text))
+           llm-integration-test-chat-answer)))
+
 (llm-def-integration-test llm-chat-async (provider)
   (let ((result nil)
         (buf (current-buffer))
@@ -231,6 +240,26 @@ else.  We really just want to see if it's in the right 
ballpark."
     (if err-result (error err-result))
     (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim result)))))
 
+(llm-def-integration-test llm-chat-async-multi-output (provider)
+  (let ((result nil)
+        (buf (current-buffer))
+        (llm-warn-on-nonfree nil)
+        (err-result nil))
+    (llm-chat-async
+     provider
+     (llm-make-chat-prompt llm-integration-test-chat-prompt)
+     (lambda (response)
+       (should (or (not (buffer-live-p buf)) (eq (current-buffer) buf)))
+       (setq result response))
+     (lambda (_ err)
+       (setq err-result err))
+     t)
+    (while (not (or result err-result))
+      (sleep-for 0.1))
+    (if err-result (error err-result))
+    (should (plist-get result :text))
+    (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim (plist-get result :text))))))
+
 (llm-def-integration-test llm-chat-streaming (provider)
   (when (member 'streaming (llm-capabilities provider))
     (let ((streamed-result "")
@@ -259,6 +288,35 @@ else.  We really just want to see if it's in the right 
ballpark."
       (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim returned-result)))
       (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim streamed-result))))))
 
+(llm-def-integration-test llm-chat-streaming-multi-output (provider)
+  (when (member 'streaming (llm-capabilities provider))
+    (let ((streamed-result "")
+          (returned-result nil)
+          (llm-warn-on-nonfree nil)
+          (buf (current-buffer))
+          (start-time (current-time))
+          (err-result nil))
+      (llm-chat-streaming
+       provider
+       (llm-make-chat-prompt llm-integration-test-chat-prompt)
+       (lambda (partial-response)
+         (should (or (not (buffer-live-p buf)) (eq (current-buffer) buf)))
+         (setq streamed-result partial-response))
+       (lambda (response)
+         (should (or (not (buffer-live-p buf)) (eq (current-buffer) buf)))
+         (setq returned-result response))
+       (lambda (_ err)
+         (setq err-result err))
+       t)
+      (while (and (or (null returned-result)
+                      (= (length streamed-result) 0))
+                  (null err-result)
+                  (time-less-p (time-subtract (current-time) start-time) 60))
+        (sleep-for 0.1))
+      (if err-result (error err-result))
+      (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim (plist-get returned-result :text))))
+      (should (llm-integration-test-string-eq llm-integration-test-chat-answer 
(string-trim (plist-get streamed-result :text)))))))
+
 (llm-def-integration-test llm-tool-use (provider)
   (when (member 'function-calls (llm-capabilities provider))
     (let ((prompt (llm-integration-test-tool-use-prompt)))
@@ -268,6 +326,19 @@ else.  We really just want to see if it's in the right 
ballpark."
       ;; Test that we can send the function back to the provider without error.
       (llm-chat provider prompt))))
 
+(llm-def-integration-test llm-tool-use-multi-output (provider)
+  (when (member 'function-calls (llm-capabilities provider))
+    (let* ((prompt (llm-integration-test-tool-use-prompt))
+           (result (llm-chat provider prompt t)))
+      (should (equal
+               (plist-get result :tool-results)
+               llm-integration-test-fc-answer))
+      (should (plist-get result :tool-uses))
+      (if (plist-get result :text)
+          (should (> (length (plist-get result :text)) 0)))
+      ;; Test that we can send the function back to the provider without error.
+      (llm-chat provider prompt t))))
+
 (llm-def-integration-test llm-tool-use-multiple (provider)
   (when (member 'function-calls (llm-capabilities provider))
     (let ((prompt (llm-integration-test-fc-multiple-prompt)))
diff --git a/llm-ollama.el b/llm-ollama.el
index 2103e4487a..fa932f4f72 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -106,7 +106,28 @@ PROVIDER is the llm-ollama provider."
 
 (cl-defmethod llm-provider-chat-extract-result ((_ llm-ollama) response)
   "Return the chat response from the server RESPONSE."
-  (assoc-default 'content (assoc-default 'message response)))
+  (let ((raw-result (assoc-default 'content (assoc-default 'message 
response))))
+    ;; The raw result may contain reasoning content wrapped in <think> tags
+    ;; (for DeepSeek reasoning).  We want to strip that out.
+    (with-temp-buffer
+      (insert raw-result)
+      (goto-char 0)
+      (if (search-forward "\n</think>" nil t)
+          (string-trim (buffer-substring (point) (point-max)))
+        raw-result))))
+
+(cl-defmethod llm-provider-extract-reasoning ((_ llm-ollama) response)
+  (let ((raw-result (assoc-default 'content (assoc-default 'message 
response))))
+    ;; Reasoning content is in <think> tags (for DeepSeek reasoning).  We want 
to
+    ;; extract the content between these tags.
+    (with-temp-buffer
+      (insert raw-result)
+      (goto-char 0)
+      (when (search-forward "<think>\n" nil t)
+        (let* ((endtag "\n</think>")
+               (end (save-excursion
+                      (search-forward endtag))))
+          (buffer-substring (point) (- end (length endtag))))))))
 
 (defun llm-ollama--response-format (format)
   "Return the response format for FORMAT."
@@ -180,14 +201,24 @@ PROVIDER is the llm-ollama provider."
                                                       
(llm-provider-utils-tool-use-args tool-use)))))
                     tool-uses))))
 
-(cl-defmethod llm-provider-streaming-media-handler ((_ llm-ollama) 
msg-receiver _ _)
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-ollama) receiver _)
   (cons 'application/x-ndjson
         (plz-media-type:application/x-ndjson
-         :handler (lambda (data)
-                    (when-let ((response (assoc-default
-                                          'content
-                                          (assoc-default 'message data))))
-                      (funcall msg-receiver response))))))
+         :handler (let ((in-reasoning))
+                    (lambda (data)
+                      (when-let ((response (assoc-default
+                                            'content
+                                            (assoc-default 'message data))))
+                        ;; The response from ollama should just have the tag 
and
+                        ;; nothing more.
+                        (cond
+                         ((string-match "<think>" response)
+                          (setq in-reasoning t))
+                         ((string-match "</think>" response)
+                          (setq in-reasoning nil))
+                         (t (funcall receiver (list (if in-reasoning
+                                                        :reasoning
+                                                      :text) response))))))))))
 
 (cl-defmethod llm-name ((provider llm-ollama))
   (or (llm-ollama-chat-model provider)
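
For reference, a minimal sketch of the same <think>-tag splitting on a plain 
string, independent of the buffer-based implementation above (the sample text 
is made up, and `string-search' assumes Emacs 28 or later):

  (let* ((raw "<think>\nThe user wants a greeting.\n</think>\n\nHello!")
         (end (string-search "\n</think>" raw)))
    (list :reasoning (when end
                       ;; Reasoning sits between the opening and closing tags.
                       (string-trim (substring raw (length "<think>\n") end)))
          :text (if end
                    ;; The answer follows the closing tag.
                    (string-trim (substring raw (+ end (length "\n</think>"))))
                  raw)))
  ;; => (:reasoning "The user wants a greeting." :text "Hello!")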
diff --git a/llm-openai.el b/llm-openai.el
index a0d37b519e..d67f59434e 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -338,7 +338,7 @@ RESPONSE can be nil if the response is complete."
                                  (assoc-default 'tool_calls delta)))))
       content-or-call)))
 
-(cl-defmethod llm-provider-streaming-media-handler ((_ llm-openai) 
msg-receiver fc-receiver _)
+(cl-defmethod llm-provider-streaming-media-handler ((_ llm-openai) receiver _)
   (cons 'text/event-stream
         (plz-event-source:text/event-stream
          :events `((message
@@ -348,7 +348,10 @@ RESPONSE can be nil if the response is complete."
                          (unless (equal data "[DONE]")
                            (when-let ((response 
(llm-openai--get-partial-chat-response
                                                  (json-parse-string data 
:object-type 'alist))))
-                             (funcall (if (stringp response) msg-receiver 
fc-receiver) response))))))))))
+                             (funcall receiver (if (stringp response)
+                                                   (list :text response)
+                                                 (list :tool-uses-raw
+                                                       response))))))))))))
 
 (cl-defmethod llm-provider-collect-streaming-tool-uses ((_ llm-openai) data)
   (llm-provider-utils-openai-collect-streaming-tool-uses data))
diff --git a/llm-provider-utils-test.el b/llm-provider-utils-test.el
index a77060239c..06189ac8a6 100644
--- a/llm-provider-utils-test.el
+++ b/llm-provider-utils-test.el
@@ -138,5 +138,15 @@
     (should (equal "Previous interactions:\n\nUser: Hello\nAssistant: Hi! How 
can I assist you?\n\nThe current conversation follows:\n\nEarl Grey, hot."
                    (llm-chat-prompt-interaction-content (nth 0 
(llm-chat-prompt-interactions prompt-for-second-request)))))))
 
+(ert-deftest llm-provider-utils-streaming-accumulate ()
+  (should (equal 3 (llm-provider-utils-streaming-accumulate 1 2)))
+  (should (equal "foobar" (llm-provider-utils-streaming-accumulate "foo" 
"bar")))
+  (should (equal [1 2 3] (llm-provider-utils-streaming-accumulate [1] [2 3])))
+  (should (equal '(1 2 3) (llm-provider-utils-streaming-accumulate '(1) '(2 
3))))
+  (should (equal (llm-test-normalize '(:foo "aa" :bar "b" :baz "c"))
+                 (llm-test-normalize (llm-provider-utils-streaming-accumulate 
'(:foo "a" :baz "c") '(:foo "a" :bar "b")))))
+  (should (equal '(:foo 3) (llm-provider-utils-streaming-accumulate '(:foo 1) 
'(:foo 2))))
+  (should (equal '(:foo "foo bar baz") 
(llm-provider-utils-streaming-accumulate '(:foo "foo bar") '(:foo " baz")))))
+
 (provide 'llm-provider-utils-test)
 ;;; llm-provider-utils-test.el ends here
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index 1822bbdb23..fafd24c0da 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -186,19 +186,18 @@ TOOL-RESULTS is a list of function results, if any.")
   ;; By default, we just append to the prompt.
   (llm-provider-utils-append-to-prompt prompt result tool-results))
 
-(cl-defgeneric llm-provider-streaming-media-handler (provider msg-receiver 
fc-receiver err-receiver)
+(cl-defgeneric llm-provider-streaming-media-handler (provider receiver 
err-receiver)
   "Define how to handle streaming media for the PROVIDER.
 
 This should return a cons of the media type and an instance that
 handle objects of that type.
 
-The handlers defined can call MSG-RECEIVER when they receive part
-of a text message for the client (a chat response).  If they
-receive a function call, they should call FC-RECEIVER with the
-function call.  If they receive an error, they should call
-ERR-RECEIVER with the error message.")
+The handlers defined can call RECEIVER with a plist compatible with the
+output of the llm functions returned when `multi-output' is set.  If
+they receive an error, they should call ERR-RECEIVER with the error
+message.")
 
-(cl-defmethod llm-provider-streaming-media-handler ((_ 
llm-standard-chat-provider) _ _ _)
+(cl-defmethod llm-provider-streaming-media-handler ((_ 
llm-standard-chat-provider) _ _)
   "By default, the standard provider has no streaming media handler."
   nil)
 
@@ -214,6 +213,13 @@ list of `llm-provider-utils-tool-use'.")
   "By default, the standard provider has no function call extractor."
   nil)
 
+(cl-defgeneric llm-provider-extract-reasoning (provider response)
+  "Return the reasoning from RESPONSE for the PROVIDER.")
+
+(cl-defmethod llm-provider-extract-reasoning ((_ llm-standard-chat-provider) _)
+  "By default, the standard provider has no reasoning extractor."
+  nil)
+
 (cl-defgeneric llm-provider-populate-tool-uses (provider prompt tool-uses)
   "For PROVIDER, in PROMPT, record TOOL-USES.
 This is the recording before the function calls were executed, in the prompt.
@@ -304,7 +310,19 @@ return a list of `llm-chat-prompt-tool-use' structs.")
                          provider data)
                         "Unknown error")))))))
 
-(cl-defmethod llm-chat ((provider llm-standard-chat-provider) prompt)
+(defun llm-provider-utils-extract-all (provider response)
+  "Extract all from RESPONSE for the PROVIDER."
+  (let ((text
+         (llm-provider-chat-extract-result provider response))
+        (tool-uses (llm-provider-extract-tool-uses
+                    provider response))
+        (reasoning (llm-provider-extract-reasoning
+                    provider response)))
+    (append (when text `(:text ,text))
+            (when tool-uses `(:tool-uses ,tool-uses))
+            (when reasoning `(:reasoning ,reasoning)))))
+
+(cl-defmethod llm-chat ((provider llm-standard-chat-provider) prompt &optional 
multi-output)
   (llm-provider-request-prelude provider)
   (let ((response (llm-request-plz-sync (llm-provider-chat-url provider)
                                         :headers (llm-provider-headers 
provider)
@@ -313,10 +331,9 @@ return a list of `llm-chat-prompt-tool-use' structs.")
     (if-let ((err-msg (llm-provider-chat-extract-error provider response)))
         (error err-msg)
       (llm-provider-utils-process-result provider prompt
-                                         (llm-provider-chat-extract-result
-                                          provider response)
-                                         (llm-provider-extract-tool-uses
+                                         (llm-provider-utils-extract-all
                                           provider response)
+                                         multi-output
                                          (lambda (result)
                                            (setq final-result result))))
     ;; In most cases, final-result will be available immediately.  However, 
when
@@ -327,7 +344,7 @@ return a list of `llm-chat-prompt-tool-use' structs.")
     final-result))
 
 (cl-defmethod llm-chat-async ((provider llm-standard-chat-provider) prompt 
success-callback
-                              error-callback)
+                              error-callback &optional multi-output)
   (llm-provider-request-prelude provider)
   (let ((buf (current-buffer)))
     (llm-request-plz-async
@@ -341,8 +358,8 @@ return a list of `llm-chat-prompt-tool-use' structs.")
                         err-msg)
                      (llm-provider-utils-process-result
                       provider prompt
-                      (llm-provider-chat-extract-result provider data)
-                      (llm-provider-extract-tool-uses provider data)
+                      (llm-provider-utils-extract-all provider data)
+                      multi-output
                       (lambda (result)
                         (llm-provider-utils-callback-in-buffer
                          buf success-callback result)))))
@@ -355,12 +372,47 @@ return a list of `llm-chat-prompt-tool-use' structs.")
                          provider data)
                         "Unknown error")))))))
 
+(defun llm-provider-utils-streaming-accumulate (current new)
+  "Add streaming NEW to CURRENT and return the result.
+
+This is designed to accumulate responses for streaming results.  It
+assumes that CURRENT and NEW are the same type of thing.
+
+This will work with text as well as the plists that are returned when
+`multi-output' is on.
+
+Any strings will be concatenated, integers will be added, etc."
+  (if current
+      (if new
+          (progn
+            (unless (eq (type-of current) (type-of new))
+              (error "Cannot accumulate different types of streaming results: 
%s and %s"
+                     current new))
+            (pcase (type-of current)
+              ('string (concat current new))
+              ('integer (+ current new))
+              ('float (+ current new))
+              ('vector (vconcat current new))
+              ('cons (if (and (> (length current) 0)  ;; if plist
+                              (symbolp (car current))
+                              (string-match-p "^:" (symbol-name (car 
current))))
+                         (cl-loop for key in
+                                  (seq-union (map-keys current)
+                                             (map-keys new))
+                                  append
+                                  (list key
+                                        
(llm-provider-utils-streaming-accumulate
+                                         (plist-get current key)
+                                         (plist-get new key))))
+                       (append current new)))))
+        current)
+    new))
+
 (cl-defmethod llm-chat-streaming ((provider llm-standard-chat-provider) prompt 
partial-callback
-                                  response-callback error-callback)
+                                  response-callback error-callback &optional 
multi-output)
   (llm-provider-request-prelude provider)
   (let ((buf (current-buffer))
-        (current-text "")
-        (fc nil))
+        (current-result))
     (llm-request-plz-async
      (llm-provider-chat-streaming-url provider)
      :headers (llm-provider-headers provider)
@@ -368,13 +420,13 @@ return a list of `llm-chat-prompt-tool-use' structs.")
      :media-type (llm-provider-streaming-media-handler
                   provider
                   (lambda (s)
-                    (when (> (length s) 0)
-                      (setq current-text
-                            (concat current-text s))
-                      (when partial-callback
-                        (llm-provider-utils-callback-in-buffer
-                         buf partial-callback current-text))))
-                  (lambda (fc-new) (push fc-new fc))
+                    (setq current-result
+                          (llm-provider-utils-streaming-accumulate 
current-result s))
+                    (when partial-callback
+                      (llm-provider-utils-callback-in-buffer
+                       buf partial-callback (if multi-output
+                                                current-result
+                                              (plist-get current-result 
:text)))))
                   (lambda (err)
                     (llm-provider-utils-callback-in-buffer
                      buf error-callback 'error
@@ -384,10 +436,13 @@ return a list of `llm-chat-prompt-tool-use' structs.")
        ;; We don't need the data at the end of streaming, so we can ignore it.
        (llm-provider-utils-process-result
         provider prompt
-        current-text
-        (when fc
-          (llm-provider-collect-streaming-tool-uses
-           provider (nreverse fc)))
+        (llm-provider-utils-streaming-accumulate
+         current-result
+         (when-let ((tool-uses-raw (plist-get current-result
+                                              :tool-uses-raw)))
+           `(:tool-uses ,(llm-provider-collect-streaming-tool-uses
+                          provider tool-uses-raw))))
+        multi-output
         (lambda (result)
           (llm-provider-utils-callback-in-buffer
            buf response-callback result))))
@@ -613,24 +668,23 @@ This returns a JSON object (a list that can be converted 
to JSON)."
 
 (defun llm-provider-utils-openai-collect-streaming-tool-uses (data)
   "Read Open AI compatible streaming output DATA to collect tool-uses."
-  (let* ((num-index (+ 1 (assoc-default 'index (aref (car (last data)) 0))))
+  (let* ((num-index (+ 1 (assoc-default 'index (aref data 0))))
          (cvec (make-vector num-index nil)))
     (dotimes (i num-index)
       (setf (aref cvec i) (make-llm-provider-utils-tool-use)))
-    (cl-loop for part in data do
-             (cl-loop for call in (append part nil) do
-                      (let* ((index (assoc-default 'index call))
-                             (id (assoc-default 'id call))
-                             (function (assoc-default 'function call))
-                             (name (assoc-default 'name function))
-                             (arguments (assoc-default 'arguments function)))
-                        (when id
-                          (setf (llm-provider-utils-tool-use-id (aref cvec 
index)) id))
-                        (when name
-                          (setf (llm-provider-utils-tool-use-name (aref cvec 
index)) name))
-                        (setf (llm-provider-utils-tool-use-args (aref cvec 
index))
-                              (concat (llm-provider-utils-tool-use-args (aref 
cvec index))
-                                      arguments)))))
+    (cl-loop for call in (append data nil) do
+             (let* ((index (assoc-default 'index call))
+                    (id (assoc-default 'id call))
+                    (function (assoc-default 'function call))
+                    (name (assoc-default 'name function))
+                    (arguments (assoc-default 'arguments function)))
+               (when id
+                 (setf (llm-provider-utils-tool-use-id (aref cvec index)) id))
+               (when name
+                 (setf (llm-provider-utils-tool-use-name (aref cvec index)) 
name))
+               (setf (llm-provider-utils-tool-use-args (aref cvec index))
+                     (concat (llm-provider-utils-tool-use-args (aref cvec 
index))
+                             arguments))))
     (cl-loop for call in (append cvec nil)
              do (setf (llm-provider-utils-tool-use-args call)
                       (json-parse-string (llm-provider-utils-tool-use-args 
call)
@@ -661,7 +715,7 @@ ROLE will be `assistant' by default, but can be passed in 
for other roles."
                                   (format "%s" output))
                        :tool-results tool-results)))))
 
-(defun llm-provider-utils-process-result (provider prompt text tool-uses 
success-callback)
+(defun llm-provider-utils-process-result (provider prompt partial-result 
multi-output success-callback)
   "Process the RESPONSE from the provider for PROMPT.
 This execute function calls if there are any, does any result
 appending to the prompt, and returns an appropriate response for
@@ -671,22 +725,25 @@ PROVIDER is the struct that configures the use of the LLM.
 
 TOOL-USES is a list of tool uses in the result.
 
-TEXT is the text output from the provider, if any.  There should
-be either FUNCALLS or TEXT.
+PARTIAL-RESULT is the multipart result, without any tool results.
+
+MULTI-OUTPUT is true if multiple outputs are expected to be passed to
+SUCCESS-CALLBACK.
 
 SUCCESS-CALLBACK is the callback that will be run when all functions
 complete."
-  (if tool-uses
+  (when (plist-get partial-result :text)
+    (llm-provider-append-to-prompt provider prompt (plist-get partial-result 
:text)))
+  (if-let ((tool-uses (plist-get partial-result :tool-uses)))
       ;; If we have tool uses, execute them, and on the callback, we will
       ;; populate the results.  We don't execute the callback here because it
       ;; will be done inside `llm-provider-utils-execute-tool-uses'.
       (llm-provider-utils-execute-tool-uses
-       provider prompt tool-uses success-callback)
-    ;; We probably shouldn't be called if text is nil, but if we do,
-    ;; we shouldn't add something invalid to the prompt.
-    (when text
-      (llm-provider-append-to-prompt provider prompt text))
-    (funcall success-callback text)))
+       provider prompt tool-uses multi-output
+       partial-result success-callback)
+    (funcall success-callback
+             (if multi-output partial-result
+               (plist-get partial-result :text)))))
 
 (defun llm-provider-utils-populate-tool-uses (provider prompt results-alist)
   "Append the results in RESULTS-ALIST to the prompt.
@@ -706,7 +763,25 @@ PROVIDER is the struct that configures the user of the 
LLM."
                         :result (cdr c)))
            results-alist)))
 
-(defun llm-provider-utils-execute-tool-uses (provider prompt tool-uses 
success-callback)
+(defun llm-provider-utils-final-multi-output-result (tool-results)
+  "Return the final result from TOOL-RESULTS.
+
+This transforms the plist so that:
+1. We don't return an empty :text value.
+2. We transform the :tool-uses to an alist of tool name to use."
+  (cl-loop for (key value) on tool-results
+           by 'cddr
+           if (and (not (and (eq key :text) (equal value "")))
+                   (member key '(:text :tool-uses :tool-results)))
+           nconc (list key
+                       (if (eq key :tool-uses)
+                           (mapcar (lambda (tool-use)
+                                     `(:name 
,(llm-provider-utils-tool-use-name tool-use)
+                                             :args 
,(llm-provider-utils-tool-use-args tool-use)))
+                                   value)
+                         value))))
+
+(defun llm-provider-utils-execute-tool-uses (provider prompt tool-uses 
multi-output partial-result success-callback)
   "Execute TOOL-USES, a list of `llm-provider-utils-tool-use'.
 
 A response suitable for returning to the client will be returned.
@@ -717,6 +792,12 @@ PROMPT was the prompt given to the provider, which will get
 updated with the response from the LLM, and if there is a
 function call, the result.
 
+MULTI-OUTPUT is true if multiple outputs are expected to be passed to
+SUCCESS-CALLBACK.
+
+PARTIAL-RESULT is the result to return to the user, without the tool
+call results.
+
 SUCCESS-CALLBACK is the callback that will be run when all functions
 have returned results."
   (llm-provider-populate-tool-uses provider prompt tool-uses)
@@ -745,7 +826,12 @@ have returned results."
                         (when (= (length results) (length tool-uses))
                           (llm-provider-utils-populate-tool-uses
                            provider prompt results)
-                          (funcall success-callback tool-use-and-results)))))
+                          (funcall success-callback
+                                   (if multi-output
+                                       
(llm-provider-utils-final-multi-output-result
+                                        (append partial-result
+                                                `(:tool-results 
,tool-use-and-results)))
+                                     tool-use-and-results))))))
        (if (llm-tool-async tool)
            (apply (llm-tool-function tool)
                   (append (list end-func) call-args))
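
To illustrate how `llm-provider-utils-streaming-accumulate' merges successive 
partial plists during streaming (a worked example; key order follows the first 
argument):

  (llm-provider-utils-streaming-accumulate
   '(:reasoning "Think" :text "Hel")
   '(:reasoning "ing..." :text "lo"))
  ;; => (:reasoning "Thinking..." :text "Hello")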
diff --git a/llm-vertex.el b/llm-vertex.el
index 99b3338182..ca8d523147 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -258,8 +258,7 @@ nothing to add, in which case it is nil."
   (llm-provider-utils-append-to-prompt prompt tool-uses nil 'assistant))
 
 (cl-defmethod llm-provider-streaming-media-handler ((provider llm-google)
-                                                    msg-receiver fc-receiver
-                                                    err-receiver)
+                                                    receiver err-receiver)
   (cons 'application/json
         (plz-media-type:application/json-array
          :handler
@@ -267,9 +266,9 @@ nothing to add, in which case it is nil."
            (when-let ((err-response (llm-provider-chat-extract-error provider 
element)))
              (funcall err-receiver err-response))
            (if-let ((response (llm-provider-chat-extract-result provider 
element)))
-               (funcall msg-receiver response)
+               (funcall receiver `(:text ,response))
              (when-let ((fc (llm-provider-extract-tool-uses provider element)))
-               (funcall fc-receiver fc)))))))
+               (funcall receiver `(:tool-uses-raw ,fc))))))))
 
 (cl-defmethod llm-provider-collect-streaming-tool-uses ((_ llm-google) data)
   (car data))
diff --git a/llm.el b/llm.el
index c8f87467ea..324a47508e 100644
--- a/llm.el
+++ b/llm.el
@@ -342,7 +342,7 @@ need to override it."
   (ignore provider)
   nil)
 
-(cl-defgeneric llm-chat (provider prompt)
+(cl-defgeneric llm-chat (provider prompt &optional multi-output)
   "Return a response to PROMPT from PROVIDER.
 PROMPT is a `llm-chat-prompt'.
 
@@ -352,20 +352,25 @@ conses of the function named called (as a symbol), and the
 corresponding result from calling it.
 
 The prompt's interactions list will be updated to encode the
-conversation so far."
-  (ignore provider prompt)
+conversation so far.
+
+If MULTI-OUTPUT is non-nil, the response is a plist with the possible
+keys: `text' (textual output), `reasoning' (reasoning that accompanies
+the output), `tool-uses' (a list of plists with tool `:name' and
+`:args'), and `tool-results' (an alist of results of calling tools)."
+  (ignore provider prompt multi-output)
   (signal 'not-implemented nil))
 
-(cl-defmethod llm-chat ((_ (eql nil)) _)
+(cl-defmethod llm-chat ((_ (eql nil)) _ &optional _)
   "Catch trivial configuration mistake."
   (error "LLM provider was nil.  Please set the provider in the application 
you are using"))
 
-(cl-defmethod llm-chat :before (provider _)
+(cl-defmethod llm-chat :before (provider _ &optional _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
     (llm--warn-on-nonfree (llm-name provider) info)))
 
-(cl-defmethod llm-chat :around (provider prompt)
+(cl-defmethod llm-chat :around (provider prompt &optional _)
   "Log the input to llm-chat."
   (llm--log 'api-send :provider provider :prompt prompt)
   ;; We set the debug flag to nil around the next-method so that we don't log
@@ -378,7 +383,7 @@ conversation so far."
       (llm--log 'api-receive :provider provider :msg result))
     result))
 
-(cl-defgeneric llm-chat-async (provider prompt response-callback 
error-callback)
+(cl-defgeneric llm-chat-async (provider prompt response-callback 
error-callback &optional multi-output)
   "Call RESPONSE-CALLBACK with a response to PROMPT from PROVIDER.
 
 The response is a string response by the LLM when functions are
@@ -392,6 +397,11 @@ RESPONSE-CALLBACK receives the final text.
 
 ERROR-CALLBACK receives the error response.
 
+If MULTI-OUTPUT is non-nil, the response is a plist with the possible
+keys: `text' (textual output), `reasoning' (reasoning that accompanies
+the output), `tool-uses' (a list of plists with tool `:name' and
+`:args'), and `tool-results' (an alist of results of calling tools).
+
 The prompt's interactions list will be updated to encode the
 conversation so far.
 
@@ -400,14 +410,19 @@ be passed to `llm-cancel-request'."
   ;; By default, you can turn a streaming call into an async call, so we can
   ;; fall back to streaming if async is not populated.
   ;; However, first, we don't want to log twice, so let's delete the last log 
so that llm-chat-streaming will
-  (llm-chat-streaming provider prompt
-                      ;; Do nothing on partial callback
-                      nil
-                      (lambda (text)
-                        (funcall response-callback text))
-                      (lambda (err msg) (funcall error-callback err msg))))
-
-(cl-defmethod llm-chat-async :around (provider prompt response-callback 
error-callback)
+  ;;
+  ;; We use `apply' here in case `llm-chat-streaming' is older and doesn't
+  ;; support the multi-output argument.
+  (apply #'llm-chat-streaming
+         provider prompt
+         ;; Do nothing on partial callback
+         nil
+         (lambda (text)
+           (funcall response-callback text))
+         (lambda (err msg) (funcall error-callback err msg))
+         (when multi-output (list multi-output))))
+
+(cl-defmethod llm-chat-async :around (provider prompt response-callback 
error-callback &optional multi-output)
   "Log the input to llm-chat-async."
   (llm--log 'api-send :provider provider :prompt prompt)
   (let* ((new-response-callback (lambda (response)
@@ -422,10 +437,11 @@ be passed to `llm-cancel-request'."
          (llm-log nil)
          (result (cl-call-next-method provider prompt
                                       new-response-callback
-                                      new-error-callback)))
+                                      new-error-callback
+                                      multi-output)))
     result))
 
-(cl-defgeneric llm-chat-streaming (provider prompt partial-callback 
response-callback error-callback)
+(cl-defgeneric llm-chat-streaming (provider prompt partial-callback 
response-callback error-callback &optional multi-output)
   "Stream a response to PROMPT from PROVIDER.
 PROMPT is a `llm-chat-prompt'.
 
@@ -449,24 +465,29 @@ final text.
 
 ERROR-CALLBACK receives the error response.
 
+If MULTI-OUTPUT is non-nil, the response is a plist with the possible
+keys: `text' (textual output), `reasoning' (reasoning that accompanies
+the output), `tool-uses' (a list of plists with tool `:name' and
+`:args'), and `tool-results' (an alist of results of calling tools).
+
 The prompt's interactions list will be updated to encode the
 conversation so far.
 
 This returns an object representing the async request, which can
 be passed to `llm-cancel-request'."
-  (ignore provider prompt partial-callback response-callback error-callback)
+  (ignore provider prompt partial-callback response-callback error-callback 
multi-output)
   (signal 'not-implemented nil))
 
-(cl-defmethod llm-chat-streaming ((_ (eql nil)) _ _ _ _)
+(cl-defmethod llm-chat-streaming ((_ (eql nil)) _ _ _ _ &optional _)
   "Catch trivial configuration mistake."
   (error "LLM provider was nil.  Please set the provider in the application 
you are using"))
 
-(cl-defmethod llm-chat-streaming :before (provider _ _ _ _)
+(cl-defmethod llm-chat-streaming :before (provider _ _ _ _ &optional _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
     (llm--warn-on-nonfree (llm-name provider) info)))
 
-(cl-defmethod llm-chat-streaming :around (provider prompt partial-callback 
response-callback error-callback)
+(cl-defmethod llm-chat-streaming :around (provider prompt partial-callback 
response-callback error-callback &optional multi-output)
   "Log the input to llm-chat-async."
   (llm--log 'api-send :provider provider :prompt prompt)
   ;; We need to wrap the callbacks before we set llm-log to nil.
@@ -486,7 +507,7 @@ be passed to `llm-cancel-request'."
          (llm-log nil)
          (result (cl-call-next-method provider prompt new-partial-callback
                                       new-response-callback
-                                      new-error-callback)))
+                                      new-error-callback multi-output)))
     result))
 
 (cl-defun llm-chat-streaming-to-point (provider prompt buffer point 
finish-callback
@@ -533,11 +554,11 @@ be passed to `llm-cancel-request'."
                                 (funcall finish-callback))
                               (lambda (_ msg) (error "Error calling the LLM: 
%s" msg))))))))
 
-(cl-defmethod llm-chat-async ((_ (eql nil)) _ _ _)
+(cl-defmethod llm-chat-async ((_ (eql nil)) _ _ _ &optional _)
   "Catch trivial configuration mistake."
   (error "LLM provider was nil.  Please set the provider in the application 
you are using"))
 
-(cl-defmethod llm-chat-async :before (provider _ _ _)
+(cl-defmethod llm-chat-async :before (provider _ _ _ &optional _)
   "Issue a warning if the LLM is non-free."
   (when-let (info (llm-nonfree-message-info provider))
     (llm--warn-on-nonfree (llm-name provider) info)))
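
A minimal streaming sketch under the new interface, assuming `my-provider' 
supports streaming; each partial update and the final response are plists when 
the last argument is non-nil:

  (llm-chat-streaming
   my-provider
   (llm-make-chat-prompt "Count to three.")
   (lambda (partial)
     ;; PARTIAL is the accumulated plist so far, for example (:text "One, tw").
     (message "So far: %s" (plist-get partial :text)))
   (lambda (response)
     (message "Done: %s" (plist-get response :text)))
   (lambda (err msg) (message "Error %s: %s" err msg))
   t)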

