branch: externals/llm
commit 16335ca7cd8c324a9efab0de25fcdedf0fd44208
Merge: b69b6e8480 59fc3d7d29
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: Andrew Hyatt <ahy...@gmail.com>

    Merge branch 'conversation-fix'
---
 README.org     |  17 ++++++--
 llm-fake.el    |  24 ++++++----
 llm-ollama.el  |  61 ++++++++++++++++----------
 llm-openai.el  |  61 ++++++++++++++++----------
 llm-request.el |   9 ++++
 llm-tester.el  | 136 ++++++++++++++++++++++++++++++++++++++++++++++++---------
 llm-vertex.el  |  43 ++++++++++++------
 llm.el         |  41 ++++++++++++++---
 8 files changed, 293 insertions(+), 99 deletions(-)

diff --git a/README.org b/README.org
index 64f4280a90..7b94708d55 100644
--- a/README.org
+++ b/README.org
@@ -56,9 +56,7 @@ To build upon the example from before:
 #+end_src
 * Programmatic use
 Client applications should require the =llm= package, and code against it.  
Most functions are generic, and take a struct representing a provider as the 
first argument.  The client code, or the user themselves, can then require the 
specific module, such as =llm-openai=, and create a provider with a function 
such as ~(make-llm-openai :key user-api-key)~.  The client application will use 
this provider to call all the generic functions.
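A minimal sketch of that pattern, assuming =llm-openai= is installed and 
=user-api-key= holds a valid key (~my-llm-provider~ is just an illustrative 
name), might look like:

#+begin_src emacs-lisp
;; Illustrative sketch only; `user-api-key' is assumed to hold a valid key.
(require 'llm)
(require 'llm-openai)

(defvar my-llm-provider (make-llm-openai :key user-api-key)
  "Provider struct passed as the first argument to the generic functions.")

;; Every generic function takes the provider as its first argument.
(llm-chat my-llm-provider
          (llm-make-simple-chat-prompt "Say hello in one short sentence."))
#+end_src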
-
-A list of all the main functions:
-
+** Main functions
- ~llm-chat provider prompt~:  With user-chosen ~provider~, and a 
~llm-chat-prompt~ structure (containing context, examples, interactions, and 
parameters such as temperature and max tokens), send that prompt to the LLM and 
wait for the string output.
- ~llm-chat-async provider prompt response-callback error-callback~: Same as 
~llm-chat~, but executes in the background.  Takes a ~response-callback~ which 
will be called with the text response.  The ~error-callback~ will be called in 
case of error, with the error symbol and an error message (see the sketch after 
this list).
 - ~llm-chat-streaming provider prompt partial-callback response-callback 
error-callback~:  Similar to ~llm-chat-async~, but requests a streaming 
response.  As the response is built up, ~partial-callback~ is called with all 
the text retrieved up to the current point.  Finally, ~response-callback~ is 
called with the complete text.
@@ -70,6 +68,19 @@ A list of all the main functions:
   - ~llm-make-simple-chat-prompt text~: For the common case of just wanting a 
simple text prompt without the richness that ~llm-chat-prompt~ struct provides, 
use this to turn a string into a ~llm-chat-prompt~ that can be passed to the 
main functions above.
   - ~llm-chat-prompt-to-text prompt~: Somewhat opposite of the above, from a 
prompt, return a string representation.  This is not usually suitable for 
passing to LLMs, but is useful for debugging purposes.
   - ~llm-chat-streaming-to-point provider prompt buffer point 
finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream 
to ~point~ in ~buffer~.
+  - ~llm-chat-prompt-append-response prompt response role~: Append a new 
response (from the user, usually) to the prompt.  The ~role~ is optional, and 
defaults to ~'user~.
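For example, a sketch of the asynchronous entry point (with ~my-provider~ 
standing in for any provider struct):

#+begin_src emacs-lisp
;; Illustrative sketch; `my-provider' stands in for any provider struct.
(llm-chat-async
 my-provider
 (llm-make-simple-chat-prompt "Tell me one fact about Emacs.")
 (lambda (response)
   ;; Called with the complete text response.
   (message "LLM response: %s" response))
 (lambda (err msg)
   ;; Called with the error symbol and an error message.
   (message "LLM error %s: %s" err msg)))
#+end_src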
+** How to handle conversations
+Conversations can take place by repeatedly calling ~llm-chat~ and its 
variants.  For a conversation, the entire prompt must be a variable, because 
the ~llm-chat-prompt-interactions~ slot will be getting changed by the chat 
functions to store the conversation.  For some providers, this will store the 
history directly in ~llm-chat-prompt-interactions~, but for others (such as 
ollama), the conversation history is opaque.  For that reason, the correct way 
to handle a conversation is to repeatedly pass the same prompt variable to the 
chat functions, appending each new piece of user input with 
~llm-chat-prompt-append-response~, as in the example below.
+
+#+begin_src emacs-lisp
+(defvar-local llm-chat-streaming-prompt nil)
+(defun start-or-continue-conversation (text)
+  "Called when the user has input TEXT as the next input.
+Assumes `provider' is bound elsewhere to an llm provider struct."
+  (if llm-chat-streaming-prompt
+      (llm-chat-prompt-append-response llm-chat-streaming-prompt text)
+    (setq llm-chat-streaming-prompt (llm-make-simple-chat-prompt text)))
+  (llm-chat-streaming-to-point provider llm-chat-streaming-prompt
+                               (current-buffer) (point-max) (lambda ())))
+#+end_src
 
 * Contributions
 If you are interested in creating a provider, please send a pull request, or 
open a bug.  This library is part of GNU ELPA, so any major provider that we 
include in this module needs to be written by someone with FSF papers.  
However, you can always write a module and put it on a different package 
archive, such as MELPA.
diff --git a/llm-fake.el b/llm-fake.el
index 7ead4e01ed..30db7ba9c4 100644
--- a/llm-fake.el
+++ b/llm-fake.el
@@ -57,14 +57,19 @@ message cons. If nil, the response will be a simple vector."
     (with-current-buffer (get-buffer-create (llm-fake-output-to-buffer 
provider))
       (goto-char (point-max))
       (insert "\nCall to llm-chat\n"  (llm-chat-prompt-to-text prompt) "\n")))
-  (if (llm-fake-chat-action-func provider)
-      (let* ((f (llm-fake-chat-action-func provider))
-             (result (funcall f)))
-        (pcase (type-of result)
-                ('string result)
-                ('cons (signal (car result) (cdr result)))
-                (_ (error "Incorrect type found in `chat-action-func': %s" 
(type-of result)))))
-    "Sample response from `llm-chat-async'"))
+  (let ((result
+         (if (llm-fake-chat-action-func provider)
+             (let* ((f (llm-fake-chat-action-func provider))
+                    (result (funcall f)))
+               (pcase (type-of result)
+                 ('string result)
+                 ('cons (signal (car result) (cdr result)))
+                 (_ (error "Incorrect type found in `chat-action-func': %s" 
(type-of result)))))
+           "Sample response from `llm-chat-async'")))
+    (setf (llm-chat-prompt-interactions prompt)
+          (append (llm-chat-prompt-interactions prompt)
+                  (list (make-llm-chat-prompt-interaction :role 'assistant 
:content result))))
+    result))
 
 (cl-defmethod llm-chat-streaming ((provider llm-fake) prompt partial-callback 
response-callback _error-callback)
   (when (llm-fake-output-to-buffer provider)
@@ -85,6 +90,9 @@ message cons. If nil, the response will be a simple vector."
               (funcall partial-callback accum)
               (sleep-for 0 100))
             (string-split text))
+      (setf (llm-chat-prompt-interactions prompt)
+            (append (llm-chat-prompt-interactions prompt)
+                    (list (make-llm-chat-prompt-interaction :role 'assistant 
:content text))))
       (funcall response-callback text))))
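
A sketch of how client code might drive this fake provider for testing, 
assuming the default ~make-llm-fake~ constructor generated by the struct 
definition:

#+begin_src emacs-lisp
;; Illustrative only; slot names are taken from the accessors used above.
(require 'llm)
(require 'llm-fake)

(let ((provider (make-llm-fake
                 :chat-action-func (lambda () "canned reply")
                 :output-to-buffer "*llm-fake-log*")))
  ;; Returns "canned reply"; the call is also logged to *llm-fake-log*.
  (llm-chat provider (llm-make-simple-chat-prompt "hello")))
#+end_src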
 
 (cl-defmethod llm-embedding ((provider llm-fake) string)
diff --git a/llm-ollama.el b/llm-ollama.el
index fbce0fa6e0..7fd6afdec9 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -110,20 +110,16 @@ STREAMING if non-nil, turn on response streaming."
                                          (car example)
                                          (cdr example)))
                                (llm-chat-prompt-examples prompt) "\n"))))
-    (setq text-prompt (concat text-prompt "\n"
-                              (let ((conversationp (> (length 
(llm-chat-prompt-interactions prompt)) 1)))
-                                (if conversationp
-                                    (concat
-                                     "The following interactions have already 
happened: "
-                                     (mapcar (lambda (p)
-                                               (format "%s: %s\n"
-                                                       (pcase 
(llm-chat-prompt-interaction-role p)
-                                                         ('user "User")
-                                                         ('assistant 
"Assistant"))
-                                                       (string-trim 
(llm-chat-prompt-interaction-content p))))
-                                             (llm-chat-prompt-interactions 
prompt)))
-                                  (string-trim
-                                   (llm-chat-prompt-interaction-content (car 
(llm-chat-prompt-interactions prompt))))))))
+    ;; The last item should always be the latest interaction, which is the prompt.
+    (setq text-prompt (concat text-prompt
+                              "\n"
+                              (string-trim (llm-chat-prompt-interaction-content
+                                            (car (last 
(llm-chat-prompt-interactions prompt)))))))
+    ;; If the first item isn't an interaction, then it's the opaque context
+    ;; from a previous exchange, which we pass back as the chat context.
+    (when (not (eq (type-of (car (llm-chat-prompt-interactions prompt)))
+                   'llm-chat-prompt-interaction))
+      (push `("context" . ,(car (llm-chat-prompt-interactions prompt))) 
request-alist))
     (push `("prompt" . ,(string-trim text-prompt)) request-alist)
     (push `("model" . ,(llm-ollama-chat-model provider)) request-alist)
     (when (llm-chat-prompt-temperature prompt)
@@ -159,32 +155,49 @@ STREAMING if non-nil, turn on response streaming."
     (setq-local llm-ollama-last-position last-position)
     current-response))
 
+(defun llm-ollama--get-final-response (response)
+  "Return the final post-streaming json output from RESPONSE."
+  (with-temp-buffer
+    (insert response)
+    ;; Find the last json object in the buffer.
+    (goto-char (point-max))
+    (search-backward "{" nil t)
+    (json-read)))
+
 (cl-defmethod llm-chat ((provider llm-ollama) prompt)
   ;; We expect to be in a new buffer with the response, which we use to store
   ;; local variables. The temp buffer won't have the response, but that's fine,
   ;; we really just need it for the local variables.
   (with-temp-buffer
-    (llm-ollama--get-partial-chat-response
-     (llm-request-sync-raw-output (llm-ollama--url provider "generate")
-                                  :data (llm-ollama--chat-request provider 
prompt)
-                                  ;; ollama is run on a user's machine, and it 
can take a while.
-                                  :timeout llm-ollama-chat-timeout))))
+    (let ((output (llm-request-sync-raw-output 
+                   (llm-ollama--url provider "generate")
+                   :data (llm-ollama--chat-request provider prompt)
+                   ;; ollama is run on a user's machine, and it can take a 
while.
+                   :timeout llm-ollama-chat-timeout)))
+      (setf (llm-chat-prompt-interactions prompt)
+               (list (assoc-default 'context (llm-ollama--get-final-response 
output))))
+      (llm-ollama--get-partial-chat-response output))))
 
 (cl-defmethod llm-chat-async ((provider llm-ollama) prompt response-callback 
error-callback)
   (llm-chat-streaming provider prompt (lambda (_)) response-callback 
error-callback))
 
 (cl-defmethod llm-chat-streaming ((provider llm-ollama) prompt 
partial-callback response-callback error-callback)
-  (llm-request-async (llm-ollama--url provider "generate")
+  (let ((buf (current-buffer)))
+    (llm-request-async (llm-ollama--url provider "generate")
       :data (llm-ollama--chat-request provider prompt)
-      :on-success-raw (lambda (data)
-                        (funcall response-callback 
(llm-ollama--get-partial-chat-response data)))
+      :on-success-raw (lambda (response)
+                        (setf (llm-chat-prompt-interactions prompt)
+                              (list (assoc-default 'context 
(llm-ollama--get-final-response response))))
+                        (llm-request-callback-in-buffer
+                         buf response-callback
+                         (llm-ollama--get-partial-chat-response response)))
       :on-partial (lambda (data)
                     (when-let ((response 
(llm-ollama--get-partial-chat-response data)))
-                      (funcall partial-callback response)))
+                      (llm-request-callback-in-buffer buf partial-callback 
response)))
       :on-error (lambda (_ _)
                   ;; The problem with ollama is that it doesn't
                   ;; seem to have an error response.
-                  (funcall error-callback 'error "Unknown error calling 
ollama"))))
+                  (llm-request-callback-in-buffer buf error-callback 'error
+                                                  "Unknown error calling ollama")))))
 
 (provide 'llm-ollama)
 
diff --git a/llm-openai.el b/llm-openai.el
index 7f44f8d62b..6165ad1af6 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -150,25 +150,35 @@ STREAMING if non-nil, turn on response streaming."
 (cl-defmethod llm-chat-async ((provider llm-openai) prompt response-callback 
error-callback)
   (unless (llm-openai-key provider)
     (error "To call Open AI API, the key must have been set"))
-  (llm-request-async "https://api.openai.com/v1/chat/completions";
+  (let ((buf (current-buffer)))
+    (llm-request-async "https://api.openai.com/v1/chat/completions";
       :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key 
provider))))
       :data (llm-openai--chat-request provider prompt)
-      :on-success (lambda (data) (funcall response-callback 
(llm-openai--extract-chat-response data)))
+      :on-success (lambda (data)
+                    (let ((response (llm-openai--extract-chat-response data)))
+                      (setf (llm-chat-prompt-interactions prompt)
+                            (append (llm-chat-prompt-interactions prompt)
+                                    (list (make-llm-chat-prompt-interaction 
:role 'assistant :content response))))
+                      (llm-request-callback-in-buffer buf response-callback 
response)))
       :on-error (lambda (_ data)
                   (let ((errdata (cdr (assoc 'error data))))
-                    (funcall error-callback 'error
+                    (llm-request-callback-in-buffer buf error-callback 'error
                              (format "Problem calling Open AI: %s message: %s"
                                      (cdr (assoc 'type errdata))
-                                     (cdr (assoc 'message errdata))))))))
+                                     (cdr (assoc 'message errdata)))))))))
 
 (cl-defmethod llm-chat ((provider llm-openai) prompt)
   (unless (llm-openai-key provider)
     (error "To call Open AI API, the key must have been set"))
-  (llm-openai--handle-response
-   (llm-request-sync "https://api.openai.com/v1/chat/completions";
-                     :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-openai-key provider))))
-                     :data (llm-openai--chat-request provider prompt))
-   #'llm-openai--extract-chat-response))
+  (let ((response (llm-openai--handle-response
+                   (llm-request-sync 
"https://api.openai.com/v1/chat/completions";
+                                     :headers `(("Authorization" . ,(format 
"Bearer %s" (llm-openai-key provider))))
+                                     :data (llm-openai--chat-request provider 
prompt))
+                   #'llm-openai--extract-chat-response)))
+    (setf (llm-chat-prompt-interactions prompt)
+          (append (llm-chat-prompt-interactions prompt)
+                  (list (make-llm-chat-prompt-interaction :role 'assistant 
:content response))))
+    response))
 
 (defvar-local llm-openai-current-response ""
   "The response so far from the server.")
@@ -197,20 +207,25 @@ STREAMING if non-nil, turn on response streaming."
 (cl-defmethod llm-chat-streaming ((provider llm-openai) prompt 
partial-callback response-callback error-callback)
   (unless (llm-openai-key provider)
     (error "To call Open AI API, the key must have been set"))
-  (llm-request-async "https://api.openai.com/v1/chat/completions";
-                     :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-openai-key provider))))
-                     :data (llm-openai--chat-request provider prompt nil t)
-                     :on-error (lambda (_ data)
-                                 (let ((errdata (cdr (assoc 'error data))))
-                                   (funcall error-callback 'error
-                                            (format "Problem calling Open AI: 
%s message: %s"
-                                                    (cdr (assoc 'type errdata))
-                                                    (cdr (assoc 'message 
errdata))))))
-                     :on-partial (lambda (data)
-                                   (when-let ((response 
(llm-openai--get-partial-chat-response data)))
-                                     (funcall partial-callback response)))
-                     :on-success-raw (lambda (data)
-                                       (funcall response-callback 
(llm-openai--get-partial-chat-response data)))))
+  (let ((buf (current-buffer)))
+    (llm-request-async "https://api.openai.com/v1/chat/completions";
+                       :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-openai-key provider))))
+                       :data (llm-openai--chat-request provider prompt nil t)
+                       :on-error (lambda (_ data)
+                                   (let ((errdata (cdr (assoc 'error data))))
+                                     (llm-request-callback-in-buffer buf 
error-callback 'error
+                                              (format "Problem calling Open 
AI: %s message: %s"
+                                                      (cdr (assoc 'type 
errdata))
+                                                      (cdr (assoc 'message 
errdata))))))
+                       :on-partial (lambda (data)
+                                     (when-let ((response 
(llm-openai--get-partial-chat-response data)))
+                                       (llm-request-callback-in-buffer buf 
partial-callback response)))
+                       :on-success-raw (lambda (data)
+                                         (let ((response 
(llm-openai--get-partial-chat-response data)))
+                                           (setf (llm-chat-prompt-interactions 
prompt)
+                                                 (append 
(llm-chat-prompt-interactions prompt)
+                                                         (list 
(make-llm-chat-prompt-interaction :role 'assistant :content response))))
+                                           (llm-request-callback-in-buffer buf 
response-callback response))))))
 
 (provide 'llm-openai)
 
diff --git a/llm-request.el b/llm-request.el
index a69924c508..8054e79c31 100644
--- a/llm-request.el
+++ b/llm-request.el
@@ -137,5 +137,14 @@ the buffer is turned into JSON and passed to ON-SUCCESS."
                     #'llm-request--handle-new-content
                     nil t))))))
 
+;; This is a useful method for getting out of the request buffer when it's time
+;; to make callbacks.
+(defun llm-request-callback-in-buffer (buf f &rest args)
+  "Run F with ARSG in the context of BUF.
+But if BUF has been killed, use a temporary buffer instead."
+  (if (buffer-live-p buf)
+      (with-current-buffer buf (apply f args))
+    (with-temp-buffer (apply f args))))
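
A sketch of the calling pattern this enables (the names ~my-url~, ~my-data~, 
and ~my-response-handler~ are illustrative placeholders):

#+begin_src emacs-lisp
;; Capture the caller's buffer before the async request, then route the
;; callback through it so buffer-local state is visible to the handler.
(let ((buf (current-buffer)))
  (llm-request-async my-url
                     :data my-data
                     :on-success (lambda (data)
                                   (llm-request-callback-in-buffer
                                    buf #'my-response-handler data))))
#+end_src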
+
 (provide 'llm-request)
 ;;; llm-request.el ends here
diff --git a/llm-tester.el b/llm-tester.el
index 3a4e9a3cda..37290d37c8 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -66,26 +66,29 @@
 (defun llm-tester-chat-async (provider)
   "Test that PROVIDER can interact with the LLM chat."
   (message "Testing provider %s for chat" (type-of provider))
-  (llm-chat-async
-   provider
-   (make-llm-chat-prompt
-    :interactions (list
-                   (make-llm-chat-prompt-interaction
-                    :role 'user
-                    :content "Tell me a random cool feature of emacs."))
-    :context "You must answer all questions as if you were the butler Jeeves 
from Jeeves and Wooster.  Start all interactions with the phrase, 'Very good, 
sir.'"
-    :examples '(("Tell me the capital of France." . "Very good, sir.  The 
capital of France is Paris, which I expect you to be familiar with, since you 
were just there last week with your Aunt Agatha.")
-                ("Could you take me to my favorite place?" . "Very good, sir.  
I believe you are referring to the Drone's Club, which I will take you to after 
you put on your evening attire."))
-    :temperature 0.5
-    :max-tokens 100)
-   (lambda (response)
-     (if response
-         (if (> (length response) 0)
-             (message "SUCCESS: Provider %s provided a response %s" (type-of 
provider) response)
-           (message "ERROR: Provider %s returned an empty response" (type-of 
provider)))
-       (message "ERROR: Provider %s did not return any response" (type-of 
provider))))
-   (lambda (type message)
-     (message "ERROR: Provider %s returned an error of type %s with message 
%s" (type-of provider) type message))))
+  (let ((buf (current-buffer)))
+    (llm-chat-async
+       provider
+       (make-llm-chat-prompt
+        :interactions (list
+                       (make-llm-chat-prompt-interaction
+                        :role 'user
+                        :content "Tell me a random cool feature of emacs."))
+        :context "You must answer all questions as if you were the butler 
Jeeves from Jeeves and Wooster.  Start all interactions with the phrase, 'Very 
good, sir.'"
+        :examples '(("Tell me the capital of France." . "Very good, sir.  The 
capital of France is Paris, which I expect you to be familiar with, since you 
were just there last week with your Aunt Agatha.")
+                    ("Could you take me to my favorite place?" . "Very good, 
sir.  I believe you are referring to the Drone's Club, which I will take you to 
after you put on your evening attire."))
+        :temperature 0.5
+        :max-tokens 100)
+       (lambda (response)
+         (unless (eq buf (current-buffer))
+           (message "ERROR: Provider %s returned a response not in the 
original buffer" (type-of provider)))
+         (if response
+             (if (> (length response) 0)
+                 (message "SUCCESS: Provider %s provided a response %s" 
(type-of provider) response)
+               (message "ERROR: Provider %s returned an empty response" 
(type-of provider)))
+           (message "ERROR: Provider %s did not return any response" (type-of 
provider))))
+       (lambda (type message)
+         (message "ERROR: Provider %s returned an error of type %s with 
message %s" (type-of provider) type message)))))
 
 (defun llm-tester-chat-sync (provider)
   "Test that PROVIDER can interact with the LLM chat."
@@ -112,7 +115,8 @@
   "Test that PROVIDER can stream back LLM chat responses."
   (message "Testing provider %s for streaming chat" (type-of provider))
   (let ((streamed)
-        (counter 0))
+        (counter 0)
+        (buf (current-buffer)))
     (llm-chat-streaming
      provider
      (make-llm-chat-prompt
@@ -123,15 +127,105 @@
       :temperature 0.5
       :max-tokens 200)
      (lambda (text)
+       (unless (eq buf (current-buffer))
+         (message "ERROR: Provider %s returned a response not in the original 
buffer" (type-of provider)))
        (cl-incf counter)
        (setq streamed text))
      (lambda (text)
+       (unless (eq buf (current-buffer))
+         (message "ERROR: Provider %s returned a response not in the original 
buffer" (type-of provider)))
        (message "SUCCESS: Provider %s provided a streamed response %s in %d 
parts, complete text is: %s" (type-of provider) streamed counter text)
        (if (= 0 counter)
            (message "ERROR: Provider %s streaming request never happened!" 
(type-of provider))))
      (lambda (type message)
+       (unless (eq buf (current-buffer))
+         (message "ERROR: Provider %s returned a response not in the original 
buffer" (type-of provider)))
        (message "ERROR: Provider %s returned an error of type %s with message 
%s" (type-of provider) type message)))))
 
+(defun llm-tester-chat-conversation (provider chat-func)
+  "Test that PROVIDER can handle a conversation via CHAT-FUNC.
+CHAT-FUNC should insert the chat response to the buffer."
+  (message "Testing provider %s for conversation" (type-of provider))
+  (with-temp-buffer
+    (let ((prompt (llm-make-simple-chat-prompt
+                   "I'm currently testing conversational abilities.  Please 
respond to each message with the ordinal number of your response, so just '1' 
for the first response, '2' for the second, and so on.  It's important that I 
can verify that you are working with the full conversation history, so please 
let me know if you seem to be missing anything."))
+          (outputs nil))
+      (push (llm-chat provider prompt) outputs)
+      (llm-chat-prompt-append-response prompt "This is the second message.")
+      (push (llm-chat provider prompt) outputs)
+      (llm-chat-prompt-append-response prompt "This is the third message.")
+      (push (llm-chat provider prompt) outputs)
+      (message "SUCCESS: Provider %s provided a conversation with responses 
%s" (type-of provider)
+               (nreverse outputs)))))
+
+(defun llm-tester-chat-conversation-sync (provider)
+  "Test that PROVIDER can handle a conversation."
+  (message "Testing provider %s for conversation" (type-of provider))
+  (let ((prompt (llm-make-simple-chat-prompt
+                 "I'm currently testing conversational abilities.  Please 
respond to each message with the ordinal number of your response, so just '1' 
for the first response, '2' for the second, and so on.  It's important that I 
can verify that you are working with the full conversation history, so please 
let me know if you seem to be missing anything."))
+        (outputs nil))
+    (push (llm-chat provider prompt) outputs)
+    (llm-chat-prompt-append-response prompt "This is the second message.")
+    (push (llm-chat provider prompt) outputs)
+    (llm-chat-prompt-append-response prompt "This is the third message.")
+    (push (llm-chat provider prompt) outputs)
+    (message "SUCCESS: Provider %s provided a conversation with responses %s" 
(type-of provider)
+             (nreverse outputs))))
+
+(defun llm-tester-chat-conversation-async (provider)
+  "Test that PROVIDER can handle a conversation."
+  (message "Testing provider %s for conversation" (type-of provider))
+  (let ((prompt (llm-make-simple-chat-prompt
+                 "I'm currently testing conversational abilities.  Please 
respond to each message with the ordinal number of your response, so just '1' 
for the first response, '2' for the second, and so on.  It's important that I 
can verify that you are working with the full conversation history, so please 
let me know if you seem to be missing anything."))
+        (outputs nil)
+        (buf (current-buffer)))
+    (llm-chat-async provider prompt
+                    (lambda (response)
+                      (push response outputs)
+                      (llm-chat-prompt-append-response prompt "This is the 
second message.")
+                      (llm-chat-async provider prompt
+                                      (lambda (response)
+                                        (unless (eq buf (current-buffer))
+                                          (message "ERROR: Provider %s 
returned a response not in the original buffer" (type-of provider)))
+                                        (push response outputs)
+                                        (llm-chat-prompt-append-response 
prompt "This is the third message.")
+                                        (llm-chat-async provider prompt
+                                                        (lambda (response)
+                                                          (push response 
outputs)
+                                                          (message "SUCCESS: 
Provider %s provided a conversation with responses %s" (type-of provider) 
(nreverse outputs)))
+                                                        (lambda (type message)
+                                                          (message "ERROR: 
Provider %s returned an error of type %s with message %s" (type-of provider) 
type message))))
+                                      (lambda (type message)
+                                        (unless (eq buf (current-buffer))
+                                          (message "ERROR: Provider %s 
returned a response not in the original buffer" (type-of provider)))
+                                        (message "ERROR: Provider %s returned 
an error of type %s with message %s" (type-of provider) type message))))
+                    (lambda (type message)
+                      (unless (eq buf (current-buffer))
+                        (message "ERROR: Provider %s returned a response not 
in the original buffer" (type-of provider)))
+                      (message "ERROR: Provider %s returned an error of type 
%s with message %s" (type-of provider) type message)))))
+
+(defun llm-tester-chat-conversation-streaming (provider)
+  "Test that PROVIDER can handle a conversation."
+  (message "Testing provider %s for conversation" (type-of provider))
+  (let ((prompt (llm-make-simple-chat-prompt
+                 "I'm currently testing conversational abilities.  Please 
respond to each message with the ordinal number of your response, so just '1' 
for the first response, '2' for the second, and so on.  It's important that I 
can verify that you are working with the full conversation history, so please 
let me know if you seem to be missing anything.")))
+    (let ((buf (get-buffer-create "*llm-streaming-conversation-tester*")))
+      (llm-chat-streaming-to-point
+       provider prompt buf (with-current-buffer buf (point-max))
+       (lambda ()
+         (goto-char (point-max)) (insert "\n")
+         (llm-chat-prompt-append-response prompt "This is the second message.")
+         (llm-chat-streaming-to-point
+          provider prompt
+          buf (with-current-buffer buf (point-max))
+          (lambda ()
+            (goto-char (point-max)) (insert "\n")
+            (llm-chat-prompt-append-response prompt "This is the third 
message.")
+            (llm-chat-streaming-to-point
+             provider prompt buf (with-current-buffer buf (point-max))
+             (lambda ()
+               (message "SUCCESS: Provider %s provided a conversation with 
responses %s" (type-of provider) (buffer-string))
+               (kill-buffer buf))))))))))
+
 (defun llm-tester-all (provider)
   "Test all llm functionality for PROVIDER."
   (llm-tester-embedding-sync provider)
diff --git a/llm-vertex.el b/llm-vertex.el
index 9443a6a1fc..a896cf7098 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -265,36 +265,51 @@ If STREAMING is non-nil, use the URL for the streaming 
API."
 
 (cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback 
error-callback)
   (llm-vertex-refresh-key provider)
-  (llm-request-async (llm-vertex--chat-url provider nil)
+  (let ((buf (current-buffer)))
+    (llm-request-async (llm-vertex--chat-url provider nil)
                      :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
                      :data (llm-vertex--chat-request-v1 prompt)
                      :on-success (lambda (data)
-                                   (funcall response-callback 
(llm-vertex--chat-extract-response data)))
+                                   (let ((response 
(llm-vertex--chat-extract-response data)))
+                                     (setf (llm-chat-prompt-interactions 
prompt)
+                                           (append 
(llm-chat-prompt-interactions prompt)
+                                                   (list 
(make-llm-chat-prompt-interaction :role 'assistant :content response))))
+                                     (llm-request-callback-in-buffer buf 
response-callback response)))
                      :on-error (lambda (_ data)
-                                 (funcall error-callback 'error
-                                          (llm-vertex--error-message data)))))
+                                 (llm-request-callback-in-buffer buf 
error-callback 'error
+                                          (llm-vertex--error-message data))))))
 
 (cl-defmethod llm-chat ((provider llm-vertex) prompt)
   (llm-vertex-refresh-key provider)
-  (llm-vertex--handle-response
-   (llm-request-sync (llm-vertex--chat-url provider nil)
-                     :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
-                     :data (llm-vertex--chat-request-v1 prompt))
-   #'llm-vertex--chat-extract-response))
+  (let ((response (llm-vertex--handle-response
+                 (llm-request-sync
+                  (llm-vertex--chat-url provider nil)
+                  :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
+                  :data (llm-vertex--chat-request-v1 prompt))
+                 #'llm-vertex--chat-extract-response)))
+    (setf (llm-chat-prompt-interactions prompt)
+          (append (llm-chat-prompt-interactions prompt)
+                  (list (make-llm-chat-prompt-interaction :role 'assistant 
:content response))))
+    response))
 
 (cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt 
partial-callback response-callback error-callback)
   (llm-vertex-refresh-key provider)
-  (llm-request-async (llm-vertex--chat-url provider t)
+  (let ((buf (current-buffer)))
+    (llm-request-async (llm-vertex--chat-url provider t)
                      :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
                      :data (llm-vertex--chat-request-ui prompt)
                      :on-partial (lambda (partial)
                                    (when-let ((response 
(llm-vertex--get-partial-chat-ui-repsonse partial)))
-                                     (funcall partial-callback response)))
+                                     (llm-request-callback-in-buffer buf 
partial-callback response)))
                      :on-success (lambda (data)
-                                   (funcall response-callback 
(llm-vertex--get-chat-response-ui data)))
+                                   (let ((response 
(llm-vertex--get-chat-response-ui data)))
+                                     (setf (llm-chat-prompt-interactions 
prompt)
+                                           (append 
(llm-chat-prompt-interactions prompt)
+                                                   (list 
(make-llm-chat-prompt-interaction :role 'assistant :content response))))
+                                     (llm-request-callback-in-buffer buf 
response-callback response)))
                      :on-error (lambda (_ data)
-                                 (funcall error-callback 'error
-                                          (llm-vertex--error-message data)))))
+                                 (llm-request-callback-in-buffer buf 
error-callback 'error
+                                                                 
(llm-vertex--error-message data))))))
 
 (provide 'llm-vertex)
 
diff --git a/llm.el b/llm.el
index 460fbee370..32561d08a7 100644
--- a/llm.el
+++ b/llm.el
@@ -72,8 +72,18 @@ EXAMPLES is a list of conses, where the car is an example
 inputs, and cdr is the corresponding example outputs.  This is optional.
 
 INTERACTIONS is a list message sent by either the llm or the
-user.  It is a list of `llm-chat-prompt-interaction' objects.  This
-is required.
+user.  It is either a list of `llm-chat-prompt-interaction'
+objects, or a list of an opaque conversation ID (anything that is
+not a `llm-chat-prompt-interaction') followed by the latest
+`llm-chat-prompt-interaction' in the conversation to submit.  When
+building up a chat, the chat methods update this to a new value,
+and the client is expected to append a new interaction to the
+end, without otherwise introspecting the value.  The function
+`llm-chat-prompt-append-response' accomplishes that operation, and
+should be used.  Because this value is updated by the called
+function, for continuing chats the whole prompt MUST be a
+variable passed in to the chat function.  INTERACTIONS is
+required.
 
 TEMPERATURE is a floating point number with a minimum of 0, and
 maximum of 1, which controls how predictable the result is, with
@@ -95,6 +105,14 @@ an LLM, and don't need the more advanced features that the
 `llm-chat-prompt' struct makes available."
   (make-llm-chat-prompt :interactions (list (make-llm-chat-prompt-interaction 
:role 'user :content text))))
 
+(defun llm-chat-prompt-append-response (prompt response &optional role)
+  "Append a new RESPONSE to PROMPT, to continue a conversation.
+ROLE defaults to `user', which should almost always be what is needed."
+  (setf (llm-chat-prompt-interactions prompt)
+        (append (llm-chat-prompt-interactions prompt)
+                (list (make-llm-chat-prompt-interaction :role (or role 'user)
+                                                        :content response)))))
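
A sketch of the multi-turn usage this enables, with ~provider~ standing in for 
any provider struct:

#+begin_src emacs-lisp
;; Illustrative conversation loop: the same prompt variable is reused, and
;; each new user message is appended before the next call.
(let ((prompt (llm-make-simple-chat-prompt "Name a prime number.")))
  (llm-chat provider prompt)
  (llm-chat-prompt-append-response prompt "Now name a larger one.")
  (llm-chat provider prompt))
#+end_src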
+
 (cl-defgeneric llm-nonfree-message-info (provider)
   "If PROVIDER is non-free, return info for a warning.
 This should be a cons of the name of the LLM, and the URL of the
@@ -108,7 +126,10 @@ need to override it."
 
 (cl-defgeneric llm-chat (provider prompt)
   "Return a response to PROMPT from PROVIDER.
-PROMPT is a `llm-chat-prompt'.  The response is a string."
+PROMPT is a `llm-chat-prompt'.  The response is a string response from the LLM.
+
+The prompt's interactions list will be updated to encode the
+conversation so far."
   (ignore provider prompt)
   (signal 'not-implemented nil))
 
@@ -124,8 +145,13 @@ PROMPT is a `llm-chat-prompt'.  The response is a string."
 (cl-defgeneric llm-chat-async (provider prompt response-callback 
error-callback)
   "Return a response to PROMPT from PROVIDER.
 PROMPT is a `llm-chat-prompt'.
-RESPONSE-CALLBACK receives the string response.
-ERROR-CALLBACK receives the error response."
+
+RESPONSE-CALLBACK receives the final text.
+
+ERROR-CALLBACK receives the error response.
+
+The prompt's interactions list will be updated to encode the
+conversation so far."
   (ignore provider prompt response-callback error-callback)
   (signal 'not-implemented nil))
 
@@ -143,7 +169,10 @@ RESPONSE-CALLBACK receives each piece of the string 
response.
 It is called once after the response has been completed, with the
 final text.
 
-ERROR-CALLBACK receives the error response."
+ERROR-CALLBACK receives the error response.
+
+The prompt's interactions list will be updated to encode the
+conversation so far."
   (ignore provider prompt partial-callback response-callback error-callback)
   (signal 'not-implemented nil))
 

