branch: externals/llm commit 92914e3304514253ac74a360b75923277af0c78b Author: Andrew Hyatt <ahy...@gmail.com> Commit: Andrew Hyatt <ahy...@gmail.com>
Improve how conversations work and make it easier to handle them With this commit, we do something new: update the interactions in the prompt after each message. This is necessary because ollama stores the conversation as a vector. If we ever integrate with the command-line "llm" package (https://llm.datasette.io/en/stable/index.html), it works in a similar way. However, Open AI and Google Cloud Vertex rely on a list of interactions. Because of this discrepancy, we store data in the interactions list, which is now opaque, and must be added to with the new function `llm-chat-prompt-append-response'. The README has an example of how this all works. --- README.org | 17 ++++++++++--- llm-fake.el | 24 ++++++++++++------- llm-ollama.el | 51 +++++++++++++++++++++++---------------- llm-openai.el | 27 +++++++++++++++------ llm-tester.el | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ llm-vertex.el | 27 +++++++++++++++------ llm.el | 41 ++++++++++++++++++++++++++----- 7 files changed, 212 insertions(+), 52 deletions(-) diff --git a/README.org b/README.org index 64f4280a90..7b94708d55 100644 --- a/README.org +++ b/README.org @@ -56,9 +56,7 @@ To build upon the example from before: #+end_src * Programmatic use Client applications should require the =llm= package, and code against it. Most functions are generic, and take a struct representing a provider as the first argument. The client code, or the user themselves can then require the specific module, such as =llm-openai=, and create a provider with a function such as ~(make-llm-openai :key user-api-key)~. The client application will use this provider to call all the generic functions. - -A list of all the main functions: - +** Main functions - ~llm-chat provider prompt~: With user-chosen ~provider~, and a ~llm-chat-prompt~ structure (containing context, examples, interactions, and parameters such as temperature and max tokens), send that prompt to the LLM and wait for the string output. - ~llm-chat-async provider prompt response-callback error-callback~: Same as ~llm-chat~, but executes in the background. Takes a ~response-callback~ which will be called with the text response. The ~error-callback~ will be called in case of error, with the error symbol and an error message. - ~llm-chat-streaming provider prompt partial-callback response-callback error-callback~: Similar to ~llm-chat-async~, but requests a streaming response. As the response is built up, ~partial-callback~ is called with all the text retrieved up to the current point. Finally, ~response-callback~ is called with the complete text. @@ -70,6 +68,19 @@ A list of all the main functions: - ~llm-make-simple-chat-prompt text~: For the common case of just wanting a simple text prompt without the richness that ~llm-chat-prompt~ struct provides, use this to turn a string into a ~llm-chat-prompt~ that can be passed to the main functions above. - ~llm-chat-prompt-to-text prompt~: Somewhat opposite of the above, from a prompt, return a string representation. This is not usually suitable for passing to LLMs, but for debugging purposes. - ~llm-chat-streaming-to-point provider prompt buffer point finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream to ~point~ in ~buffer~. + - ~llm-chat-prompt-append-response prompt response role~: Append a new response (from the user, usually) to the prompt. The ~role~ is optional, and defaults to ~'user~. +** How to handle conversations +Conversations can take place by repeatedly calling ~llm-chat~ and its variants.
For a conversation, the entire prompt must be a variable, because the ~llm-chat-prompt-interactions~ slot will be changed by the chat functions to store the conversation. For some providers, this will store the history directly in ~llm-chat-prompt-interactions~, but for others (such as ollama), the conversation history is opaque. For that reason, the correct way to handle a conversation is to repeatedly append the next user message to the same prompt with ~llm-chat-prompt-append-response~ and then call the chat function with that prompt again, as in the following example: + +#+begin_src emacs-lisp +(defvar-local llm-chat-streaming-prompt nil) +(defun start-or-continue-conversation (text) + "Called when the user has input TEXT as the next input." + (if llm-chat-streaming-prompt + (llm-chat-prompt-append-response llm-chat-streaming-prompt text) + (setq llm-chat-streaming-prompt (llm-make-simple-chat-prompt text))) + (llm-chat-streaming-to-point provider llm-chat-streaming-prompt (current-buffer) (point-max) (lambda ()))) +#+end_src * Contributions If you are interested in creating a provider, please send a pull request, or open a bug. This library is part of GNU ELPA, so any major provider that we include in this module needs to be written by someone with FSF papers. However, you can always write a module and put it on a different package archive, such as MELPA. diff --git a/llm-fake.el b/llm-fake.el index 7ead4e01ed..3bbddebbbe 100644 --- a/llm-fake.el +++ b/llm-fake.el @@ -57,14 +57,19 @@ message cons. If nil, the response will be a simple vector." (with-current-buffer (get-buffer-create (llm-fake-output-to-buffer provider)) (goto-char (point-max)) (insert "\nCall to llm-chat\n" (llm-chat-prompt-to-text prompt) "\n"))) - (if (llm-fake-chat-action-func provider) - (let* ((f (llm-fake-chat-action-func provider)) - (result (funcall f))) - (pcase (type-of result) - ('string result) - ('cons (signal (car result) (cdr result))) - (_ (error "Incorrect type found in `chat-action-func': %s" (type-of result))))) - "Sample response from `llm-chat-async'")) + (let ((result + (if (llm-fake-chat-action-func provider) + (let* ((f (llm-fake-chat-action-func provider)) + (result (funcall f))) + (pcase (type-of result) + ('string result) + ('cons (signal (car result) (cdr result))) + (_ (error "Incorrect type found in `chat-action-func': %s" (type-of result))))) + "Sample response from `llm-chat-async'"))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content result)))) + result)) (cl-defmethod llm-chat-streaming ((provider llm-fake) prompt partial-callback response-callback _error-callback) (when (llm-fake-output-to-buffer provider) @@ -85,6 +90,9 @@ message cons. If nil, the response will be a simple vector." (funcall partial-callback accum) (sleep-for 0 100)) (string-split text)) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content text)))) (funcall response-callback text)))) (cl-defmethod llm-embedding ((provider llm-fake) string) diff --git a/llm-ollama.el b/llm-ollama.el index 7a8d53e02d..d1a7c4b39d 100644 --- a/llm-ollama.el +++ b/llm-ollama.el @@ -106,20 +106,15 @@ STREAMING if non-nil, turn on response streaming.
(car example) (cdr example))) (llm-chat-prompt-examples prompt) "\n")))) - (setq text-prompt (concat text-prompt "\n" - (let ((conversationp (> (length (llm-chat-prompt-interactions prompt)) 1))) - (if conversationp - (concat - "The following interactions have already happened: " - (mapcar (lambda (p) - (format "%s: %s\n" - (pcase (llm-chat-prompt-interaction-role p) - ('user "User") - ('assistant "Assistant")) - (string-trim (llm-chat-prompt-interaction-content p)))) - (llm-chat-prompt-interactions prompt))) - (string-trim - (llm-chat-prompt-interaction-content (car (llm-chat-prompt-interactions prompt)))))))) + ;; The last item should always be the latest interaction, which is the prompt. + (setq text-prompt (concat text-prompt + "\n" + (string-trim (llm-chat-prompt-interaction-content + (car (last (llm-chat-prompt-interactions prompt))))))) + ;; If the first item isn't an interaction, it is the opaque context from a + ;; previous exchange, which we pass back to ollama as the chat context. + (when (not (llm-chat-prompt-interaction-p (car (llm-chat-prompt-interactions prompt)))) + (push `("context" . ,(car (llm-chat-prompt-interactions prompt))) request-alist)) (push `("prompt" . ,(string-trim text-prompt)) request-alist) (push `("model" . ,(llm-ollama-chat-model provider)) request-alist) (when (llm-chat-prompt-temperature prompt) @@ -155,16 +150,28 @@ STREAMING if non-nil, turn on response streaming." (setq-local llm-ollama-last-position last-position) current-response)) +(defun llm-ollama--get-final-response (response) + "Return the final post-streaming json output from RESPONSE." + (with-temp-buffer + (insert response) + ;; Find the last json object in the buffer. + (goto-char (point-max)) + (search-backward "{" nil t) + (json-read))) + (cl-defmethod llm-chat ((provider llm-ollama) prompt) ;; We expect to be in a new buffer with the response, which we use to store ;; local variables. The temp buffer won't have the response, but that's fine, ;; we really just need it for the local variables. (with-temp-buffer - (llm-ollama--get-partial-chat-response - (llm-request-sync-raw-output (llm-ollama--url provider "generate") - :data (llm-ollama--chat-request provider prompt) - ;; ollama is run on a user's machine, and it can take a while. - :timeout llm-ollama-chat-timeout)))) + (let ((output (llm-request-sync-raw-output + (llm-ollama--url provider "generate") + :data (llm-ollama--chat-request provider prompt) + ;; ollama is run on a user's machine, and it can take a while. + :timeout llm-ollama-chat-timeout))) + (setf (llm-chat-prompt-interactions prompt) + (list (assoc-default 'context (llm-ollama--get-final-response output)))) + (llm-ollama--get-partial-chat-response output)))) (cl-defmethod llm-chat-async ((provider llm-ollama) prompt response-callback error-callback) (llm-chat-streaming provider prompt (lambda (_)) response-callback error-callback)) @@ -172,8 +179,10 @@ STREAMING if non-nil, turn on response streaming.
(cl-defmethod llm-chat-streaming ((provider llm-ollama) prompt partial-callback response-callback error-callback) (llm-request-async (llm-ollama--url provider "generate") :data (llm-ollama--chat-request provider prompt) - :on-success-raw (lambda (data) - (funcall response-callback (llm-ollama--get-partial-chat-response data))) + :on-success-raw (lambda (response) + (setf (llm-chat-prompt-interactions prompt) + (list (assoc-default 'context (llm-ollama--get-final-response response)))) + (funcall response-callback (llm-ollama--get-partial-chat-response response))) :on-partial (lambda (data) (when-let ((response (llm-ollama--get-partial-chat-response data))) (funcall partial-callback response))) diff --git a/llm-openai.el b/llm-openai.el index 7f44f8d62b..e63389cf0d 100644 --- a/llm-openai.el +++ b/llm-openai.el @@ -153,7 +153,12 @@ STREAMING if non-nil, turn on response streaming." (llm-request-async "https://api.openai.com/v1/chat/completions" :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider)))) :data (llm-openai--chat-request provider prompt) - :on-success (lambda (data) (funcall response-callback (llm-openai--extract-chat-response data))) + :on-success (lambda (data) + (let ((response (llm-openai--extract-chat-response data))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + (funcall response-callback response))) :on-error (lambda (_ data) (let ((errdata (cdr (assoc 'error data)))) (funcall error-callback 'error @@ -164,11 +169,15 @@ STREAMING if non-nil, turn on response streaming." (cl-defmethod llm-chat ((provider llm-openai) prompt) (unless (llm-openai-key provider) (error "To call Open AI API, the key must have been set")) - (llm-openai--handle-response - (llm-request-sync "https://api.openai.com/v1/chat/completions" - :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider)))) - :data (llm-openai--chat-request provider prompt)) - #'llm-openai--extract-chat-response)) + (let ((response (llm-openai--handle-response + (llm-request-sync "https://api.openai.com/v1/chat/completions" + :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider)))) + :data (llm-openai--chat-request provider prompt)) + #'llm-openai--extract-chat-response))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + response)) (defvar-local llm-openai-current-response "" "The response so far from the server.") @@ -210,7 +219,11 @@ STREAMING if non-nil, turn on response streaming.
(when-let ((response (llm-openai--get-partial-chat-response data))) (funcall partial-callback response))) :on-success-raw (lambda (data) - (funcall response-callback (llm-openai--get-partial-chat-response data))))) + (let ((response (llm-openai--get-partial-chat-response data))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + (funcall response-callback response))))) (provide 'llm-openai) diff --git a/llm-tester.el b/llm-tester.el index 3a4e9a3cda..8b00851df7 100644 --- a/llm-tester.el +++ b/llm-tester.el @@ -132,6 +132,83 @@ (lambda (type message) (message "ERROR: Provider %s returned an error of type %s with message %s" (type-of provider) type message))))) +(defun llm-tester-chat-conversation (provider chat-func) + "Test that PROVIDER can handle a conversation via CHAT-FUNC. +CHAT-FUNC should insert the chat response to the buffer." + (message "Testing provider %s for conversation" (type-of provider)) + (with-temp-buffer + (let ((prompt (llm-make-simple-chat-prompt + "I'm currently testing conversational abilities. Please respond to each message with the ordinal number of your response, so just '1' for the first response, '2' for the second, and so on. It's important that I can verify that you are working with the full conversation history, so please let me know if you seem to be missing anything.")) + (outputs nil)) + (push (llm-chat provider prompt) outputs) + (llm-chat-prompt-append-response prompt "This is the second message.") + (push (llm-chat provider prompt) outputs) + (llm-chat-prompt-append-response prompt "This is the third message.") + (push (llm-chat provider prompt) outputs) + (message "SUCCESS: Provider %s provided a conversation with responses %s" (type-of provider) + (nreverse outputs))))) + +(defun llm-tester-chat-conversation-sync (provider) + "Test that PROVIDER can handle a conversation." + (message "Testing provider %s for conversation" (type-of provider)) + (let ((prompt (llm-make-simple-chat-prompt + "I'm currently testing conversational abilities. Please respond to each message with the ordinal number of your response, so just '1' for the first response, '2' for the second, and so on. It's important that I can verify that you are working with the full conversation history, so please let me know if you seem to be missing anything.")) + (outputs nil)) + (push (llm-chat provider prompt) outputs) + (llm-chat-prompt-append-response prompt "This is the second message.") + (push (llm-chat provider prompt) outputs) + (llm-chat-prompt-append-response prompt "This is the third message.") + (push (llm-chat provider prompt) outputs) + (message "SUCCESS: Provider %s provided a conversation with responses %s" (type-of provider) + (nreverse outputs)))) + +(defun llm-tester-chat-conversation-async (provider) + "Test that PROVIDER can handle a conversation." + (message "Testing provider %s for conversation" (type-of provider)) + (let ((prompt (llm-make-simple-chat-prompt + "I'm currently testing conversational abilities. Please respond to each message with the ordinal number of your response, so just '1' for the first response, '2' for the second, and so on.
It's important that I can verify that you are working with the full conversation history, so please let me know if you seem to be missing anything.")) + (outputs nil)) + (llm-chat-async provider prompt + (lambda (response) + (push response outputs) + (llm-chat-prompt-append-response prompt "This is the second message.") + (llm-chat-async provider prompt + (lambda (response) + (push response outputs) + (llm-chat-prompt-append-response prompt "This is the third message.") + (llm-chat-async provider prompt + (lambda (response) + (push response outputs) + (message "SUCCESS: Provider %s provided a conversation with responses %s" (type-of provider) (nreverse outputs))) + (lambda (type message) + (message "ERROR: Provider %s returned an error of type %s with message %s" (type-of provider) type message)))) + (lambda (type message) + (message "ERROR: Provider %s returned an error of type %s with message %s" (type-of provider) type message)))) + (lambda (type message) + (message "ERROR: Provider %s returned an error of type %s with message %s" (type-of provider) type message))))) + +(defun llm-tester-chat-conversation-streaming (provider) + "Test that PROVIDER can handle a conversation." + (message "Testing provider %s for conversation" (type-of provider)) + (let ((prompt (llm-make-simple-chat-prompt + "I'm currently testing conversational abilities. Please respond to each message with the ordinal number of your response, so just '1' for the first response, '2' for the second, and so on. It's important that I can verify that you are working with the full conversation history, so please let me know if you seem to be missing anything."))) + (let ((buf (get-buffer-create "*llm-streaming-conversation-tester*"))) + (llm-chat-streaming-to-point + provider prompt buf (with-current-buffer buf (point-max)) + (lambda () + (with-current-buffer buf (goto-char (point-max)) (insert "\n")) + (llm-chat-prompt-append-response prompt "This is the second message.") + (llm-chat-streaming-to-point + provider prompt + buf (with-current-buffer buf (point-max)) + (lambda () + (with-current-buffer buf (goto-char (point-max)) (insert "\n")) + (llm-chat-prompt-append-response prompt "This is the third message.") + (llm-chat-streaming-to-point + provider prompt buf (with-current-buffer buf (point-max)) + (lambda () + (message "SUCCESS: Provider %s provided a conversation with responses %s" (type-of provider) (with-current-buffer buf (buffer-string))) + (kill-buffer buf)))))))))) + (defun llm-tester-all (provider) "Test all llm functionality for PROVIDER." (llm-tester-embedding-sync provider) diff --git a/llm-vertex.el b/llm-vertex.el index 775d51317f..dbe88598cd 100644 --- a/llm-vertex.el +++ b/llm-vertex.el @@ -269,18 +269,27 @@ If STREAMING is non-nil, use the URL for the streaming API." :headers `(("Authorization" . 
,(format "Bearer %s" (llm-vertex-key provider)))) :data (llm-vertex--chat-request-v1 prompt) :on-success (lambda (data) - (funcall response-callback (llm-vertex--chat-extract-response data))) + (let ((response (llm-vertex--chat-extract-response data))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + (funcall response-callback response))) :on-error (lambda (_ data) (funcall error-callback 'error (llm-vertex--error-message data))))) (cl-defmethod llm-chat ((provider llm-vertex) prompt) (llm-vertex-refresh-key provider) - (llm-vertex--handle-response - (llm-request-sync (llm-vertex--chat-url provider nil) - :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider)))) - :data (llm-vertex--chat-request-v1 prompt)) - #'llm-vertex--chat-extract-response)) + (let ((response (llm-vertex--handle-response + (llm-request-sync + (llm-vertex--chat-url provider nil) + :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider)))) + :data (llm-vertex--chat-request-v1 prompt)) + #'llm-vertex--chat-extract-response))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + response)) (cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt partial-callback response-callback error-callback) (llm-vertex-refresh-key provider) @@ -291,7 +300,11 @@ If STREAMING is non-nil, use the URL for the streaming API." (when-let ((response (llm--vertex--get-partial-chat-ui-repsonse partial))) (funcall partial-callback response))) :on-success (lambda (data) - (funcall response-callback (llm-vertex--get-chat-response-ui data))) + (let ((response (llm-vertex--get-chat-response-ui data))) + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role 'assistant :content response)))) + (funcall response-callback response))) :on-error (lambda (_ data) (funcall error-callback 'error (llm-vertex--error-message data))))) diff --git a/llm.el b/llm.el index 460fbee370..32561d08a7 100644 --- a/llm.el +++ b/llm.el @@ -72,8 +72,18 @@ EXAMPLES is a list of conses, where the car is an example inputs, and cdr is the corresponding example outputs. This is optional. INTERACTIONS is a list message sent by either the llm or the -user. It is a list of `llm-chat-prompt-interaction' objects. This -is required. +user. It is a either list of `llm-chat-prompt-interaction' +objects or list of an opaque converation ID (anything not a +`llm-chat-prompt-interaction') and the latest +`llm-chat-prompt-interaction' in the conversation to submit. When +building up a chat, the chat methods update this to a new value, +and the client is expected to append a new interaction to the +end, without introspecting the value otherwise. The function +`llm-chat-prompt-append-response' accomplishes that operation, and +should be used. 'Because this value updated by the called +function, for continuing chats, the whole prompt MUST be a +variable passed in to the chat function. INTERACTIONS is +required. TEMPERATURE is a floating point number with a minimum of 0, and maximum of 1, which controls how predictable the result is, with @@ -95,6 +105,14 @@ an LLM, and don't need the more advanced features that the `llm-chat-prompt' struct makes available." 
(make-llm-chat-prompt :interactions (list (make-llm-chat-prompt-interaction :role 'user :content text)))) +(defun llm-chat-prompt-append-response (prompt response &optional role) + "Append a new RESPONSE to PROMPT, to continue a conversation. +ROLE defaults to `user', which should almost always be what is needed." + (setf (llm-chat-prompt-interactions prompt) + (append (llm-chat-prompt-interactions prompt) + (list (make-llm-chat-prompt-interaction :role (or role 'user) + :content response))))) + (cl-defgeneric llm-nonfree-message-info (provider) "If PROVIDER is non-free, return info for a warning. This should be a cons of the name of the LLM, and the URL of the @@ -108,7 +126,10 @@ need to override it." (cl-defgeneric llm-chat (provider prompt) "Return a response to PROMPT from PROVIDER. -PROMPT is a `llm-chat-prompt'. The response is a string." +PROMPT is a `llm-chat-prompt'. The response is a string returned by the LLM. + +The prompt's interactions list will be updated to encode the +conversation so far." (ignore provider prompt) (signal 'not-implemented nil)) @@ -124,8 +145,13 @@ PROMPT is a `llm-chat-prompt'. The response is a string." (cl-defgeneric llm-chat-async (provider prompt response-callback error-callback) "Return a response to PROMPT from PROVIDER. PROMPT is a `llm-chat-prompt'. -RESPONSE-CALLBACK receives the string response. -ERROR-CALLBACK receives the error response." + +RESPONSE-CALLBACK receives the final text. + +ERROR-CALLBACK receives the error response. + +The prompt's interactions list will be updated to encode the +conversation so far." (ignore provider prompt response-callback error-callback) (signal 'not-implemented nil)) @@ -143,7 +169,10 @@ RESPONSE-CALLBACK receives each piece of the string response. It is called once after the response has been completed, with the final text. -ERROR-CALLBACK receives the error response. +ERROR-CALLBACK receives the error response. + +The prompt's interactions list will be updated to encode the +conversation so far." (ignore provider prompt partial-callback response-callback error-callback) (signal 'not-implemented nil))
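As a minimal illustration of the conversation flow described above, the following sketch shows the intended client-side usage. It uses the ~llm-fake~ provider so it runs without any API key, and the local bindings ~provider~ and ~prompt~ are only illustrative names, not part of the library.

#+begin_src emacs-lisp
;; Sketch only: drive a two-turn conversation through the generic API,
;; using the llm-fake provider so no API key is needed.
(require 'llm)
(require 'llm-fake)

(let* ((provider (make-llm-fake))
       ;; The prompt must be kept in a variable: each chat call rewrites
       ;; `llm-chat-prompt-interactions' to encode the conversation so far
       ;; (opaquely for providers such as ollama).
       (prompt (llm-make-simple-chat-prompt "Hello, what can you do?")))
  (llm-chat provider prompt)               ; first assistant response
  ;; Do not touch the interactions slot directly; append the next user
  ;; message with the new helper instead.
  (llm-chat-prompt-append-response prompt "Tell me more.")
  (llm-chat provider prompt))              ; second assistant response
#+end_src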