branch: externals/llm
commit 1e66e4ed3e153480e22c1837e78ad56a0d0060e7
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>
Add reasoning control and support it for Claude and Ollama (#200)

Other tools like Open AI and Gemini also have reasoning controls, but have
not publicized the API for this, so they cannot yet support reasoning.

This also adds support for getting Claude reasoning.

As part of this, we also increased the possible `max_tokens` for Claude,
which otherwise would have limited the reasoning output.

---------

Co-authored-by: Andrew Hyatt <ahy...@continua.ai>
---
 NEWS.org                |  4 ++++
 README.org              |  2 +-
 llm-claude.el           | 37 ++++++++++++++++++++++++++++++++-----
 llm-integration-test.el | 16 ++++++++++++++++
 llm-models.el           |  2 +-
 llm-ollama.el           |  5 +++++
 llm-test.el             | 14 +++++++-------
 llm.el                  | 17 +++++++++++++++--
 8 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index ca626ab994..5ad76a1d7f 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,7 @@
+* Version 0.27.0
+- Add =reasoning= option to control the amount of thinking that happens for reasoning models.
+- Fix incorrectly low default Claude max tokens
+- Fix Claude extraction of text and reasoning results when reasoning
 * Version 0.26.1
 - Add Claude 4 models
 - Fix error using Open AI for batch embeddings
diff --git a/README.org b/README.org
index 4469a584bd..0c06ff1cdc 100644
--- a/README.org
+++ b/README.org
@@ -171,7 +171,7 @@ For all callbacks, the callback will be executed in the buffer the function was
 - ~llm-chat-token-limit~. Gets the token limit for the chat model. This isn't possible for some backends like =llama.cpp=, in which the model isn't selected or known by this library.
 
 And the following helper functions:
- - ~llm-make-chat-prompt text &keys context examples tools temperature max-tokens response-format non-standard-params~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual system pr [...]
+ - ~llm-make-chat-prompt text &keys context examples tools temperature max-tokens response-format reasoning non-standard-params~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual [...]
 - ~llm-chat-prompt-to-text prompt~: From a prompt, return a string representation. This is not usually suitable for passing to LLMs, but for debugging purposes.
 - ~llm-chat-streaming-to-point provider prompt buffer point finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream to ~point~ in ~buffer~.
 - ~llm-chat-prompt-append-response prompt response role~: Append a new response (from the user, usually) to the prompt. The ~role~ is optional, and defaults to ~'user~.
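A minimal usage sketch of the new option follows (it is not part of the
patch): it assumes llm 0.27.0 or later and a `provider' object built
elsewhere (for example with `make-llm-claude') whose `llm-capabilities'
include `reasoning'; the `my/' helper name is illustrative.

  ;; Sketch only: `provider' is any llm provider advertising `reasoning'.
  (require 'llm)

  (defun my/llm-ask-with-reasoning (provider question)
    "Ask PROVIDER QUESTION with a medium reasoning budget."
    (let ((prompt (llm-make-chat-prompt
                   question
                   ;; One of nil, `none', `light', `medium', or `maximum'.
                   :reasoning 'medium)))
      ;; A non-nil third argument requests the multi-output plist result,
      ;; which carries the model's thinking under `:reasoning'.
      (llm-chat provider prompt t)))

  ;; For example:
  ;;   (plist-get (my/llm-ask-with-reasoning provider
  ;;                                         "Will interest rates fall?")
  ;;              :reasoning)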
diff --git a/llm-claude.el b/llm-claude.el
index 73c0b64637..6f108595db 100644
--- a/llm-claude.el
+++ b/llm-claude.el
@@ -56,7 +56,16 @@
          `(:model ,(llm-claude-chat-model provider)
            :stream ,(if stream t :false)
            ;; Claude requires max_tokens
-           :max_tokens ,(or (llm-chat-prompt-max-tokens prompt) 4096)
+           :max_tokens ,(or (llm-chat-prompt-max-tokens prompt)
+                            (cond ((string-match "opus-4-0" (llm-claude-chat-model provider))
+                                   32000)
+                                  ((or (string-match "sonnet-4-0" (llm-claude-chat-model provider))
+                                       (string-match "sonnet-3-7" (llm-claude-chat-model provider)))
+                                   64000)
+                                  ((string-match "opus" (llm-claude-chat-model provider))
+                                   4096)
+                                  (t
+                                   8192)))
            :messages ,(vconcat
                        (mapcar
                         (lambda (interaction)
@@ -90,6 +99,20 @@
       (setq request (plist-put request :system system)))
     (when (llm-chat-prompt-temperature prompt)
       (setq request (plist-put request :temperature (llm-chat-prompt-temperature prompt))))
+    (when (llm-chat-prompt-reasoning prompt)
+      (setq request (plist-put request :thinking
+                               (let (thinking-plist)
+                                 (setq thinking-plist (plist-put thinking-plist
+                                                                 :type
+                                                                 (if (eq (llm-chat-prompt-reasoning prompt) 'none)
+                                                                     "disabled" "enabled")))
+                                 (if (not (eq (llm-chat-prompt-reasoning prompt) 'none))
+                                     (plist-put thinking-plist :budget_tokens
+                                                (pcase (llm-chat-prompt-reasoning prompt)
+                                                  ('light 3000)
+                                                  ('medium 10000)
+                                                  ('maximum 32000))))
+                                 thinking-plist))))
     (append request (llm-provider-utils-non-standard-params-plist prompt))))
 
 (defun llm-claude--multipart-content (content)
@@ -132,9 +155,11 @@
 
 (cl-defmethod llm-provider-chat-extract-result ((_ llm-claude) response)
   (let ((content (aref (assoc-default 'content response) 0)))
-    (if (equal (assoc-default 'type content) "text")
-        (assoc-default 'text content)
-      (format "Unsupported non-text response: %s" content))))
+    (assoc-default 'text content)))
+
+(cl-defmethod llm-provider-extract-reasoning ((_ llm-claude) response)
+  (let ((content (aref (assoc-default 'content response) 0)))
+    (assoc-default 'thinking content)))
 
 (cl-defmethod llm-provider-streaming-media-handler ((_ llm-claude)
                                                     receiver err-receiver)
@@ -176,6 +201,8 @@
            (cond ((equal type "text_delta")
                   (funcall receiver `(:text ,(assoc-default 'text delta))))
+                 ((equal type "thinking_delta")
+                  (funcall receiver `(:reasoning ,(assoc-default 'text delta))))
                  ((equal type "input_json_delta")
                   (funcall receiver `(:tool-uses-raw
                                       ,(vector
@@ -246,7 +273,7 @@ DATA is a vector of lists produced by `llm-provider-streaming-media-handler'."
   "Claude")
 
 (cl-defmethod llm-capabilities ((_ llm-claude))
-  (list 'streaming 'tool-use 'streaming-tool-use 'image-input 'pdf-input))
+  (list 'streaming 'tool-use 'streaming-tool-use 'image-input 'pdf-input 'reasoning))
 
 (cl-defmethod llm-provider-append-to-prompt ((_ llm-claude) prompt result
                                              &optional tool-use-results)
diff --git a/llm-integration-test.el b/llm-integration-test.el
index 8f7bfdd324..bd54e2008d 100644
--- a/llm-integration-test.el
+++ b/llm-integration-test.el
@@ -382,6 +382,22 @@ else. We really just want to see if it's in the right ballpark."
     ;; Test that we can send the function back to the provider without error.
     (llm-chat provider prompt))))
 
+(llm-def-integration-test llm-reasoning (provider)
+  (when (member 'reasoning (llm-capabilities provider))
+    (let ((prompt (llm-make-chat-prompt "Will interest rates fall in the next year?"
+                                        :reasoning 'medium)))
+      (should (plist-get (llm-chat provider prompt t) :reasoning)))))
+
+(llm-def-integration-test llm-reasoning-streaming (provider)
+  (when (member 'streaming-reasoning (llm-capabilities provider))
+    (let ((prompt (llm-make-chat-prompt "Will interest rates fall in the next year?" :reasoning 'medium))
+          (result nil))
+      (llm-chat-streaming provider prompt #'ignore
+                          (lambda (response) (setq result response))
+                          (lambda (_ err) (error err)) t)
+      (while (null result)
+        (sleep-for 0.1))
+      (should (plist-get result :reasoning)))))
+
 (llm-def-integration-test llm-image-chat (provider)
   ;; On github, the emacs we use doesn't have image support, so we can't use
   ;; image objects.
diff --git a/llm-models.el b/llm-models.el
index 7b281894a3..d57823e1d2 100644
--- a/llm-models.el
+++ b/llm-models.el
@@ -252,7 +252,7 @@ REGEX is a regular expression that can be used to identify the model, uniquely (
    :regex "gemma-?3")
   (make-llm-model
    :name "deepseek-r1" :symbol 'deepseek-r1
-   :capabilities '(generation free-software) ;; MIT license
+   :capabilities '(generation reasoning free-software) ;; MIT license
    :context-length 128000
    :regex "deepseek-r1")
   (make-llm-model
diff --git a/llm-ollama.el b/llm-ollama.el
index ef708c21cb..9047f264b3 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -196,6 +196,11 @@ These are just the text inside the tag, not the tag itself."))
                               (llm-ollama--response-format
                                (llm-chat-prompt-response-format prompt)))))
     (setq request-plist (plist-put request-plist :stream (if streaming t :false)))
+    (when (llm-chat-prompt-reasoning prompt)
+      (setq request-plist (plist-put request-plist :think
+                                     (if (eq 'none (llm-chat-prompt-reasoning prompt))
+                                         :false
+                                         't))))
     (when (llm-chat-prompt-temperature prompt)
       (setq options (plist-put options :temperature (llm-chat-prompt-temperature prompt))))
     (when (llm-chat-prompt-max-tokens prompt)
diff --git a/llm-test.el b/llm-test.el
index 193ee7fd6a..1d68a5f726 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -110,11 +110,11 @@
                :messages [(:role "user" :content "Hello world")]
                :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
               :messages [(:role "user" :content "Hello world")]
               :stream :false)
      :claude-stream (:model "model"
-                     :max_tokens 4096
+                     :max_tokens 8192
                      :messages [(:role "user" :content "Hello world")]
                      :stream t))
     (:name "Request with temperature"
@@ -130,7 +130,7 @@
                :options (:temperature 0.5)
                :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
              :messages [(:role "user" :content "Hello world")]
              :temperature 0.5
              :stream :false))
@@ -151,7 +151,7 @@
                           (:role "user" :content "Hello world")]
               :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
              :messages [(:role "user" :content "Hello world")]
              :system "context\nHere are 2 examples of how to respond:\n\nUser: input1\nAssistant: output1\nUser: input2\nAssistant: output2"
              :stream :false))
@@ -170,7 +170,7 @@
                           (:role "user" :content "I am user!")]
               :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
              :messages [(:role "user" :content "Hello world")
                         (:role "assistant" :content "Hello human")
                         (:role "user" :content "I am user!")]
              :stream :false))
@@ -197,7 +197,7 @@
                                       :images ["aW1hZ2UgZGF0YQ=="])]
               :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
              :messages [(:role "user"
                          :content [(:type "text"
                                     :text "What is this?")
@@ -250,7 +250,7 @@
                                                 :required ["arg1"])))]
               :stream :false)
      :claude (:model "model"
-              :max_tokens 4096
+              :max_tokens 8192
              :messages [(:role "user" :content "Hello world")]
              :tools
[(:name "func" diff --git a/llm.el b/llm.el index 1c11b0f7e5..40b5f62c15 100644 --- a/llm.el +++ b/llm.el @@ -71,7 +71,8 @@ See %s for the details on the restrictions on use." name tos))) "This stores all the information needed for a structured chat prompt. Use of this directly is deprecated, instead use `llm-make-chat-prompt'." - context examples interactions tools temperature max-tokens response-format non-standard-params) + context examples interactions tools temperature max-tokens response-format + reasoning non-standard-params) (cl-defstruct llm-chat-prompt-interaction "This defines a single interaction given as part of a chat prompt. @@ -232,7 +233,7 @@ instead." (cl-defun llm-make-chat-prompt (content &key context examples tools temperature max-tokens response-format - non-standard-params) + reasoning non-standard-params) "Create a `llm-chat-prompt' with CONTENT sent to the LLM provider. This is the most correct and easy way to create an @@ -300,6 +301,17 @@ usually turned into part of the interaction, and if so, they will be put in the first interaction of the prompt (before anything in PREVIOUS-INTERACTIONS). +REASONING controls the reasoning (also called thinking) the model does. +This generally enables a separate step of thinking about the answer +which is different from the answer, and will be returned either not at +all (if MULTIPART is false) or with the `:reasoning' key if MULTIPART is +true. This can be nil (whatever the default for the provider is), +`none', `light', `medium', and `maximum'. The settings will have +different exact effects per providers, but for providers that allow +control over the thinking tokens, `light' will result in a small number +of tokens used for thinking, `medium' would use half the maximum, and +`maximum' would use the maximum tokens. + NON-STANDARD-PARAMS is an alist of other options that the provider may or may not know how to handle. These are expected to be provider specific. Don't use this if you want the prompt to be used amongst @@ -322,6 +334,7 @@ vectors (if a list). This is optional." :temperature temperature :max-tokens max-tokens :response-format response-format + :reasoning reasoning :non-standard-params non-standard-params)) (defun llm-chat-prompt-append-response (prompt response &optional role)