branch: externals/llm
commit 99f9fab641258266f7e143ee94d8d1fdc82df62a
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>
Add ability to get JSON object as a specific schema (#123)

This will close request https://github.com/ahyatt/llm/issues/47.
---
 NEWS.org                   |  3 ++-
 README.org                 | 28 +++++++++++++++++++++++++++-
 llm-integration-test.el    | 16 ++++++++++++++++
 llm-ollama.el              | 12 ++++++++++--
 llm-openai.el              | 17 +++++++++++++++--
 llm-provider-utils-test.el | 22 ++++++++++++++++++++++
 llm-provider-utils.el      | 37 +++++++++++++++++++++++++++++++++++++
 llm-vertex.el              | 12 ++++++++++--
 llm.el                     | 11 +++++++++--
 9 files changed, 148 insertions(+), 10 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index b026b7e5a3..929897bfb3 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,4 +1,5 @@
-* Version 0.19.2
+* Version 0.20.0
+- Add ability to output according to a JSON spec.
 - Add Gemini 2.0 Flash and Llama 3.3 and QwQ models.
 * Version 0.19.1
 - Fix Open AI context length sizes, which are mostly smaller than advertised.
diff --git a/README.org b/README.org
index 0784c96142..c3a4b2694f 100644
--- a/README.org
+++ b/README.org
@@ -149,10 +149,36 @@ For all callbacks, the callback will be executed in the buffer the function was
 - ~llm-chat-token-limit~. Gets the token limit for the chat model. This isn't possible for some backends like =llama.cpp=, in which the model isn't selected or known by this library.
 
 And the following helper functions:
-  - ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens response-format non-standard-params~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual syste [...]
+  - ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens response-format non-standard-params~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual syste [...]
  - ~llm-chat-prompt-to-text prompt~: From a prompt, return a string representation. This is not usually suitable for passing to LLMs, but for debugging purposes.
  - ~llm-chat-streaming-to-point provider prompt buffer point finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream to ~point~ in ~buffer~.
  - ~llm-chat-prompt-append-response prompt response role~: Append a new response (from the user, usually) to the prompt. The ~role~ is optional, and defaults to ~'user~.
+*** JSON schema
+By using the ~response-format~ argument to ~llm-make-chat-prompt~, you can ask the LLM to return items according to a specified JSON schema, based on the [[https://json-schema.org][JSON Schema Spec]]. Not everything is supported, but the most commonly used parts are. To specify the JSON schema, we use a plist-based approach. JSON objects are defined with ~(:type object :properties (:<var1> <schema1> :<var2> <schema2> ... :<varn> <scheman>) :required (<req var1> ... <req varn>))~. Arr [...]
+
+Some examples:
+#+begin_src emacs-lisp
+(llm-chat my-provider (llm-make-chat-prompt
+                       "How many countries are there? Return the result as JSON."
+                       :response-format
+                       '(:type object :properties (:num (:type integer)) :required (num))))
+#+end_src
+
+#+RESULTS:
+: {"num":195}
+
+#+begin_src emacs-lisp
+(llm-chat ash/llm-openai-small (llm-make-chat-prompt
+                                "Which editor is hard to quit? Return the result as JSON."
+                                :response-format
+                                '(:type object :properties (:editor (:enum ("emacs" "vi" "vscode"))
+                                                            :authors (:type array :items (:type string)))
+                                  :required (editor authors))))
+#+end_src
+
+#+RESULTS:
+: {"editor":"vi","authors":["Bram Moolenaar","Bill Joy"]}
+
 ** Logging
 Interactions with the =llm= package can be logged by setting ~llm-log~ to a non-nil value. This should be done only when developing. The log can be found in the =*llm log*= buffer.
 ** How to handle conversations
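The schema plists shown in the README addition compose recursively: an array's ~:items~ can itself be an object schema. The following is an illustrative sketch only, not part of the patch; ~my-provider~ is a placeholder for whatever provider you have configured, and the prompt text is made up.

#+begin_src emacs-lisp
(llm-chat my-provider
          (llm-make-chat-prompt
           "List two text editors and their original authors. Return the result as JSON."
           :response-format
           ;; An object whose single property is an array of objects.
           '(:type object
             :properties (:editors
                          (:type array
                           :items (:type object
                                   :properties (:name (:type string)
                                                :author (:type string))
                                   :required (name author))))
             :required (editors))))
#+end_src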
diff --git a/llm-integration-test.el b/llm-integration-test.el
index 600188dd90..edfc6be768 100644
--- a/llm-integration-test.el
+++ b/llm-integration-test.el
@@ -295,6 +295,22 @@ else. We really just want to see if it's in the right ballpark."
     (should (stringp result))
     (should (llm-integration-test-string-eq "owl" (string-trim (downcase result)))))))
 
+(llm-def-integration-test llm-json-test (provider)
+  (when (member 'json-response (llm-capabilities provider))
+    (let ((result (llm-chat
+                   provider
+                   (llm-make-chat-prompt
+                    "List the 3 largest cities in France in order of population, giving the results in JSON."
+                    :response-format
+                    '(:type object
+                      :properties
+                      (:cities (:type array :items (:type string)))
+                      :required (cities))))))
+      (should (equal
+               '(:cities ["Paris" "Marseille" "Lyon"])
+               (let ((json-object-type 'plist))
+                 (json-read-from-string result)))))))
+
 (llm-def-integration-test llm-count-tokens (provider)
   (let ((result (llm-count-tokens provider "What is the capital of France?")))
     (should (integerp result))
diff --git a/llm-ollama.el b/llm-ollama.el
index c428e25879..ce774466e0 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -108,6 +108,12 @@ PROVIDER is the llm-ollama provider."
   "Return the chat response from the server RESPONSE."
   (assoc-default 'content (assoc-default 'message response)))
 
+(defun llm-ollama--response-format (format)
+  "Return the response format for FORMAT."
+  (if (eq format 'json)
+      :json
+    (llm-provider-utils-json-schema format)))
+
 (cl-defmethod llm-provider-chat-request ((provider llm-ollama) prompt streaming)
   (let (request-alist messages options)
     (setq messages
@@ -144,8 +150,10 @@ PROVIDER is the llm-ollama provider."
     (when (llm-chat-prompt-functions prompt)
       (push `("tools" . ,(mapcar #'llm-provider-utils-openai-function-spec
                                  (llm-chat-prompt-functions prompt)))
             request-alist))
-    (when (eq 'json (llm-chat-prompt-response-format prompt))
-      (push `("format" . ,(llm-chat-prompt-response-format prompt)) request-alist))
+    (when (llm-chat-prompt-response-format prompt)
+      (push `("format" . ,(llm-ollama--response-format
+                           (llm-chat-prompt-response-format prompt)))
+            request-alist))
     (push `("stream" . ,(if streaming t :json-false)) request-alist)
     (when (llm-chat-prompt-temperature prompt)
       (push `("temperature" . ,(llm-chat-prompt-temperature prompt)) options))
diff --git a/llm-openai.el b/llm-openai.el
index 7d9d3555d7..668ed72775 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -154,6 +154,17 @@ PROVIDER is the Open AI provider struct."
 (cl-defmethod llm-provider-chat-extract-error ((provider llm-openai) err-response)
   (llm-provider-embedding-extract-error provider err-response))
 
+(defun llm-openai--response-format (format)
+  "Return the Open AI response format for FORMAT."
+  (if (eq format 'json) '(("type" . "json_object"))
+    ;; If not JSON, this must be a json response spec.
+    `(("type" . "json_schema")
+      ("json_schema" . (("name" . "response")
+                        ("strict" . t)
+                        ("schema" . ,(append
+                                      (llm-provider-utils-json-schema format)
+                                      '(("additionalProperties" . :json-false)))))))))
+
 (cl-defmethod llm-provider-chat-request ((provider llm-openai) prompt streaming)
   "From PROMPT, create the chat request data to send.
 PROVIDER is the Open AI provider.
@@ -199,8 +210,10 @@ STREAMING if non-nil, turn on response streaming."
                    (llm-chat-prompt-interactions prompt)))
           request-alist)
     (push `("model" . ,(llm-openai-chat-model provider)) request-alist)
-    (when (eq 'json (llm-chat-prompt-response-format prompt))
-      (push '("response_format" . (("type" . "json_object"))) request-alist))
+    (when (llm-chat-prompt-response-format prompt)
+      (push `("response_format" . ,(llm-openai--response-format
+                                    (llm-chat-prompt-response-format prompt)))
+            request-alist))
     (when (llm-chat-prompt-temperature prompt)
       (push `("temperature" . ,(* (llm-chat-prompt-temperature prompt) 2.0)) request-alist))
     (when (llm-chat-prompt-max-tokens prompt)
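For reference, a rough sketch of the ~response_format~ fragment the new ~llm-openai--response-format~ helper builds for a non-~json~ spec. This is hand-derived from the code above, not captured from a real request, so treat the shape as approximate.

#+begin_src emacs-lisp
;; Hypothetical call, mirroring the README's first example schema.
(llm-openai--response-format
 '(:type object :properties (:num (:type integer)) :required (num)))
;; Expected shape (approximate):
;; (("type" . "json_schema")
;;  ("json_schema" . (("name" . "response")
;;                    ("strict" . t)
;;                    ("schema" . ((type . object)
;;                                 (properties (num (type . integer)))
;;                                 (required num)
;;                                 ("additionalProperties" . :json-false))))))
#+end_src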
"json_object")) + ;; If not JSON, this must be a json response spec. + `(("type" . "json_schema") + ("json_schema" . (("name" . "response") + ("strict" . t) + ("schema" . ,(append + (llm-provider-utils-json-schema format) + '(("additionalProperties" . :json-false))))))))) + (cl-defmethod llm-provider-chat-request ((provider llm-openai) prompt streaming) "From PROMPT, create the chat request data to send. PROVIDER is the Open AI provider. @@ -199,8 +210,10 @@ STREAMING if non-nil, turn on response streaming." (llm-chat-prompt-interactions prompt))) request-alist) (push `("model" . ,(llm-openai-chat-model provider)) request-alist) - (when (eq 'json (llm-chat-prompt-response-format prompt)) - (push '("response_format" . (("type" . "json_object"))) request-alist)) + (when (llm-chat-prompt-response-format prompt) + (push `("response_format" . ,(llm-openai--response-format + (llm-chat-prompt-response-format prompt))) + request-alist)) (when (llm-chat-prompt-temperature prompt) (push `("temperature" . ,(* (llm-chat-prompt-temperature prompt) 2.0)) request-alist)) (when (llm-chat-prompt-max-tokens prompt) diff --git a/llm-provider-utils-test.el b/llm-provider-utils-test.el index b73bf7c741..a6a3e5fb36 100644 --- a/llm-provider-utils-test.el +++ b/llm-provider-utils-test.el @@ -94,5 +94,27 @@ (should (equal "Previous interactions:\n\nUser: Hello\nAssistant: Hi! How can I assist you?\n\nThe current conversation follows:\n\nEarl Grey, hot." (llm-chat-prompt-interaction-content (nth 0 (llm-chat-prompt-interactions prompt-for-second-request))))))) +(ert-deftest llm-provider-utils-json-schema () + (should (equal '((type . object) + (properties + (cities + (type . array) + (items + (type . string)))) + (required . (cities))) + (llm-provider-utils-json-schema + '(:type object + :properties + (:cities (:type array :items (:type string))) + :required (cities))))) + (should (equal '((type . boolean)) + (llm-provider-utils-json-schema '(:type boolean)))) + (should (equal '((type . object) + (properties . ((data . ((enum . ("pizza" "calzone" "pasta"))))))) + (llm-provider-utils-json-schema + '(:type object + :properties + (:data (:enum ("pizza" "calzone" "pasta")))))))) + (provide 'llm-provider-utils-test) ;;; llm-provider-utils-test.el ends here diff --git a/llm-provider-utils.el b/llm-provider-utils.el index 84de0fd9e8..cf63020aa5 100644 --- a/llm-provider-utils.el +++ b/llm-provider-utils.el @@ -478,6 +478,43 @@ If MODEL cannot be found, warn and return DEFAULT, which by default is 4096." (warn "No model predefined for model %s, using restrictive defaults" model) (or default 4096)))) +(defun llm-provider--decolon (sym) + "Remove a colon from the beginnging of SYM." + (let ((s (symbol-name sym))) + (if (string-prefix-p ":" s) + (intern (substring s 1)) + sym))) + +(defun llm-provider-utils-json-schema (spec) + "Return a JSON schema object from SPEC. +This is a plist that represents a JSON type specification. +An example is `(:type object + :properties + (:cities (:type array :items (:type string))) + :required (cities))'" + (let ((schema `((type . 
diff --git a/llm-vertex.el b/llm-vertex.el
index 065558c7a1..927f94f25d 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -235,6 +235,10 @@ the key must be regenerated every hour."
                                   (llm-chat-prompt-functions prompt)))))
    (llm-vertex--chat-parameters prompt)))
 
+(defun llm-vertex--response-schema (schema)
+  "Return vertex SCHEMA from our standard schema spec."
+  (llm-provider-utils-json-schema schema))
+
 (defun llm-vertex--chat-parameters (prompt)
   "From PROMPT, create the parameters section.
 Return value is a cons for adding to an alist, unless there is
@@ -245,8 +249,12 @@ nothing to add, in which case it is nil."
                 params-alist))
     (when (llm-chat-prompt-max-tokens prompt)
       (push `(maxOutputTokens . ,(llm-chat-prompt-max-tokens prompt)) params-alist))
-    (pcase (llm-chat-prompt-response-format prompt)
-      ('json (push '("response_mime_type" . "application/json") params-alist)))
+    (when-let ((format (llm-chat-prompt-response-format prompt)))
+      (push '("response_mime_type" . "application/json") params-alist)
+      (unless (eq 'json format)
+        (push `("response_schema" . ,(llm-vertex--response-schema
+                                      (llm-chat-prompt-response-format prompt)))
+              params-alist)))
     (when params-alist
       `((generation_config . ,params-alist)))))
diff --git a/llm.el b/llm.el
index e30ddcfbcb..5229652aae 100644
--- a/llm.el
+++ b/llm.el
@@ -277,11 +277,18 @@ This is not required.
 MAX-TOKENS is the maximum number of tokens to generate. This is optional.
 
-If RESPONSE-FORMAT is `json' (the currently only accepted value), we
+If RESPONSE-FORMAT is `json' (the currently only accepted symbol), we
 will attempt to force output to fit the format. This should not be used
 with function calling. If this is set, the instructions to the LLM
 should tell the model about the format, for example with JSON format by
-including examples or describing the schema.
+including examples or describing the schema. This can also be a
+structure defining the JSON schema. The structure is a plist that can be
+either `(:type <type> <additional-properties...>)', or in the case of
+enums `(:enum (<val1> .. <valn>))'. LLMs will often require the
+top-level schema passed in to be an object: `(:type object
+:properties (:val schema :other-val other-schema) :required (val
+other-val))'. Often, all properties must be required. Arrays can be
+specified with `(:type array :items <schema>)'.
 
 CONTEXT, EXAMPLES, FUNCTIONS, TEMPERATURE, and MAX-TOKENS are usually
 turned into part of the interaction, and if so, they will