branch: externals/llm
commit 4058691d3f9cb8324c63212260728cc7b7cc1699
Author: Hraban <[email protected]>
Commit: GitHub <[email protected]>

    fix: OpenAI API keys passed as multibyte strings (#44)
    
    Emacs has two types of strings: multibyte and unibyte. The request library
    is essentially a giant ‘concat’ call, which converts the entire result to
    multibyte if any single component is multibyte, including the headers. Even
    if you encode the body, that effect is spoiled by a single multibyte header
    string. This happens regardless of whether the header actually contains
    multibyte characters: while an Emacs string literal containing only simple
    characters will be unibyte, an API key fetched from an external source,
    e.g. via ‘shell-command-to-string’, will often be multibyte.
    
    Example:
    
    (dolist (x (list
                "x"
                (shell-command-to-string "printf x")
                (encode-coding-string (shell-command-to-string "printf x") 'utf-8)))
      (let ((s (concat x (encode-coding-string "é" 'utf-8))))
        (message
         "%S: %s(%s) %s, %s"
         s
         (multibyte-string-p s)
         (multibyte-string-p x)
         (string-bytes s)
         (length s))))
    
    Output:
    
    "x\303\251": nil(nil) 3, 3
    "x\303\251": t(t) 5, 3
    "x\303\251": nil(nil) 3, 3
    
    And:
    
    (multibyte-string-p "foo") ; NIL
    (multibyte-string-p "fôo") ; T
---
 llm-openai.el | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llm-openai.el b/llm-openai.el
index 40f71be9e6..6020acba1a 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -91,8 +91,13 @@ MODEL is the embedding model to use, or nil to use the default.."
   "Return the headers to use for a request from PROVIDER.")
 
 (cl-defmethod llm-openai--headers ((provider llm-openai))
-  (when (llm-openai-key provider)
-    `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))))
+  (when-let ((key (llm-openai-key provider)))
+    ;; Encode the API key to ensure it is unibyte. The request library gets
+    ;; confused by multibyte headers, which turn the entire body multibyte if
+    ;; there’s a non-ascii character, regardless of encoding. And API keys are
+    ;; likely to be obtained from external sources like shell-command-to-string,
+    ;; which always returns multibyte.
+    `(("Authorization" . ,(format "Bearer %s" (encode-coding-string key 'utf-8))))))
 
 (cl-defmethod llm-provider-headers ((provider llm-openai))
   (llm-openai--headers provider))

Reply via email to