branch: externals/llm
commit 069bd193efa094ee93cfadf51f0b81b1d5a9b98b
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: Andrew Hyatt <ahy...@gmail.com>

    Adding Gemini & Vertex streaming using plz
    
    Currently, the streaming isn't actually streaming, but the problem doesn't
    appear to be on this end; it may instead be something in plz.
    
    This is relevant to the discussion in https://github.com/ahyatt/llm/pull/29.
---
 llm-gemini.el      | 33 ++++++++++---------
 llm-request-plz.el | 93 +++++++++++++++++++++++++++++++++++-------------------
 llm-tester.el      |  2 +-
 llm-vertex.el      | 50 ++++++++++++-----------------
 4 files changed, 99 insertions(+), 79 deletions(-)

diff --git a/llm-gemini.el b/llm-gemini.el
index 6d0dbb4fd4..d3690468aa 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -121,21 +121,24 @@ If STREAMING-P is non-nil, use the streaming endpoint."
                                                                        
(llm-vertex--error-message data))))))
 
 (cl-defmethod llm-chat-streaming ((provider llm-gemini) prompt 
partial-callback response-callback error-callback)
-  (let ((buf (current-buffer)))
-    (llm-request-plz-async (llm-gemini--chat-url provider t)
-                           :data (llm-gemini--chat-request prompt)
-                           :on-partial (lambda (partial)
-                                         (when-let ((response 
(llm-vertex--get-partial-chat-response partial)))
-                                           (when (> (length response) 0)
-                                             (llm-request-callback-in-buffer 
buf partial-callback response))))
-                           :on-success (lambda (data)
-                                         (llm-request-callback-in-buffer
-                                          buf response-callback
-                                          (llm-vertex--process-and-return
-                                           provider prompt data)))
-                           :on-error (lambda (_ data)
-                                       (llm-request-callback-in-buffer buf 
error-callback 'error
-                                                                       
(llm-vertex--error-message data))))))
+  (let ((buf (current-buffer))
+        (streamed-text ""))
+    (llm-request-plz-json-array
+     (llm-gemini--chat-url provider t)
+     :data (llm-gemini--chat-request prompt)
+     :on-element (lambda (element)
+                   (when-let ((response (llm-vertex--get-chat-response 
element)))
+                     (when (> (length response) 0)
+                       (setq streamed-text (concat streamed-text response))
+                       (llm-request-callback-in-buffer buf partial-callback 
response))))
+     :on-success (lambda (data)
+                   (llm-request-callback-in-buffer
+                    buf response-callback
+                    (llm-vertex--process-and-return
+                     provider prompt (if (> (length streamed-text) 0) 
streamed-text data))))
+     :on-error (lambda (_ data)
+                 (llm-request-callback-in-buffer buf error-callback 'error
+                                                 (llm-vertex--error-message 
data))))))
 
 (defun llm-gemini--count-token-url (provider)
   "Return the URL for the count token call, using PROVIDER."
diff --git a/llm-request-plz.el b/llm-request-plz.el
index 40b92c4518..d0219111a5 100644
--- a/llm-request-plz.el
+++ b/llm-request-plz.el
@@ -113,8 +113,8 @@ the curl process and an error message."
                     (cdr curl-error))))
         (t (user-error "Unexpected error: %s" error))))
 
-(cl-defun llm-request-plz-async (url &key headers data on-success 
on-success-raw on-error
-                                     on-partial timeout)
+(cl-defun llm-request-plz-async (url &key headers data on-success 
on-success-raw media-type
+                                     on-error timeout)
   "Make a request to URL.
 Nothing will be returned.
 
@@ -131,8 +131,9 @@ and required otherwise.
 ON-ERROR will be called with the error code and a response-body.
 This is required.
 
-ON-PARTIAL will be called with the potentially incomplete response
-body as a string.  This is an optional argument.
+MEDIA-TYPE is an optional argument that sets a media type, useful
+for streaming formats.  It is expected that this is only used by
+other methods in this file.
 
 ON-SUCCESS-RAW, if set, will be called in the buffer with the
 response body, and expect the response content. This is an
@@ -140,12 +141,13 @@ optional argument, and mostly useful for streaming.  If 
not set,
 the buffer is turned into JSON and passed to ON-SUCCESS."
   (plz-media-type-request
     'post url
-    :as 'string
+    :as (if media-type
+            `(media-types ,(cons media-type plz-media-types))
+            'string)
     :body (when data
             (encode-coding-string (json-encode data) 'utf-8))
     :headers (append headers
-                     '(("Accept-encoding" . "identity")
-                       ("Content-Type" . "application/json")))
+                     '(("Content-Type" . "application/json")))
     :then (lambda (response)
             (when on-success-raw
               (funcall on-success-raw response))
@@ -156,6 +158,41 @@ the buffer is turned into JSON and passed to ON-SUCCESS."
               (llm-request-plz--handle-error error on-error)))
     :timeout (or timeout llm-request-plz-timeout)))
 
+(cl-defun llm-request-plz-json-array (url &key headers data on-error on-success
+                                          on-element timeout)
+  "Make a request to URL.
+
+HEADERS will be added in the Authorization header, in addition to
+standard json header. This is optional.
+
+DATA will be jsonified and sent as the request body.
+This is required.
+
+ON-SUCCESS will be called with the response body as a json
+object. This is optional in the case that ON-SUCCESS-DATA is set,
+and required otherwise.
+
+ON-ELEMENT will be called with each new element in the enclosing
+JSON array that is being streamed.
+
+ON-ERROR will be called with the error code and a response-body.
+This is required.
+"
+  (llm-request-plz-async url
+                         :headers headers
+                         :data data
+                         :on-error on-error
+                         ;; Have to use :on-success-raw because :on-success 
will try to
+                         ;; convert to JSON, and this already should be JSON.
+                         :on-success-raw (lambda (resp)
+                                           (funcall on-success 
(plz-response-body resp)))
+                         :timeout timeout
+                         :media-type
+                         (cons 'application/json-array
+                               (plz-media-type:application/json-array
+                                :handler (lambda (resp)
+                                           (funcall on-element 
(plz-response-body resp)))))))
+
 (cl-defun llm-request-plz-event-stream (url &key headers data on-error 
on-success
                                             event-stream-handlers timeout)
   "Make a request to URL.
@@ -178,32 +215,22 @@ with the new event data as a string.
 ON-ERROR will be called with the error code and a response-body.
 This is required.
 "
-  (plz-media-type-request
-    'post url
-    :as `(media-types
-          ,(cons
-            (cons 'text/event-stream
-                  (plz-media-type:text/event-stream
-                   ;; Convert so that each event handler gets the body, not the
-                   ;; `plz-response' itself.
-                   :events (mapcar
-                            (lambda (cons)
-                              (cons (car cons)
-                                    (lambda (_ resp) (funcall (cdr cons) 
(plz-event-source-event-data resp)))))
-                            event-stream-handlers)))
-            plz-media-types))
-    :body (when data
-            (encode-coding-string (json-encode data) 'utf-8))
-    :headers (append headers
-                     '(("Accept-encoding" . "identity")
-                       ("Content-Type" . "application/json")))
-    :then (lambda (response)
-            (when on-success
-              (funcall on-success (plz-response-body response))))
-    :else (lambda (error)
-            (when on-error
-              (llm-request-plz--handle-error error on-error)))
-    :timeout (or timeout llm-request-plz-timeout)))
+  (llm-request-plz-async url
+                         :headers headers
+                         :data data
+                         :on-error on-error
+                         :on-success on-success
+                         :timeout timeout
+                         :media-type
+                         (cons 'text/event-stream
+                                (plz-media-type:text/event-stream
+                                 ;; Convert so that each event handler gets 
the body, not the
+                                 ;; `plz-response' itself.
+                                 :events (mapcar
+                                          (lambda (cons)
+                                            (cons (car cons)
+                                                  (lambda (_ resp) (funcall 
(cdr cons) (plz-event-source-event-data resp)))))
+                                          event-stream-handlers)))))
 
 ;; This is a useful method for getting out of the request buffer when it's time
 ;; to make callbacks.
diff --git a/llm-tester.el b/llm-tester.el
index f5fd9fdfa0..402f416d6e 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -132,7 +132,7 @@
       :interactions (list
                      (make-llm-chat-prompt-interaction
                       :role 'user
-                      :content "Write a medium length poem in iambic 
pentameter about the pleasures of using Emacs.  The poem should make snide 
references to vi."))
+                      :content "Write a long story about a magic backpack."))
       :temperature 0.5)
      (lambda (text)
        (unless (eq buf (current-buffer))
diff --git a/llm-vertex.el b/llm-vertex.el
index 0683a0e730..14fa218535 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -171,19 +171,6 @@ This handles different kinds of models."
                    ""))
              "NOTE: No response was sent back by the LLM, the prompt may have 
violated safety checks."))))
 
-(defun llm-vertex--get-partial-chat-response (response)
-  "Return the partial response from as much of RESPONSE as we can parse."
-  (with-temp-buffer
-    (insert response)
-    (let ((result ""))
-      ;; We just will parse every line that is "text": "..." and concatenate 
them.   
-      (save-excursion
-        (goto-char (point-min))
-        (while (re-search-forward (rx (seq (literal "\"text\": ")
-                                           (group-n 1 ?\" (* any) ?\") 
line-end)) nil t)
-          (setq result (concat result (json-read-from-string (match-string 
1))))))
-      result)))
-
 (defun llm-vertex--chat-request (prompt)
   "Return an alist with chat input for the streaming API.
 PROMPT contains the input to the call to the chat API."
@@ -284,7 +271,7 @@ ERROR-CALLBACK is called when an error is detected."
          (llm-provider-utils-process-result
           provider prompt
           (llm-vertex--normalize-function-calls
-           (llm-vertex--get-chat-response response)))))
+           (if (stringp response) response (llm-vertex--get-chat-response 
response))))))
     return-val))
 
 (defun llm-vertex--chat-url (provider &optional streaming)
@@ -324,22 +311,25 @@ If STREAMING is non-nil, use the URL for the streaming 
API."
 
 (cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt 
partial-callback response-callback error-callback)
   (llm-vertex-refresh-key provider)
-  (let ((buf (current-buffer)))
-    (llm-request-async (llm-vertex--chat-url provider)
-                     :headers `(("Authorization" . ,(format "Bearer %s" 
(llm-vertex-key provider))))
-                     :data (llm-vertex--chat-request prompt)
-                     :on-partial (lambda (partial)
-                                   (when-let ((response 
(llm-vertex--get-partial-chat-response partial)))
-                                     (when (> (length response) 0)
-                                       (llm-request-callback-in-buffer buf 
partial-callback response))))
-                     :on-success (lambda (data)
-                                   (llm-request-callback-in-buffer
-                                    buf response-callback
-                                    (llm-vertex--process-and-return
-                                     provider prompt data)))
-                     :on-error (lambda (_ data)
-                                 (llm-request-callback-in-buffer buf 
error-callback 'error
-                                                                 
(llm-vertex--error-message data))))))
+  (let ((buf (current-buffer))
+        (streamed-text ""))
+    (llm-request-plz-json-array
+     (llm-vertex--chat-url provider)
+     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key 
provider))))
+     :data (llm-vertex--chat-request prompt)
+     :on-element (lambda (element)
+                   (when-let ((response (llm-vertex--get-chat-response 
element)))
+                     (when (> (length response) 0)
+                       (setq streamed-text (concat streamed-text response))
+                       (llm-request-callback-in-buffer buf partial-callback 
response))))
+     :on-success (lambda (data)
+                   (llm-request-callback-in-buffer
+                    buf response-callback
+                    (llm-vertex--process-and-return
+                     provider prompt (if (> (length streamed-text) 0) 
streamed-text data))))
+     :on-error (lambda (_ data)
+                 (llm-request-callback-in-buffer buf error-callback 'error
+                                                 (llm-vertex--error-message 
data))))))
 
 ;; Token counts
 ;; https://cloud.google.com/vertex-ai/docs/generative-ai/get-token-count

Reply via email to