branch: externals/llm
commit 6f9c604e584a9c5bc68e083c32eee6130f0f4231
Author: Roman Scherer <ro...@burningswell.com>
Commit: Roman Scherer <ro...@burningswell.com>

    Decode body and chunks using a coding system
    
    The coding system is taken from the "charset" parameter of the content
    type header, otherwise 'utf-8 will be assumed.
    
    The content types are symbols now. I eventually plan to do this for
    the event types as well at some point.
---
 llm-request-plz.el  |  2 +-
 plz-event-source.el |  5 +--
 plz-media-type.el   | 95 +++++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/llm-request-plz.el b/llm-request-plz.el
index 03eb6ae1cc..40b92c4518 100644
--- a/llm-request-plz.el
+++ b/llm-request-plz.el
@@ -182,7 +182,7 @@ This is required.
     'post url
     :as `(media-types
           ,(cons
-            (cons "text/event-stream"
+            (cons 'text/event-stream
                   (plz-media-type:text/event-stream
                    ;; Convert so that each event handler gets the body, not the
                    ;; `plz-response' itself.
diff --git a/plz-event-source.el b/plz-event-source.el
index fe09cdc4a8..0bdb7ebf5e 100644
--- a/plz-event-source.el
+++ b/plz-event-source.el
@@ -371,7 +371,7 @@
   "Return the media types of the event SOURCE."
   (with-slots (handlers) source
     (let ((media-type (plz-media-type:text/event-stream :events handlers)))
-      (cons (cons "text/event-stream" media-type) plz-media-types))))
+      (cons (cons 'text/event-stream media-type) plz-media-types))))
 
 (cl-defmethod plz-event-source-open ((source plz-http-event-source))
   "Open a connection to the URL of the event SOURCE."
@@ -401,7 +401,8 @@
 ;; Content Type: text/event-stream
 
 (defclass plz-media-type:text/event-stream 
(plz-media-type:application/octet-stream)
-  ((name :initform "text/event-stream")
+  ((type :initform 'text)
+   (subtype :initform 'event-stream)
    (events :documentation "Association list from event type to handler."
            :initarg :events
            :initform nil
diff --git a/plz-media-type.el b/plz-media-type.el
index 4b1f493e66..e7f13f8a87 100644
--- a/plz-media-type.el
+++ b/plz-media-type.el
@@ -35,11 +35,37 @@
 (require 'plz)
 
 (defclass plz-media-type ()
-  ((name
-    :documentation "The name of the media type."
-    :initarg :name
-    :initform "application/octet-stream"
-    :type string)))
+  ((type
+    :documentation "The media type."
+    :initarg :type
+    :type symbol)
+   (subtype
+    :documentation "The media subtype."
+    :initarg :subtype
+    :subtype symbol)
+   (parameters
+    :documentation "The parameters of the media type."
+    :initarg :parameters
+    :initform nil
+    :subtype list)))
+
+(defun plz-media-type-charset (media-type)
+  "Return the character set of the MEDIA-TYPE."
+  (with-slots (parameters) media-type
+    (alist-get "charset" parameters nil nil #'equal)))
+
+(defun plz-media-type-coding-system (media-type)
+  "Return the coding system of the MEDIA-TYPE."
+  (coding-system-from-name (or (plz-media-type-charset media-type) "UTF-8")))
+
+(defun plz-media-type-name (media-type)
+  "Return the name of the MEDIA-TYPE as a string."
+  (with-slots (type subtype) media-type
+    (format "%s/%s" type subtype)))
+
+(defun plz-media-type-symbol (media-type)
+  "Return the name of the MEDIA-TYPE as a symbol."
+  (intern (plz-media-type-name media-type)))
 
 (cl-defgeneric plz-media-type-else (media-type error)
   "Transform the ERROR into a format suitable for MEDIA-TYPE.")
@@ -50,22 +76,43 @@
 (cl-defgeneric plz-media-type-process (media-type process chunk)
   "Process the CHUNK according to MEDIA-TYPE using PROCESS.")
 
+(defun plz-media-type-parse (header)
+  "Parse the Content-Type HEADER.
+
+Return a cons cell where the car is the MIME type, and the cdr is
+an alist of parameters."
+  (unless (or (null header) (string-blank-p header))
+    (let* ((components (split-string header ";"))
+           (mime-type (string-trim (car components)))
+           (parameters-list (cdr components))
+           (parameters-alist '()))
+      (dolist (param parameters-list parameters-alist)
+        (let* ((key-value (split-string param "="))
+               (key (string-trim (car key-value)))
+               (value (string-trim (cadr key-value) "\"")))
+          (setq parameters-alist (cons (cons key value) parameters-alist))))
+      (let ((parts (split-string mime-type "/")))
+        (plz-media-type
+         :type (intern (car parts))
+         :subtype (intern (cadr parts))
+         :parameters (nreverse parameters-alist))))))
+
 (defun plz-media-type--content-type (response)
   "Return the content type header of RESPONSE, or nil if it's not set."
   (let ((headers (plz-response-headers response)))
     (when-let (header (cdr (assoc 'content-type headers)))
-      (replace-regexp-in-string "\s*\\(;.*\\)?" "" header))))
+      (plz-media-type-parse header))))
 
 (defun plz-media--type-find (media-types media-type)
   "Lookup the MEDIA-TYPE in MEDIA-TYPES."
-  (or (alist-get media-type media-types nil nil #'equal)
+  (or (alist-get (plz-media-type-symbol media-type) media-types)
       (alist-get t media-types)
       (plz-media-type:application/octet-stream)))
 
 (defun plz-media-type--of-response (media-types response)
   "Lookup the content type of RESPONSE in MEDIA-TYPES."
   (let ((media-type (plz-media-type--content-type response)))
-    (plz-media--type-find media-types media-type)))
+    (clone (plz-media--type-find media-types media-type))))
 
 (defvar-local plz-media-type--current nil
   "The media type of the process buffer.")
@@ -89,8 +136,10 @@ CHUNK is a part of the HTTP body."
     (with-current-buffer (process-buffer process)
       (let ((moving (= (point) (process-mark process))))
         (if-let (media-type plz-media-type--current)
-            (let ((response plz-media-type--response))
-              (setf (plz-response-body response) chunk)
+            (let ((coding-system (plz-media-type-coding-system media-type))
+                  (response plz-media-type--response))
+              (setf (plz-response-body response)
+                    (decode-coding-string chunk coding-system))
               (plz-media-type-process media-type process response))
           (progn
             (save-excursion
@@ -102,10 +151,13 @@ CHUNK is a part of the HTTP body."
               (let ((body-start (point)))
                 (goto-char (point-min))
                 (let* ((response (prog1 (plz--response) (widen)))
-                       (media-type (plz-media-type--of-response media-types 
response)))
+                       (media-type (plz-media-type--of-response media-types 
response))
+                       (coding-system (plz-media-type-coding-system 
media-type)))
                   (setq-local plz-media-type--current media-type)
                   (when-let (body (plz-response-body response))
                     (when (> (length body) 0)
+                      (setf (plz-response-body response)
+                            (decode-coding-string body coding-system))
                       (delete-region body-start (point-max))
                       (set-marker (process-mark process) (point))
                       (plz-media-type-process media-type process response)))
@@ -117,7 +169,8 @@ CHUNK is a part of the HTTP body."
 ;; Content Type: application/octet-stream
 
 (defclass plz-media-type:application/octet-stream (plz-media-type)
-  ((name :initform "application/octet-stream")))
+  ((type :initform 'application)
+   (subtype :initform 'octet-stream)))
 
 (cl-defmethod plz-media-type-else ((media-type 
plz-media-type:application/octet-stream) error)
   "Transform the ERROR into a format suitable for MEDIA-TYPE."
@@ -142,8 +195,7 @@ CHUNK is a part of the HTTP body."
 ;; Content Type: application/json
 
 (defclass plz-media-type:application/json 
(plz-media-type:application/octet-stream)
-  ((name
-    :initform "application/json")
+  ((subtype :initform 'json)
    (array-type
     :documentation "Specifies which Lisp type is used to represent arrays.  It 
can be
 `array' (the default) or `list'."
@@ -231,7 +283,7 @@ be `hash-table', `alist' (the default) or `plist'."
 ;; Content Type: application/x-ndjson
 
 (defclass plz-media-type:application/x-ndjson (plz-media-type:application/json)
-  ((name :initform "application/x-ndjson")
+  ((subtype :initform 'x-ndjson)
    (handler
     :documentation "A function that will be called for each line that contains 
a JSON object."
     :initarg :handler
@@ -274,7 +326,7 @@ be `hash-table', `alist' (the default) or `plist'."
 ;; Content Type: application/xml
 
 (defclass plz-media-type:application/xml 
(plz-media-type:application/octet-stream)
-  ((name :initform "application/xml")))
+  ((subtype :initform 'xml)))
 
 (cl-defmethod plz-media-type-then ((media-type plz-media-type:application/xml) 
response)
   "Transform the RESPONSE into a format suitable for MEDIA-TYPE."
@@ -285,13 +337,14 @@ be `hash-table', `alist' (the default) or `plist'."
 ;; Content Type: text/html
 
 (defclass plz-media-type:text/html (plz-media-type:application/xml)
-  ((name :initform "text/html")))
+  ((type :initform 'text)
+   (subtype :initform 'xml)))
 
 (defvar plz-media-types
-  `(("application/json" . ,(plz-media-type:application/json))
-    ("application/octet-stream" . ,(plz-media-type:application/octet-stream))
-    ("application/xml" . ,(plz-media-type:application/xml))
-    ("text/html" . ,(plz-media-type:text/html))
+  `((application/json . ,(plz-media-type:application/json))
+    (application/octet-stream . ,(plz-media-type:application/octet-stream))
+    (application/xml . ,(plz-media-type:application/xml))
+    (text/html . ,(plz-media-type:text/html))
     (t . ,(plz-media-type:application/octet-stream)))
   "Alist from media type to content type.")
 

Reply via email to