branch: elpa/pg commit c68ecddd29f1eb8b9d7be67dac7df63a92514c9b Author: Eric Marsden <eric.mars...@risk-engineering.org> Commit: Eric Marsden <eric.mars...@risk-engineering.org>
Fix the large object functionality to work with v3 of the wire protocol. --- CHANGELOG.md | 5 ++ pg-lo.el | 186 +++++++++++++++++++++++++++++++++++--------------------- test/test-pg.el | 77 ++++++++++++++++++++--- 3 files changed, 190 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17b0fcddc5..f16aed3858 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog +## [0.57] - Unreleased + +- Fix the large object functionality to work with version 3 of the frontend-backend wire protocol. + + ## [0.56] - 2025-07-20 - Allow for two successive messages of type `ErrorMessage` (which is unusual, but used by the OctoDB diff --git a/pg-lo.el b/pg-lo.el index 7ffdf686d7..ec5d4f5bea 100644 --- a/pg-lo.el +++ b/pg-lo.el @@ -1,6 +1,6 @@ ;;; pg-lo.el --- Support for PostgreSQL large objects -*- lexical-binding: t -*- ;; -;; Copyright: (C) 2024 Eric Marsden +;; Copyright: (C) 2024-2025 Eric Marsden ;; Author: Eric Marsden <eric.mars...@risk-engineering.org> ;; SPDX-License-Identifier: GPL-3.0-or-later @@ -9,15 +9,14 @@ ;; Humphrey: Who is Large and to what does he object? ;; -;; Large objects are the PostgreSQL way of doing what most databases -;; call BLOBs (binary large objects). In addition to being able to -;; stream data to and from large objects, PostgreSQL's -;; object-relational capabilities allow the user to provide functions -;; which act on the objects. +;; Large objects are the PostgreSQL way of doing what most databases call BLOBs +;; (binary large objects). In addition to being able to stream data to and from +;; large (up to 4TB in size) objects, PostgreSQL's object-relational +;; capabilities allow the user to provide functions which act on the objects. ;; -;; For example, the user can define a new type called "circle", and -;; define a C or Tcl function called `circumference' which will act on -;; circles. There is also an inheritance mechanism in PostgreSQL. +;; For example, the user can define a new type called "circle", and define a C +;; or Tcl function called `circumference' which will act on circles. There is +;; also an inheritance mechanism in PostgreSQL. ;;; Code: @@ -33,17 +32,12 @@ (declare-function pg-send "pg" (con str &optional bytes)) (declare-function pg-send-uint "pg" (con num bytes)) (declare-function pg-send-char "pg" (con char)) +(declare-function pg-send-octets "pg" (con octets)) (declare-function pg-connection-set-busy "pg" (con busy)) (declare-function pg-result "pg" (result what &rest arg)) (declare-function pg-exec "pg" (con &rest args)) -(defconst pg--INV_ARCHIVE 65536) ; fe-lobj.c -(defconst pg--INV_WRITE 131072) -(defconst pg--INV_READ 262144) -(defconst pg--LO_BUFIZE 1024) -(defconst pg--MAX_MESSAGE_LEN 8192) ; libpq-fe.h - (defvar pg-lo-initialized nil) (defvar pg-lo-functions '()) @@ -66,9 +60,11 @@ (pg-result res :tuples)) (setq pg-lo-initialized t))) -;; fn is either an integer, in which case it is the OID of an element -;; in the pg_proc table, and otherwise it is a string which we look up -;; in the alist `pg-lo-functions' to find the corresponding OID. +;; Interface to PostgreSQL's "fast-path" interface, which makes it possible to +;; send simple function calls to the server. FN is either an integer, in which +;; case it is the OID of an element in the pg_proc table, and otherwise it is a +;; string which we look up in the alist `pg-lo-functions' to find the +;; corresponding OID. (cl-defun pg-fn (con fn integer-result &rest args) (pg-connection-set-busy con t) (unless pg-lo-initialized @@ -80,43 +76,63 @@ ((assoc fn pg-lo-functions) ; blech (cdr (assoc fn pg-lo-functions))) (t - (error "Unknown builtin function %s" fn))))) + (let ((msg (format "Unknown builtin function %s" fn))) + (signal 'pg-user-error (list msg))))))) + ;; https://www.postgresql.org/docs/17/protocol-message-formats.html (pg-send-char con ?F) - (pg-send-char con 0) + (let* ((arg-len (length args)) + (msg-len (+ 4 4 2 (* 2 arg-len) 2 + (cl-loop for arg in args + sum (+ 4 (if (integerp arg) 4 (length arg)))) + 2))) + (pg-send-uint con msg-len 4)) (pg-send-uint con fnid 4) - (pg-send-uint con (length args) 4) - (mapc (lambda (arg) - (cond ((integerp arg) - (pg-send-uint con 4 4) - (pg-send-uint con arg 4)) - ((stringp arg) - (pg-send-uint con (length arg) 4) - (pg-send con arg)) - (t - (error "Unknown fastpath type %s" arg)))) - args) + ; The number of argument format codes that follow + (pg-send-uint con (length args) 2) + ;; The argument format codes, either zero for text or 1 for binary. + (dolist (arg args) + (cond ((integerp arg) + (pg-send-uint con 1 2)) + ((stringp arg) + (pg-send-uint con 0 2)) + (t + (let ((msg (format "Unknown fastpath type %s" arg))) + (signal 'pg-user-error (list msg)))))) + ;; Send the number of arguments being specified to the function + (pg-send-uint con (length args) 2) + ;; Send length/value for each argument + (dolist (arg args) + (cond ((integerp arg) + (pg-send-uint con 4 4) + (pg-send-uint con arg 4)) + ((stringp arg) + (pg-send-uint con (length arg) 4) + (pg-send-octets con arg)))) + ;; Int16: the format code for the function result. Must presently be zero (text) or one (binary). + (if integer-result + (pg-send-uint con 1 2) + (pg-send-uint con 0 2)) (pg-flush con) - (cl-loop with result = (list) + (cl-loop with result = nil for c = (pg-read-char con) do (cl-case c ;; ErrorResponse (?E (pg-handle-error-response con "in pg-fn")) - ;; FunctionResultResponse - (?V (setq result t)) - - ;; Nonempty response - (?G - (let* ((len (pg-read-net-int con 4)) - (res (if integer-result - (pg-read-net-int con len) - (pg-read-chars con len)))) - (setq result res))) + ;; FunctionCallResult + (?V + (let ((msg-len (pg-read-net-int con 4)) + (value-len (pg-read-net-int con 4))) + (setq result (if integer-result + (pg-read-net-int con value-len) + (pg-read-chars con value-len))))) ;; NoticeResponse (?N - (let ((notice (pg-read-string con pg--MAX_MESSAGE_LEN))) - (message "NOTICE: %s" notice)) + ;; a Notice response has the same structure and fields as an ErrorResponse + (let ((notice (pg-read-error-response con))) + (dolist (handler pg-handle-notice-functions) + (funcall handler notice))) (when (fboundp 'unix-sync) (unix-sync))) @@ -134,82 +150,116 @@ (t (error "Unexpected character in pg-fn: %s" c)))))) -;; returns an OID -(defun pg-lo-create (connection &optional args) - (let* ((modestr (or args "r")) +(defconst pg--INV_WRITE 131072) ; fe-lobj.c +(defconst pg--INV_READ 262144) + +;; Note: recent versions of PostgreSQL support an alternative function lo_create +;; which takes the desired oid as a parameter. +(defun pg-lo-create (con &optional mode) + "Create a new large object using PostgreSQL connection CON. +Returns the OID of the new object. Note that the MODE argument is +ignored in PostgreSQL releases after v8.1." + (let* ((modestr (or mode "r")) (mode (cond ((integerp modestr) modestr) ((string= "r" modestr) pg--INV_READ) ((string= "w" modestr) pg--INV_WRITE) ((string= "rw" modestr) (logior pg--INV_READ pg--INV_WRITE)) - (t (error "pg-lo-create: bad mode %s" modestr)))) - (oid (pg-fn connection "lo_creat" t mode))) + (t (let ((msg (format "pg-lo-create: bad mode %s" mode))) + (signal 'pg-user-error (list msg)))))) + (oid (pg-fn con "lo_creat" t mode))) (cond ((not (integerp oid)) (error "Returned value not an OID: %s" oid)) ((zerop oid) (error "Can't create large object")) (t oid)))) -;; args = modestring (default "r", or "w" or "rw") -;; returns a file descriptor for use in later pg-lo-* procedures -(defun pg-lo-open (connection oid &optional args) - (let* ((modestr (or args "r")) +;; mode = modestring (default "r", or "w" or "rw") +(defun pg-lo-open (connection oid &optional mode) + "Open the PostgreSQL large object designated by OID for reading. +Uses PostgreSQL connection CON. The string MODE determines whether the +object is opened for reading (`r'), or writing (`w'), or both (`rw'). +Returns a large object descriptor that can be used with functions +`pg-lo-read', `pg-lo-write', `pg-lo-lseek', `pg-lo-tell' and `pg-lo-close'." + (let* ((modestr (or mode "r")) (mode (cond ((integerp modestr) modestr) ((string= "r" modestr) pg--INV_READ) ((string= "w" modestr) pg--INV_WRITE) ((string= "rw" modestr) (logior pg--INV_READ pg--INV_WRITE)) - (t (error "pg-lo-open: bad mode %s" modestr)))) + (t (let ((msg (format "pg-lo-open: bad mode %s" mode))) + (signal 'pg-user-error (list msg)))))) (fd (pg-fn connection "lo_open" t oid mode))) (unless (integerp fd) (error "Couldn't open large object")) fd)) +;; TODO: should we be checking the return value and signalling a pg-error on failure? (defsubst pg-lo-close (connection fd) (pg-fn connection "lo_close" t fd)) -(defsubst pg-lo-read (connection fd bytes) - (pg-fn connection "loread" nil fd bytes)) +(defun pg-lo-read (connection fd bytes) + (let* ((encoded (pg-fn connection "loread" nil fd bytes)) + (hexdigits (substring encoded 2))) + ;; (message "lo-read: hex encoded is %s" encoded) + (unless (and (eql 92 (aref encoded 0)) ; \ character + (eql ?x (aref encoded 1))) + (signal 'pg-protocol-error + (list "Unexpected format for BYTEA binary string"))) + (decode-hex-string hexdigits))) (defsubst pg-lo-write (connection fd buf) (pg-fn connection "lowrite" t fd buf)) + +(defconst pg--SEEK_SET 0) +(defconst pg--SEEK_CUR 1) +(defconst pg--SEEK_END 2) + + +;; Whence can be SEEK_SET (seek from object start), SEEK_CUR (seek from current +;; position), and SEEK_END (seek from object end). (defsubst pg-lo-lseek (connection fd offset whence) (pg-fn connection "lo_lseek" t fd offset whence)) (defsubst pg-lo-tell (connection oid) (pg-fn connection "lo_tell" t oid)) +(defsubst pg-lo-truncate (con oid len) + (pg-fn con "lo_truncate" oid len)) + (defsubst pg-lo-unlink (connection oid) (pg-fn connection "lo_unlink" t oid)) -;; returns an OID ;; FIXME should use unwind-protect here -(defun pg-lo-import (connection filename) +(defun pg-lo-import (con filename) + "Import FILENAME as a PostgreSQL large object. +Uses PostgreSQL connection CON. Returns the OID of the new object." (let* ((buf (get-buffer-create (format " *pg-%s" filename))) - (oid (pg-lo-create connection "rw")) - (fdout (pg-lo-open connection oid "w")) + (oid (pg-lo-create con "rw")) + (fdout (pg-lo-open con oid "w")) (pos (point-min))) (with-current-buffer buf (insert-file-contents-literally filename) (while (< pos (point-max)) - (pg-lo-write - connection fdout + (pg-lo-write con fdout (buffer-substring-no-properties pos (min (point-max) (cl-incf pos 1024))))) - (pg-lo-close connection fdout)) + (pg-lo-close con fdout)) (kill-buffer buf) oid)) -(defun pg-lo-export (connection oid filename) +(defun pg-lo-export (con oid filename) + "Export PostgreSQL large object desingated by OID to FILENAME. +Uses PostgreSQL connection CON." (let* ((buf (get-buffer-create (format " *pg-%d" oid))) - (fdin (pg-lo-open connection oid "r"))) + (fdin (pg-lo-open con oid "r"))) (with-current-buffer buf - (cl-do ((str (pg-lo-read connection fdin 1024) - (pg-lo-read connection fdin 1024))) + (cl-do ((str (pg-lo-read con fdin 1024) + (pg-lo-read con fdin 1024))) ((or (not str) (zerop (length str)))) (insert str)) - (pg-lo-close connection fdin) + (pg-lo-close con fdin) (write-file filename)) (kill-buffer buf))) diff --git a/test/test-pg.el b/test/test-pg.el index 7d9fd23d0d..1b708109ee 100755 --- a/test/test-pg.el +++ b/test/test-pg.el @@ -11,6 +11,7 @@ (require 'pg-geometry) (require 'pg-gis) (require 'pg-bm25) +(require 'pg-lo) (require 'ert) @@ -356,6 +357,8 @@ (pgtest-add #'pg-test-notice) (pgtest-add #'pg-test-notify :skip-variants '(cratedb cockroachdb risingwave materialize greptimedb ydb questdb spanner vertica)) + (pgtest-add #'pg-test-lo + :skip-variants '(cratedb cockroachdb risingwave materialize greptimedb ydb questdb spanner vertica)) (dolist (test (reverse tests)) (message "== Running test %s" test) (condition-case err @@ -2720,26 +2723,80 @@ bar$$")))) ; (should (> notice-counter 0))))) -;; test of large-object interface. Note the use of with-pg-transaction -;; to wrap the requests in a BEGIN..END transaction which is necessary -;; when working with large objects. +(defun pg-test-lo (con) + (pg-test-lo-read con) + (pg-test-lo-ensure-size con) + (pg-test-lo-import con)) + +;; Note the use of with-pg-transaction to wrap the requests in a BEGIN..END transaction which is +;; necessary when working with large objects. (defun pg-test-lo-read (con) + (message "Testing lo-read and friends") (with-pg-transaction con (let* ((oid (pg-lo-create con "rw")) (fd (pg-lo-open con oid "rw"))) - (message "==================================================") + (let* ((sql "SELECT oid FROM pg_catalog.pg_largeobject_metadata WHERE oid=$1") + (res (pg-exec-prepared con sql `((,oid . "int4")))) + (rows (pg-result res :tuples))) + (should (eql 1 (length rows)))) (pg-lo-write con fd "Hi there mate") - (pg-lo-lseek con fd 3 0) ; SEEK_SET = 0 - (unless (= 3 (pg-lo-tell con fd)) - (error "lo-tell test failed!")) - (message "Read %s from lo" (pg-lo-read con fd 7)) - (message "==================================================") + (pg-lo-lseek con fd 3 pg--SEEK_SET) + (should (eql 3 (pg-lo-tell con fd))) + (let ((substring (pg-lo-read con fd 7))) + (should (string= substring "there m"))) + ;; Test the server-side function lo_get(), as per + ;; https://www.postgresql.org/docs/current/lo-funcs.html We don't have to decode this value + ;; from hex because the prepared statement infrastructure in pg-el does that for us. + (let ((res (pg-exec-prepared con "SELECT lo_get($1, 9, 4)" `((,oid . "int4"))))) + (should (string= "mate" (cl-first (pg-result res :tuple 0))))) (pg-lo-close con fd) (pg-lo-unlink con oid)))) +(defun pg-test-lo-ensure-size (con) + (message "Testing lo-lseek and friends") + (with-pg-transaction con + (let* ((oid (pg-lo-create con "rw")) + (fd (pg-lo-open con oid "rw")) + (filler (make-string (* 1024 1024) ?Z)) + (target-len (* 1024 1024 1024))) + (dotimes (i 512) + (pg-exec-prepared con "SELECT lo_put($1, $2, $3)" + `((,oid . "int4") (,(* i 1024 1024) . "int8") (,filler . "bytea")))) + (pg-lo-lseek con fd (* 512 1024 1024) pg--SEEK_SET) + (dotimes (i 512) + (should (eql (* 1024 1024) (pg-lo-write con fd filler)))) + ;; Now check that the octets have been written as expected + (let ((pos (pg-lo-lseek con fd 0 pg--SEEK_CUR))) + (should (eql pos target-len))) + (let ((pos (pg-lo-tell con fd))) + (should (eql pos target-len))) + (let ((pos (pg-lo-lseek con fd 0 pg--SEEK_END))) + (should (eql pos target-len))) + (dotimes (i 100) + (let ((pos (random target-len))) + (pg-lo-lseek con fd pos pg--SEEK_SET) + (should (string= "Z" (pg-lo-read con fd 1))))) + (let* ((halfway (* 512 1024 1024))) + (pg-lo-truncate con fd halfway) + (should (eql halfway (pg-lo-lseek con fd 0 pg--SEEK_END))) + (pg-lo-lseek con fd 0 pg--SEND_SET) + (let ((filler (make-string (* 1024 1024) ?#))) + (dotimes (i 512) + (pg-lo-write con fd filler))) + (dotimes (i 100) + (let ((pos (random halfway))) + (pg-lo-lseek con fd pos pg--SEEK_SET) + (should (string= "#" (pg-lo-read con fd 1))))))))) + (defun pg-test-lo-import (con) + (message "Testing lo-import and friends") (with-pg-transaction con - (let ((oid (pg-lo-import con "/etc/group"))) + (let* ((oid (pg-lo-import con "/etc/group")) + (sql "SELECT oid FROM pg_catalog.pg_largeobject_metadata WHERE oid=$1") + (res (pg-exec-prepared con sql `((,oid . "int4")))) + (rows (pg-result res :tuples))) + (should (eql 1 (length rows))) + (should (eql oid (caar rows))) (pg-lo-export con oid "/tmp/group") (cond ((zerop (call-process "diff" nil nil nil "/tmp/group" "/etc/group")) (message "lo-import test succeeded")