On 24/03/2026 18:44, Mark Dilger wrote:
pqDrainPending() reads pending bytes into conn->inBuffer + conn->inEnd
but never advances conn->inEnd. Compare with pqReadData_internal, which
always does conn->inEnd += nread after a successful read. Is
pgDrainPending()'s behavior correct? Sorry if I am misunderstanding.
Without that update, could the drained bytes occupy buffer memory that
libpq doesn't know about? Would the next call to pqReadData_internal
overwrite those positions, silently losing the drained data?
You're right, that was broken, it indeed needs to advance conn->inEnd.
In practice, I was unable to trigger this code path. When the protocol
message parser in fe-protocol3.c (around line 122) sees a large DataRow
header, it pre-expands the input buffer via pqCheckInBufferSpace() to
hold the entire message. This ensures that the buffer always has enough
free space for the next TLS record (max 16384 bytes plaintext), so
SSL_read never returns a partial record and SSL_pending() is always 0 at
the point pqDrainPending is called. Perhaps you thought of that, and
that's why you didn't bother to worry about this?
I suspect this is an unreachable bug, but perhaps it's just in need of
some code comments. What are your thoughts?
It is reached whenever you hit the original bug. It is difficult to hit,
and you need something that sits between the PostgreSQL server and the
client because the server won't create large enough TLS fragments.
I wrote a little python script to reproduce this based on Jacob's
instructions at
https://www.postgresql.org/message-id/CAOYmi%2B%3DjZNU9mZ4Z83aR0RiqZ2pF35u5040gCxBgcFqoA4oNbQ%40mail.gmail.com.
See attached 'psycotest.py'. With the previous version, which didn't
advance conn->inEnd in pqDrainPending() it fails. With the attached
patch, where that's fixed, it succeeds.
- Heikki
#!/usr/bin/python3
#
# Reproducer for the libpq TLS buffering bug
#
# Subject: libpq: Process buffered SSL read bytes to support records >8kB on async API
# https://www.postgresql.org/message-id/2039ac58-d3e0-434b-ac1a-2a987f3b4cb1%40greiz-reinsdorf.de
#
# This test program has two parts:
#
# - A psycopg2 client program that runs a query against a running server
#
# - TLS proxy which sits between the client and the running server. It
# buffers into larger chunks, and then splits them again into TLS frames
# so that the TLS frames don't align with the Postgres protocol messages
#
# Usage:
#
# You must have a PostgreSQL server running on localhost port 5432,
# and accepting connections on 'postgres' database without a password.
#
import select
import socket
import ssl
import tempfile
import time
import threading
import psycopg2
import psycopg2.extras
import psycopg2.extensions
# Where is the PostgreSQL server running?
dest_host = 'localhost'
dest_port = 5432
#
# TLS Proxy
#
# This listens for an incoming TLS connection on given
# 'server_socket', which has already been bound to a port. When a
# connection is received, open a connection to the server listening at
# dest_host:dest_port. The connection from the proxy to the server is
# not encrypted.
#
# To tickle the bug, this buffers all data coming from the server into
# larger chunks, and splits them again at 12 kB. If no data is
# received from the server for 1 s, any buffered data is sent even if
# it's not reached 12 kB yet.
def tls_proxy(server_socket, dest_host, dest_port):
print(f"Forwarding to {dest_host}:{dest_port}")
server_socket.listen(1)
raw_client_socket, addr = server_socket.accept()
print(f"Connection from {addr}")
# Create SSL context for the incoming connection
tmp_cert_file = tempfile.NamedTemporaryFile()
with open(tmp_cert_file.name, 'wb') as f:
f.write(b"""
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDkpRy8folzDqSk
rV3NbIvIS9E4ZKiJmcnvHVCysPVwXB7iQiFNif2gJLqsV8KLsJxdM7QxE/dh2vug
sazXZAsCNBDQZD756hRtRvS+2OtCdjZ3WrPvVzQwG1iDHHIRISC9mrvQAvbdFtN1
nxILdIemPZtfrR3uzq0A+uHIGwoHih3+msrozQvF5IgHnz6J7JBZoabBFS8HGp9I
+SWrlwmj4LFVX0Gfx1x/Bf+8tJe4/lSgi6fS6qoBq9AtAb4+hyuw+VLyGlBbvE+C
DzEVjCJ9yXKguF4yNEy+VQp5IXrmiF9JAS/rg9/qGqTe9h89RTCP3ZIWKvGGvpoN
AhQEIHfnAgMBAAECggEAC+13Hc2GExao98mZfUgT1LDr1a1z7gBep0GCvlz6jiV9
cPUyh+PSc3e8YJWQGbnEC8f7jEc2iQgjnKLthCunuAeKTFdmbPgin6lrcXH0d+Tz
Jhp5DNvM5lOO3wFtYT+2qwghkxI3Lu/BZpST8ZK0ft4eNwlbaI6nOh1cXj7W25oQ
1EvomnUwUlc6ADudpg5rQh1HbiICk6uSE0A530Qo8D4FpiYoVZwt8l4vcIyGoyiU
kITL3KK17yZ+zWI8bqZT3iSzzcmiKxz6qLUomcfkwUAwFvgZVYmP2c5fTcRUMl5c
t0zuyJlus6vvovIvTwkCqs6LaY//fEO2xcYIK9UbfQKBgQD8c4ss1QDO1McOdG+3
Wd8Aj0t9Iu2ziknjwFDs2YbBOF6FW0WzItrKurxfKNPhN73qJPD6GmFiujRAR/Ui
xbVND8eGx9ocAmvUQyst8NK56OmKNpuKpNGnL5s2TeVJf5iXdaeBs/i+hkwiXe9C
+Ufdc5QAfVDD+B1m3146p3CcBQKBgQDn2+aA9ZMYv3qfypmg5VfhfmTF712HkSvn
nRoBH5YW5QQCG0kuXDz8TdTBSEonbyCDXD182szzn9epKb+IZhScXRYlYnChN6uG
3OmHvzAzsYUw5f+zWA3XfKnL3y9qkfaAOo9zONzWs9FB2sITv0eKNd4mf8Y+ddE1
EE1OHyCz+wKBgQDk5GpS+snhvmDBRWcpag3ctw/t5OZ6vC7kljGJnm0k8dQZu7jF
hBu2Znt3GFCLynuiOV5YleSonEXV5qhnn7UTqvPwy3GBpdxYt5IF9G1L7Nca3wpG
OcxxdqOXKCd1bYBQC3gWDLTDIocTPfI62kSDkFCn5Pd+x475ABuyuLBMdQKBgA4Z
J/X1eMFLe1hWCGtpJqPWfKgwet5wbFwECH3C/uxbdpfuMs/32dl5nhM2oxOsxSxX
ooGCCG5T7NgjarsPgfdUDbGuP6z95pcnvad8b6DlDXVAtwCfvQ+6S9TSuF5hi7yW
Uvytm3gOrQ21EJIE0oPL7Lsoj9Ric5snZ5v1dpabAoGAY5Akg6JCQhmefQtfGoQN
PDNIZGoc9MvAlhP+9Yg6qMSPapjN0BMt4KDcG7nF8fNAnIlAgzE7VVhyn3uHXr9g
q3MnT3ZZ/bOcmQSgNiUY/BYP2G8Wz47aC50RAsdX28/vF8HomD16bHwaoWN2M343
PXw+1B4WzaAKSS0JTRqfaxM=
-----END PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
MIIDjzCCAnegAwIBAgIUKRYbc0eN7+LEdjc+X8QX3a+15EYwDQYJKoZIhvcNAQEL
BQAwVzELMAkGA1UEBhMCVVMxDjAMBgNVBAgMBVN0YXRlMQ0wCwYDVQQHDARDaXR5
MRUwEwYDVQQKDAxPcmdhbml6YXRpb24xEjAQBgNVBAMMCWxvY2FsaG9zdDAeFw0y
NjAzMjUxMTQ2MjhaFw0yNzAzMjUxMTQ2MjhaMFcxCzAJBgNVBAYTAlVTMQ4wDAYD
VQQIDAVTdGF0ZTENMAsGA1UEBwwEQ2l0eTEVMBMGA1UECgwMT3JnYW5pemF0aW9u
MRIwEAYDVQQDDAlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
AoIBAQDkpRy8folzDqSkrV3NbIvIS9E4ZKiJmcnvHVCysPVwXB7iQiFNif2gJLqs
V8KLsJxdM7QxE/dh2vugsazXZAsCNBDQZD756hRtRvS+2OtCdjZ3WrPvVzQwG1iD
HHIRISC9mrvQAvbdFtN1nxILdIemPZtfrR3uzq0A+uHIGwoHih3+msrozQvF5IgH
nz6J7JBZoabBFS8HGp9I+SWrlwmj4LFVX0Gfx1x/Bf+8tJe4/lSgi6fS6qoBq9At
Ab4+hyuw+VLyGlBbvE+CDzEVjCJ9yXKguF4yNEy+VQp5IXrmiF9JAS/rg9/qGqTe
9h89RTCP3ZIWKvGGvpoNAhQEIHfnAgMBAAGjUzBRMB0GA1UdDgQWBBQ/KejUOncs
pm5k/wkn24uD4mbvqjAfBgNVHSMEGDAWgBQ/KejUOncspm5k/wkn24uD4mbvqjAP
BgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQA9GS+ZJqgidut9ZCBH
+GC/WlFZ5ueQFZWiaPGdylRdR3hDqdQy/s2pfshJPs8kmMqMGOJr2lI1GeZOYOQC
vgn3m2YZabtABzuLGpZxJXGByXa07AIob4DGgpoDY1hNEf8F7jjhnl7M/64R2blE
tA5hCUsKuF7qeGVmXKlkfMLbJvwbncXGMY4wComF4crrF4v9K+TpSLTTZpvaft63
ucwSU70izWOzt/QoiStpeOQ9aQ1p4Yf4wALd4LN5zvQklJTr98/30odGZj+vfgjg
p3AshIu+zB2GgUVMCsRbQcKuWwD5a28ZavqFWEu382v4tatAzh1HxnII52FKCepI
wRBr
-----END CERTIFICATE-----
""")
print(f"cert file {tmp_cert_file.name}")
listen_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
listen_context.load_cert_chain(tmp_cert_file.name)
listen_context.set_alpn_protocols(["postgresql"])
ssl_client_socket = listen_context.wrap_socket(raw_client_socket, server_side=True)
tmp_cert_file.close()
try:
# Connect to the destination
dest_socket = socket.create_connection((dest_host, dest_port))
def forward_data(src_socket, dst_socket, direction):
"""Forward data from source to destination socket"""
try:
while True:
data = src_socket.recv(100 * 1024)
if not data:
print(f"[{direction}] exiting")
break
print(f"[{direction}] Received and forwarded {len(data)} bytes")
dst_socket.send(data)
finally:
try:
src_socket.close()
dst_socket.close()
except:
pass
def forward_data_buffered(src_socket, dst_socket, direction):
"""Forward data from source to destination socket"""
try:
src_socket.setblocking(False)
buf = bytearray()
while True:
# Set a timeout for recv operations
if len(buf) > 0:
timeout = 1
else:
timeout = None
r, w, e = select.select([src_socket], [], [], timeout)
if src_socket in r:
data = src_socket.recv(100 * 1024)
print(f"[{direction}] Received {len(data)} bytes")
buf.extend(data)
else:
while len(buf) > 0:
cutpoint = min(12*1024, len(buf))
dst_socket.send(buf[:cutpoint])
buf = buf[cutpoint:]
print(f"[{direction}] Timeout, Forwarded {cutpoint} bytes")
finally:
try:
src_socket.close()
dst_socket.close()
except:
pass
# Start forwarding threads
thread1 = threading.Thread(target=forward_data, args=(ssl_client_socket, dest_socket, "CLIENT->SERVER"))
thread1.daemon = True
thread2 = threading.Thread(target=forward_data_buffered, args=(dest_socket, ssl_client_socket, "DEST->SERVER"))
thread2.daemon = True
thread1.start()
thread2.start()
thread1.join()
thread2.join()
except Exception as e:
print(f"Error handling client: {e}")
finally:
try:
ssl_client_socket.close()
except:
pass
# Create a listener socket for the TLS proxy, and start thread to await for connection
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server_socket.bind(('localhost', 0))
listen_port = server_socket.getsockname()[1]
print(f"Listening on port {listen_port}")
client_thread = threading.Thread(
target=tls_proxy,
args=(server_socket, dest_host, dest_port)
)
client_thread.daemon = True
client_thread.start()
# wait a little so that the thread has reached listen()
time.sleep(1)
#
# Run the client
#
aconn = psycopg2.connect(f"host='localhost' port={listen_port} dbname='postgres' sslnegotiation='direct' sslmode='require'", async_=True)
psycopg2.extras.wait_select(aconn)
acurs = aconn.cursor()
acurs.execute("""
SELECT repeat('x', 32000)
union all
select repeat('y', 800)
""")
while True:
print("polling")
state = aconn.poll()
if state == psycopg2.extensions.POLL_OK:
break
elif state == psycopg2.extensions.POLL_WRITE:
select.select([], [aconn.fileno()], [])
elif state == psycopg2.extensions.POLL_READ:
print("read ready")
# If you hit the libpq bug, this will time out
r, w, e = select.select([aconn.fileno()], [], [], 5)
if r == [] and w == [] and e == []:
print("FAIL: the libpq bug was triggered, select timed out")
exit(1)
else:
raise psycopg2.OperationalError("poll() returned %s" % state)
result = acurs.fetchall()
print(f"Success, bug not triggered ({len(result)} rows)")
From b693f2f31d9081550fb196ef9755484e616068e0 Mon Sep 17 00:00:00 2001
From: Jacob Champion <[email protected]>
Date: Fri, 18 Jul 2025 10:06:13 -0700
Subject: [PATCH v3 1/2] libpq: Extend "read pending" check from SSL to GSS
An extra check for pending bytes in the SSL layer has been part of
pqReadReady() for a very long time (79ff2e96d). But when GSS transport
encryption was added, it didn't receive the same treatment. (As
79ff2e96d notes, "The bug that I fixed in this patch is exceptionally
hard to reproduce reliably.")
Without that check, it's possible to hit a hang in gssencmode, if the
server splits a large libpq message such that the final message in a
streamed response is part of the same wrapped token as the split
message:
DataRowDataRowDataRowDataRowDataRowData
-- token boundary --
RowDataRowCommandCompleteReadyForQuery
If the split message takes up enough memory to nearly fill libpq's
receive buffer, libpq may return from pqReadData() before the later
messages are pulled out of the PqGSSRecvBuffer. Without additional
socket activity from the server, pqReadReady() (via pqSocketCheck())
will never again return true, hanging the connection.
Pull the pending-bytes check into the pqsecure API layer, where both SSL
and GSS now implement it.
Note that this does not fix the root problem! Third party clients of
libpq have no way to call pqsecure_read_is_pending() in their own
polling. This just brings the GSS implementation up to par with the
existing SSL workaround; a broader fix is left to a subsequent commit.
(However, pgtls_read_pending() is renamed to pgtls_read_is_pending(), to
avoid conflation with the forthcoming pgtls_drain_pending().)
Discussion: https://postgr.es/m/CAOYmi%2BmpymrgZ76Jre2dx_PwRniS9YZojwH0rZnTuiGHCsj0rA%40mail.gmail.com
---
src/interfaces/libpq/fe-misc.c | 6 ++----
src/interfaces/libpq/fe-secure-gssapi.c | 6 ++++++
src/interfaces/libpq/fe-secure-openssl.c | 2 +-
src/interfaces/libpq/fe-secure.c | 19 +++++++++++++++++++
src/interfaces/libpq/libpq-int.h | 4 +++-
5 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 13775cfb8b9..312399ac8e4 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -1099,14 +1099,12 @@ pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time)
return -1;
}
-#ifdef USE_SSL
- /* Check for SSL library buffering read bytes */
- if (forRead && conn->ssl_in_use && pgtls_read_pending(conn))
+ /* Check for SSL/GSS library buffering read bytes */
+ if (forRead && pqsecure_read_is_pending(conn))
{
/* short-circuit the select */
return 1;
}
-#endif
}
/* We will retry as long as we get EINTR */
diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c
index 72f438dfa9c..d8e05b6400d 100644
--- a/src/interfaces/libpq/fe-secure-gssapi.c
+++ b/src/interfaces/libpq/fe-secure-gssapi.c
@@ -471,6 +471,12 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
return PGRES_POLLING_OK;
}
+bool
+pg_GSS_read_is_pending(PGconn *conn)
+{
+ return PqGSSResultLength > PqGSSResultNext;
+}
+
/*
* Negotiate GSSAPI transport for a connection. When complete, returns
* PGRES_POLLING_OK. Will return PGRES_POLLING_READING or
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index fbd3c63fb5d..c37a66e8d59 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -231,7 +231,7 @@ rloop:
}
bool
-pgtls_read_pending(PGconn *conn)
+pgtls_read_is_pending(PGconn *conn)
{
return SSL_pending(conn->ssl) > 0;
}
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 31d5b48d3f9..d97b71acd9a 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -243,6 +243,25 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
return n;
}
+/*
+ * Returns true if there are any bytes available in the transport buffer.
+ */
+bool
+pqsecure_read_is_pending(PGconn *conn)
+{
+#ifdef USE_SSL
+ if (conn->ssl_in_use)
+ return pgtls_read_is_pending(conn);
+#endif
+#ifdef ENABLE_GSS
+ if (conn->gssenc)
+ return pg_GSS_read_is_pending(conn);
+#endif
+
+ /* Plaintext connections have no transport buffer. */
+ return 0;
+}
+
/*
* Write data to a secure connection.
*
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index bd7eb59f5f8..dadec01c4a3 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -819,6 +819,7 @@ extern int pqWriteReady(PGconn *conn);
extern PostgresPollingStatusType pqsecure_open_client(PGconn *);
extern void pqsecure_close(PGconn *);
extern ssize_t pqsecure_read(PGconn *, void *ptr, size_t len);
+extern bool pqsecure_read_is_pending(PGconn *);
extern ssize_t pqsecure_write(PGconn *, const void *ptr, size_t len);
extern ssize_t pqsecure_raw_read(PGconn *, void *ptr, size_t len);
extern ssize_t pqsecure_raw_write(PGconn *, const void *ptr, size_t len);
@@ -857,7 +858,7 @@ extern ssize_t pgtls_read(PGconn *conn, void *ptr, size_t len);
/*
* Is there unread data waiting in the SSL read buffer?
*/
-extern bool pgtls_read_pending(PGconn *conn);
+extern bool pgtls_read_is_pending(PGconn *conn);
/*
* Write data to a secure connection.
@@ -905,6 +906,7 @@ extern PostgresPollingStatusType pqsecure_open_gss(PGconn *conn);
*/
extern ssize_t pg_GSS_write(PGconn *conn, const void *ptr, size_t len);
extern ssize_t pg_GSS_read(PGconn *conn, void *ptr, size_t len);
+extern bool pg_GSS_read_is_pending(PGconn *conn);
#endif
/* === in fe-trace.c === */
--
2.47.3
From 1c77a1e93b26399d8423c5760e45e4c938334de1 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <[email protected]>
Date: Fri, 5 Dec 2025 15:11:49 +0200
Subject: [PATCH v3 2/2] libpq: Drain all pending bytes from SSL/GSS during
pqReadData()
The previous commit strengthened a workaround for a hang when large
messages are split across TLS records/GSS tokens. Because that
workaround is implemented in libpq internals, it can only help us when
libpq itself is polling on the socket. In nonblocking situations, where
the client above libpq is expected to poll, the same bugs can show up.
As a contrived example, consider a large protocol-2.0 error coming back
from a server during PQconnectPoll(), split in an odd way across two
records:
-- TLS record (8192-byte payload) --
EEEE[...repeated a total of 8192 times]
-- TLS record (8193-byte payload) --
EEEE[...repeated a total of 8192 times]\0
The first record will fill the first half of the libpq receive buffer,
which is 16k long by default. The second record completely fills the
last half with its first 8192 bytes, leaving the terminating NULL in the
OpenSSL buffer. Since we still haven't seen the terminator at our level,
PQconnectPoll() will return PGRES_POLLING_READING, expecting to come
back when the server has sent "the rest" of the data. But there is
nothing left to read from the socket; OpenSSL had to pull all of the
data in the 8193-byte record off of the wire to decrypt it.
(A real server would probably not split up the records this way, nor
keep the connection open after sending a fatal connection error. But
servers that regularly use larger TLS records can get the libpq receive
buffer into the same state if DataRows are big enough, as reported on
the list.)
This is a layering violation. libpq makes decisions based on data in the
application buffer, above the transport buffer (whether SSL or GSS), but
clients are polling the socket, below the transport buffer. One way to
fix this in a backportable way, without changing APIs too much, is to
ensure data never stays in the transport buffer. Then pqReadData's
postconditions will look similar for both raw sockets and SSL/GSS: any
available data is either in the application buffer, or still on the
socket.
Building on the prior commit, make pqReadData() to drain all pending
data from the transport layer into conn->inBuffer, expanding the
buffer as necessary. This is not particularly efficient from an
architectural perspective (the pqsecure_read() implementations take
care to fit their packets into the current buffer, and that effort is
now completely discarded), but it's hopefully easier to reason about
than a full rewrite would be for the back branches.
Author: Jacob Champion <[email protected]>
Reported-by: Lars Kanis <[email protected]>
Discussion: https://postgr.es/m/2039ac58-d3e0-434b-ac1a-2a987f3b4cb1%40greiz-reinsdorf.de
Backpatch-through: 14
---
src/interfaces/libpq/fe-misc.c | 141 ++++++++++++++++++++++-
src/interfaces/libpq/fe-secure-gssapi.c | 7 +-
src/interfaces/libpq/fe-secure-openssl.c | 38 +++++-
src/interfaces/libpq/fe-secure.c | 14 ++-
src/interfaces/libpq/libpq-int.h | 8 +-
5 files changed, 190 insertions(+), 18 deletions(-)
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 312399ac8e4..f7db821c33b 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -55,6 +55,8 @@ static int pqPutMsgBytes(const void *buf, size_t len, PGconn *conn);
static int pqSendSome(PGconn *conn, int len);
static int pqSocketCheck(PGconn *conn, int forRead, int forWrite,
pg_usec_time_t end_time);
+static int pqReadData_internal(PGconn *conn);
+static int pqDrainPending(PGconn *conn);
/*
* PQlibVersion: return the libpq version number
@@ -593,6 +595,13 @@ pqPutMsgEnd(PGconn *conn)
/* ----------
* pqReadData: read more data, if any is available
+ *
+ * Upon a successful return, callers may assume that either 1) all available
+ * bytes have been consumed from the socket, or 2) the socket is still marked
+ * readable by the OS. (In other words: after a successful pqReadData, it's safe
+ * to tell a client to poll for readable bytes on the socket without any further
+ * draining of the SSL/GSS transport buffers.)
+ *
* Possible return values:
* 1: successfully loaded at least one more byte
* 0: no data is presently available, but no error detected
@@ -605,8 +614,7 @@ pqPutMsgEnd(PGconn *conn)
int
pqReadData(PGconn *conn)
{
- int someread = 0;
- int nread;
+ int available;
if (conn->sock == PGINVALID_SOCKET)
{
@@ -614,6 +622,40 @@ pqReadData(PGconn *conn)
return -1;
}
+ available = pqReadData_internal(conn);
+ if (available < 0)
+ return -1;
+ else if (available > 0)
+ {
+ /*
+ * Make sure there are no bytes stuck in layers between conn->inBuffer
+ * and the socket, to make it safe for clients to poll on PQsocket().
+ */
+ if (pqDrainPending(conn))
+ return -1;
+ }
+ else
+ {
+ /*
+ * If we're not returning any bytes from the underlying transport,
+ * that must imply there aren't any in the transport buffer...
+ */
+ Assert(pqsecure_bytes_pending(conn) == 0);
+ }
+
+ return available;
+}
+
+/*
+ * Workhorse for pqReadData(). It's kept separate from the pqDrainPending()
+ * logic to avoid adding to this function's goto complexity.
+ */
+static int
+pqReadData_internal(PGconn *conn)
+{
+ int someread = 0;
+ int nread;
+
/* Left-justify any data in the buffer to make room */
if (conn->inStart < conn->inEnd)
{
@@ -724,6 +766,8 @@ retry3:
* SSL_read() could still say WANT_READ because the data received was not
* a complete SSL record. So we must play dumb and assume there is more
* data, relying on the SSL layer to detect true EOF.
+ *
+ * XXX: do we have the same issue with GSS encryption?
*/
#ifdef USE_SSL
@@ -800,6 +844,97 @@ definitelyFailed:
return -1;
}
+/*---
+ * Drains any transport data that is already buffered in userspace and adds it
+ * to conn->inBuffer, enlarging inBuffer if necessary. The drain fails if
+ * inBuffer cannot be made to hold all available transport data.
+ *
+ * We assume that the underlying secure transport implementation does not
+ * attempt to read any more data from the socket while draining the transport
+ * buffer. After a successful return, pqsecure_bytes_pending() must be zero.
+ *
+ * This operation is necessary to prevent deadlock, due to a layering violation
+ * designed into our asynchronous client API: pqReadData() and all the parsing
+ * routines above it receive data from the SSL/GSS transport buffer, but clients
+ * poll on the raw PQsocket() handle. So data can be "lost" in the intermediate
+ * layer if we don't take it out here.
+ *
+ * To illustrate what we're trying to prevent, say that the server is sending
+ * two messages at once in response to a query (Aaaa and Bb), the libpq buffer
+ * is five characters in size, and TLS records max out at three-character
+ * payloads.
+ *
+ * Client libpq SSL Socket
+ * | | | |
+ * | [ ] [ ] [ ] [1] Buffers are empty, client is
+ * x --------------------------> | polling on socket
+ * | | | |
+ * | [ ] [ ] [xxx] [2] First record is received; poll
+ * | <-------------------------- | signals read-ready
+ * | | | |
+ * x ---> [ ] [ ] [xxx] [3] Client calls PQconsumeInput()
+ * | | | |
+ * | [ ] -> [ ] [xxx] [4] libpq calls pqReadData() to fill
+ * | | | | the receive buffer
+ * | [ ] [Aaa] <-- [ ] [5] SSL pulls payload off the wire
+ * | | | | and decrypts it
+ * | [Aaa ] <- [ ] [ ] [6] pqsecure_read() takes all data
+ * | | | |
+ * | <--- [Aaa ] [ ] [ ] [7] PQconsumeInput() returns with a
+ * x --------------------------> | partial message, PQisBusy() is
+ * | | | | still true, client polls again
+ * | [Aaa ] [ ] [xxx] [8] Second record is received; poll
+ * | <-------------------------- | signals read-ready
+ * | | | |
+ * x ---> [Aaa ] [ ] [xxx] [9] Client calls PQconsumeInput()
+ * | | | |
+ * | [Aaa ] -> [ ] [xxx] [10] libpq calls pqReadData() to fill
+ * | | | | the receive buffer
+ * | [Aaa ] [aBb] <-- [ ] [11] SSL decrypts
+ * | | | |
+ * | [AaaaB] <- [b ] [ ] [12] pqsecure_read() fills its
+ * | | | | buffer, taking only two bytes
+ * | <--- [AaaaB] [b ] [ ] [13] PQconsumeInput() returns with a
+ * | | | | complete message buffered;
+ * | | | | PQisBusy() is false
+ * x ---> [AaaaB] [b ] [ ] [14] Client calls PQgetResult()
+ * | | | |
+ * | <--- [B ] [b ] [ ] [15] Aaaa is returned; PQisBusy() is
+ * x --------------------------> | true and client polls again
+ * . | | .
+ * . [B ] [b ] . [16] No packets, and client hangs.
+ * . | | .
+ *
+ */
+static int
+pqDrainPending(PGconn *conn)
+{
+ ssize_t bytes_pending;
+ ssize_t nread;
+
+ bytes_pending = pqsecure_bytes_pending(conn);
+ if (bytes_pending <= 0)
+ return bytes_pending;
+
+ /* Expand the input buffer if necessary. */
+ if (pqCheckInBufferSpace(conn->inEnd + (size_t) bytes_pending, conn))
+ return -1; /* errorMessage already set */
+
+ nread = pqsecure_read(conn, conn->inBuffer + conn->inEnd,
+ bytes_pending);
+ conn->inEnd += nread;
+
+ /* When there are bytes pending, the read function is not supposed to fail */
+ if (nread != bytes_pending)
+ {
+ libpq_append_conn_error(conn,
+ "drained only %zu of %zd pending bytes in transport buffer",
+ nread, bytes_pending);
+ return -1;
+ }
+ return 0;
+}
+
/*
* pqSendSome: send data waiting in the output buffer.
*
@@ -1100,7 +1235,7 @@ pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time)
}
/* Check for SSL/GSS library buffering read bytes */
- if (forRead && pqsecure_read_is_pending(conn))
+ if (forRead && pqsecure_bytes_pending(conn) != 0)
{
/* short-circuit the select */
return 1;
diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c
index d8e05b6400d..cc60240582d 100644
--- a/src/interfaces/libpq/fe-secure-gssapi.c
+++ b/src/interfaces/libpq/fe-secure-gssapi.c
@@ -471,10 +471,11 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
return PGRES_POLLING_OK;
}
-bool
-pg_GSS_read_is_pending(PGconn *conn)
+ssize_t
+pg_GSS_bytes_pending(PGconn *conn)
{
- return PqGSSResultLength > PqGSSResultNext;
+ Assert(PqGSSResultLength >= PqGSSResultNext);
+ return (ssize_t) (PqGSSResultLength - PqGSSResultNext);
}
/*
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index c37a66e8d59..14864fe2d6b 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -230,10 +230,42 @@ rloop:
return n;
}
-bool
-pgtls_read_is_pending(PGconn *conn)
+ssize_t
+pgtls_bytes_pending(PGconn *conn)
{
- return SSL_pending(conn->ssl) > 0;
+ int pending;
+
+ /*
+ * OpenSSL readahead is documented to break SSL_pending(). Plus, we can't
+ * afford to have OpenSSL take bytes off the socket without processing
+ * them; that breaks the postconditions for pqsecure_drain_pending().
+ */
+ Assert(!SSL_get_read_ahead(conn->ssl));
+
+ /* Figure out how many bytes to take off the connection. */
+ pending = SSL_pending(conn->ssl);
+
+ if (pending < 0)
+ {
+ /* shouldn't be possible */
+ Assert(false);
+ libpq_append_conn_error(conn, "OpenSSL reports negative bytes pending");
+ return -1;
+ }
+ else if (pending == INT_MAX)
+ {
+ /*
+ * If we ever found a legitimate way to hit this, we'd need to loop
+ * around in the caller to call pgtls_bytes_pending() again. Throw an
+ * error rather than complicate the code in that way, because
+ * SSL_read() should be bounded to the size of a single TLS record,
+ * and conn->inBuffer can't currently go past INT_MAX in size anyway.
+ */
+ libpq_append_conn_error(conn, "OpenSSL reports INT_MAX bytes pending");
+ return -1;
+ }
+
+ return (ssize_t) pending;
}
ssize_t
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index d97b71acd9a..ab29b2b06e8 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -244,18 +244,22 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
}
/*
- * Returns true if there are any bytes available in the transport buffer.
+ * Return the number of bytes available in the transport buffer.
+ *
+ * If pqsecure_read() is called for this number of bytes, it's guaranteed to
+ * return successfully without reading from the underlying socket. See
+ * pqDrainPending() for a more complete discussion of the concepts involved.
*/
-bool
-pqsecure_read_is_pending(PGconn *conn)
+ssize_t
+pqsecure_bytes_pending(PGconn *conn)
{
#ifdef USE_SSL
if (conn->ssl_in_use)
- return pgtls_read_is_pending(conn);
+ return pgtls_bytes_pending(conn);
#endif
#ifdef ENABLE_GSS
if (conn->gssenc)
- return pg_GSS_read_is_pending(conn);
+ return pg_GSS_bytes_pending(conn);
#endif
/* Plaintext connections have no transport buffer. */
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index dadec01c4a3..83d883acea6 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -819,7 +819,7 @@ extern int pqWriteReady(PGconn *conn);
extern PostgresPollingStatusType pqsecure_open_client(PGconn *);
extern void pqsecure_close(PGconn *);
extern ssize_t pqsecure_read(PGconn *, void *ptr, size_t len);
-extern bool pqsecure_read_is_pending(PGconn *);
+extern ssize_t pqsecure_bytes_pending(PGconn *);
extern ssize_t pqsecure_write(PGconn *, const void *ptr, size_t len);
extern ssize_t pqsecure_raw_read(PGconn *, void *ptr, size_t len);
extern ssize_t pqsecure_raw_write(PGconn *, const void *ptr, size_t len);
@@ -856,9 +856,9 @@ extern void pgtls_close(PGconn *conn);
extern ssize_t pgtls_read(PGconn *conn, void *ptr, size_t len);
/*
- * Is there unread data waiting in the SSL read buffer?
+ * Return the number of bytes available in the transport buffer.
*/
-extern bool pgtls_read_is_pending(PGconn *conn);
+extern ssize_t pgtls_bytes_pending(PGconn *conn);
/*
* Write data to a secure connection.
@@ -906,7 +906,7 @@ extern PostgresPollingStatusType pqsecure_open_gss(PGconn *conn);
*/
extern ssize_t pg_GSS_write(PGconn *conn, const void *ptr, size_t len);
extern ssize_t pg_GSS_read(PGconn *conn, void *ptr, size_t len);
-extern bool pg_GSS_read_is_pending(PGconn *conn);
+extern ssize_t pg_GSS_bytes_pending(PGconn *conn);
#endif
/* === in fe-trace.c === */
--
2.47.3