From 2ae018686a1e1b9b2ada8735e4e35f214b8a75bd Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 5 Aug 2024 12:45:58 -0400
Subject: [PATCH v6] Improve file header comments for astramer code.

Make it clear that "astreamer" stands for "archive streamer".
Generalize comments that still believe this code can only be used
by pg_basebackup. Add some comments explaining the asymmetry
between the gzip, lz4, and zstd astreamers, in the hopes of making
life easier for anyone who hacks on this code in the future.
---
 src/fe_utils/astreamer_file.c    |  4 ++++
 src/fe_utils/astreamer_gzip.c    | 15 +++++++++++++++
 src/fe_utils/astreamer_lz4.c     |  4 ++++
 src/fe_utils/astreamer_zstd.c    |  4 ++++
 src/include/fe_utils/astreamer.h | 21 +++++++++++++++------
 5 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/fe_utils/astreamer_file.c b/src/fe_utils/astreamer_file.c
index 13d1192c6e..e75d166ebf 100644
--- a/src/fe_utils/astreamer_file.c
+++ b/src/fe_utils/astreamer_file.c
@@ -2,6 +2,10 @@
  *
  * astreamer_file.c
  *
+ * Archive streamers that write to files. astreamer_plan_writer writes
+ * the whole archive to a single file, and astreamer_extractor writes
+ * each archive member to a separate file in a given directory.
+ *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
diff --git a/src/fe_utils/astreamer_gzip.c b/src/fe_utils/astreamer_gzip.c
index dd28defac7..1c773a2384 100644
--- a/src/fe_utils/astreamer_gzip.c
+++ b/src/fe_utils/astreamer_gzip.c
@@ -2,6 +2,21 @@
  *
  * astreamer_gzip.c
  *
+ * Archive streamers that deal with data compressed using gzip.
+ * astreamer_gzip_writer applies gzip compression to the input data
+ * and writes the result to a file. astreamer_gzip_decompressor assumes
+ * that the input stream is compressed using gzip and decompresses it.
+ *
+ * Note that the code in this file is asymmetric with what we do for
+ * other compression types: for lz4 and zstd, there is a compressor and
+ * a decompressor, rather than a writer and a decompressor. The approach
+ * taken here is less flexible, because a writer can only write to a file,
+ * while a compressor can write to a subsequent astreamer which is free
+ * to do whatever it likes. The reason it's like this is because this
+ * code was adapated from old, less-modular pg_basebackup that used the
+ * same APIs that astreamer_gzip_writer uses, and it didn't seem
+ * necessary to change anything at the time.
+ *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
diff --git a/src/fe_utils/astreamer_lz4.c b/src/fe_utils/astreamer_lz4.c
index d8b2a367e4..2bf14084e7 100644
--- a/src/fe_utils/astreamer_lz4.c
+++ b/src/fe_utils/astreamer_lz4.c
@@ -2,6 +2,10 @@
  *
  * astreamer_lz4.c
  *
+ * Archive streamers that deal with data compressed using lz4.
+ * astreamer_lz4_compressor applies lz4 compression to the input stream,
+ * and astreamer_lz4_decompressor does the reverse.
+ *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
diff --git a/src/fe_utils/astreamer_zstd.c b/src/fe_utils/astreamer_zstd.c
index 45f6cb6736..4b2d42b231 100644
--- a/src/fe_utils/astreamer_zstd.c
+++ b/src/fe_utils/astreamer_zstd.c
@@ -2,6 +2,10 @@
  *
  * astreamer_zstd.c
  *
+ * Archive streamers that deal with data compressed using zstd.
+ * astreamer_zstd_compressor applies lz4 compression to the input stream,
+ * and astreamer_zstd_decompressor does the reverse.
+ *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h
index 2c014dbddb..570cfba304 100644
--- a/src/include/fe_utils/astreamer.h
+++ b/src/include/fe_utils/astreamer.h
@@ -2,9 +2,18 @@
  *
  * astreamer.h
  *
- * Each tar archive returned by the server is passed to one or more
- * astreamer objects for further processing. The astreamer may do
- * something simple, like write the archive to a file, perhaps after
+ * The "archive streamer" interface is intended to allow frontend code
+ * to stream from possibly-compressed archive files from any source and
+ * perform arbitrary actions based on the contents of those archives.
+ * Archive streamers are intended to be composable, and most tasks will
+ * require two or more archive streamers to complete. For instance,
+ * if the input is an uncompressed tar stream, a tar parser astreamer
+ * could be used to interpret it, and then an extractor astreamer could
+ * be used to write each archive member out to a file.
+ *
+ * In general, each archive streamer is relatively free to take whatever
+ * action it desires in the stream of chunks provided by the caller. It
+ * may do something simple, like write the archive to a file, perhaps after
  * compressing it, but it can also do more complicated things, like
  * annotating the byte stream to indicate which parts of the data
  * correspond to tar headers or trailing padding, vs. which parts are
@@ -33,9 +42,9 @@ typedef struct astreamer_ops astreamer_ops;
 
 /*
  * Each chunk of archive data passed to a astreamer is classified into one
- * of these categories. When data is first received from the remote server,
- * each chunk will be categorized as ASTREAMER_UNKNOWN, and the chunks will
- * be of whatever size the remote server chose to send.
+ * of these categories. When data is initially passed to an archive streamer,
+ * each chunk will be categorized as ASTREAMER_UNKNOWN, and the chunks can
+ * be of whatever size the caller finds convenient.
  *
  * If the archive is parsed (e.g. see astreamer_tar_parser_new()), then all
  * chunks should be labelled as one of the other types listed here. In
-- 
2.39.3 (Apple Git-145)

