From b69c860c9fb793d56493053042e5beceaf069123 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Wed, 20 Mar 2024 22:30:14 +0500
Subject: [PATCH v23] Implement UUID v7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds function for UUID generation. Most important function here
is uuidv7() which generates new UUID according to the new standard.
For code readability this commit adds alias uuidv4() to function
gen_random_uuid().

Author: Andrey Borodin
Reviewed-by: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch
Reviewed-by: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev
Reviewed-by: Peter Eisentraut, Chris Travers, Lukas Fittl
Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com
---
 doc/src/sgml/func.sgml                   |  38 ++++-
 src/backend/utils/adt/uuid.c             | 206 ++++++++++++++++++++++-
 src/include/catalog/pg_proc.dat          |   6 +
 src/test/regress/expected/opr_sanity.out |   2 +
 src/test/regress/expected/uuid.out       |  46 ++++-
 src/test/regress/sql/uuid.sql            |  18 +-
 6 files changed, 308 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5b225ccf4f..acb655cfeb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14127,6 +14127,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuidv4</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuidv7</primary>
+  </indexterm>
+
   <indexterm>
    <primary>uuid_extract_timestamp</primary>
   </indexterm>
@@ -14136,12 +14144,36 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
   </indexterm>
 
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes several functions to generate a UUID.
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
+<function>uuidv4</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   These functions return a version 4 (random) UUID.
+<synopsis>
+<function>uuidv7</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   This function returns a version 7 UUID (UNIX timestamp with 1ms precision +
+   randomly seeded counter + random).
+  </para>
+
+  <para>
+<synopsis>
+<function>uuid_extract_timestamp</function> (uuid) <returnvalue>timestamptz</returnvalue>
+</synopsis>
+   This function extracts a <type>timestamp with time zone</type> from UUID versions 1, 6 and 7.
+   For other versions and variants this function returns NULL. The extracted timestamp
+   does not necessarily equate to the time of UUID generation. How close it is
+   to the actual time depends on the implementation that generated UUID.
+   The uuidv7() function provided by PostgreSQL will normally store the actual time,
+   with some exceptions: prevention of time leaps backwards and counter overflow
+   being carried to time step.
+<synopsis>
+<function>uuid_extract_version</function> (uuid) <returnvalue>smallint</returnvalue>
 </synopsis>
-   This function returns a version 4 (random) UUID.  This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function extracts a version bits from UUID of variant described by
+   <ulink url="https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis">IETF standard draft</ulink>
+   (b10xx variant). For other variants this function returns NULL.
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 45eb1b2fea..dfa0746fc7 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,11 +13,15 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
+#include "access/xlog.h"
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "port/pg_bswap.h"
-#include "utils/fmgrprotos.h"
+#include "utils/builtins.h"
+#include "utils/datetime.h"
 #include "utils/guc.h"
 #include "utils/sortsupport.h"
 #include "utils/timestamp.h"
@@ -407,6 +411,12 @@ uuid_hash_extended(PG_FUNCTION_ARGS)
 	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
 }
 
+/*
+ * Generate UUID version 4.
+ *
+ * All UUID bytes are filled with strong random numbers except version and
+ * variant 0b10 bits.
+ */
 Datum
 gen_random_uuid(PG_FUNCTION_ARGS)
 {
@@ -427,7 +437,164 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 	PG_RETURN_UUID_P(uuid);
 }
 
-#define UUIDV1_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
+static uint32_t sequence_counter;
+static uint64_t previous_timestamp = 0;
+
+/*
+ * Generate UUID version 7.
+ *
+ * Following description is taken from RFC draft and slightly extended to
+ * reflect implementation specific choices.
+ *
+ * UUIDv7 Field and Bit Layout:
+ * ----------
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           unix_ts_ms                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          unix_ts_ms           |  ver  |       rand_a          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |var|                        rand_b                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                            rand_b                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * unix_ts_ms:
+ *  48 bit big-endian unsigned number of Unix epoch timestamp in milliseconds
+ *  as per Section 6.1. Occupies bits 0 through 47 (octets 0-5).
+ *
+ * ver:
+ *  The 4 bit version field as defined by Section 4.2, set to 0b0111 (7).
+ *  Occupies bits 48 through 51 of octet 6.
+ *
+ * rand_a:
+ *  Most significant 12 bits of 18-bit counter. This counter is designed to
+ *  guarantee additional monotonicity as per Section 6.2 (Method 1). rand_a
+ *  occupies bits 52 through 63 (octets 6-7).
+ *
+ * var:
+ *  The 2 bit variant field as defined by Section 4.1, set to 0b10. Occupies
+ *  bits 64 and 65 of octet 8.
+ *
+ * rand_b:
+ *  Starting 6 bits are least significant 6 bits of a counter. The final 56
+ *  bits filled with pseudo-random data to provide uniqueness as per
+ *  Section 6.9. rand_b Occupies bits 66 through 127 (octets 8-15).
+ * ----------
+ *
+ * "Fixed-Length Dedicated Counter Bits" (Method 1) can be implemented with
+ * arbitrary size of a counter. We choose size 18 to reuse all space of bytes
+ * that are touched by ver and var fields + rand_a bytes between them.
+ * Whenever timestamp unix_ts_ms is moving forward, this counter bits are
+ * reinitialized. Reinilialization always sets most significant bit to 0, other
+ * bits are initialized with random numbers. This gives as approximately 192K
+ * UUIDs within one millisecond without overflow. This ougth to be enough for
+ * most practical purposes. Whenever counter overflow happens, this overflow is
+ * translated to increment of unix_ts_ms. So generation of UUIDs ate rate
+ * higher than 128MHz might lead to using timestamps ahead of time.
+ *
+ * We're not using the "Replace Left-Most Random Bits with Increased Clock
+ * Precision" method Section 6.2 (Method 3), because of portability concerns.
+ * It's unclear if all supported platforms can provide reliable microsocond
+ * precision time.
+ *
+ * All UUID generator state is backend-local. For UUIDs generated in one
+ * backend we guarantee monotonicity. UUIDs generated on different backends
+ * will be mostly monotonic if they are generated at frequences less than 1KHz,
+ * but this monotonicity is not strictly guaranteed. UUIDs generated on
+ * different nodes are mostly monotonic with regards to possible clock drift.
+ * Uniqueness of UUIDs generated at the same timestamp across different
+ * backends and/or nodes is guaranteed by using random bits.
+ */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	uint64_t	tms;
+	struct timeval tp;
+	bool		increment_counter;
+
+	gettimeofday(&tp, NULL);
+	tms = ((uint64_t) tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;
+	/* time from clock is protected from backward leaps */
+	increment_counter = (tms <= previous_timestamp);
+
+	if (increment_counter)
+	{
+		/*
+		 * Time did not advance from the previous generation, we must
+		 * increment counter
+		 */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* We only have 18-bit counter */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* protection from leap backward */
+		tms = previous_timestamp;
+
+		/* fill everything after the timestamp and counter with random bytes */
+		if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char) (sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char) (sequence_counter >> 6);
+		/* least significant 6 bits */
+		uuid->data[8] = (unsigned char) (sequence_counter);
+	}
+	else
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/*
+		 * Left-most counter bits are initialized as zero for the sole purpose
+		 * of guarding against counter rollovers. See section "Fixed-Length
+		 * Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		/* read randomly initialized bits of counter */
+		sequence_counter = ((uint32_t) uuid->data[8] & 0x3f) +
+			(((uint32_t) uuid->data[7]) << 6) +
+			(((uint32_t) uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char) (tms >> 40);
+	uuid->data[1] = (unsigned char) (tms >> 32);
+	uuid->data[2] = (unsigned char) (tms >> 24);
+	uuid->data[3] = (unsigned char) (tms >> 16);
+	uuid->data[4] = (unsigned char) (tms >> 8);
+	uuid->data[5] = (unsigned char) tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (pseudorandom) UUID, see
+	 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+#define GREGORIAN_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
 
 /*
  * Extract timestamp from UUID.
@@ -461,7 +628,40 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 
 		/* convert 100-ns intervals to us, then adjust */
 		ts = (TimestampTz) (tms / 10) -
-			((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 6)
+	{
+		tms = ((uint64_t) uuid->data[0]) << 52;
+		tms += ((uint64_t) uuid->data[1]) << 44;
+		tms += ((uint64_t) uuid->data[2]) << 36;
+		tms += ((uint64_t) uuid->data[3]) << 28;
+		tms += ((uint64_t) uuid->data[4]) << 20;
+		tms += ((uint64_t) uuid->data[5]) << 12;
+		tms += (((uint64_t) uuid->data[6]) & 0xf) << 8;
+		tms += ((uint64_t) uuid->data[7]);
+
+		/* convert 100-ns intervals to us, then adjust */
+		ts = (TimestampTz) (tms / 10) -
+			((uint64_t) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 7)
+	{
+		tms = uuid->data[5];
+		tms += ((uint64_t) uuid->data[4]) << 8;
+		tms += ((uint64_t) uuid->data[3]) << 16;
+		tms += ((uint64_t) uuid->data[2]) << 24;
+		tms += ((uint64_t) uuid->data[1]) << 32;
+		tms += ((uint64_t) uuid->data[0]) << 40;
+
+		/* convert ms to us, then adjust */
+		ts = (TimestampTz) (tms * 1000) -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
 
 		PG_RETURN_TIMESTAMPTZ(ts);
 	}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 177d81a891..30dc2d82af 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9170,6 +9170,12 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 4',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9896', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
 { oid => '9897', descr => 'extract timestamp from UUID',
   proname => 'uuid_extract_timestamp', proleakproof => 't',
   prorettype => 'timestamptz', proargtypes => 'uuid',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..2b5d67b7f0 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -872,6 +872,8 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+uuidv4()
+uuidv7()
 uuid_extract_timestamp(uuid)
 uuid_extract_version(uuid)
 -- restore normal output mode
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 6026e15ed3..4f010eed67 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,6 +168,26 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
 -- extract functions
 -- version
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
@@ -188,8 +208,32 @@ SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
                      
 (1 row)
 
+SELECT uuid_extract_version(uuidv4()); --4
+ uuid_extract_version 
+----------------------
+                    4
+(1 row)
+
+SELECT uuid_extract_version(uuidv7()); --7
+ uuid_extract_version 
+----------------------
+                    7
+(1 row)
+
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v1
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v6
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v7
  ?column? 
 ----------
  t
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index c88f6d087a..6fa039800a 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,6 +85,18 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
 
 -- extract functions
 
@@ -92,9 +104,13 @@ SELECT count(DISTINCT guid_field) FROM guid1;
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
 SELECT uuid_extract_version(gen_random_uuid());  -- 4
 SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
+SELECT uuid_extract_version(uuidv4()); --4
+SELECT uuid_extract_version(uuidv7()); --7
 
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v1
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v6
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v7
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
-- 
2.37.1 (Apple Git-137.1)

