From 916bbaf8ce0602b5a3639a0207a52a8aea78487a Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Wed, 20 Mar 2024 22:30:14 +0500
Subject: [PATCH v24] Implement UUID v7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds function for UUID generation. Most important function here
is uuidv7() which generates new UUID according to the new standard.
For code readability this commit adds alias uuidv4() to function
gen_random_uuid().

Author: Andrey Borodin
Reviewed-by: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch
Reviewed-by: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev
Reviewed-by: Peter Eisentraut, Chris Travers, Lukas Fittl
Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com
---
 doc/src/sgml/func.sgml                   |  19 ++-
 src/backend/utils/adt/uuid.c             | 165 ++++++++++++++++++++++-
 src/include/catalog/pg_proc.dat          |   6 +
 src/test/regress/expected/opr_sanity.out |   2 +
 src/test/regress/expected/uuid.out       |  46 ++++++-
 src/test/regress/sql/uuid.sql            |  18 ++-
 6 files changed, 249 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 8ecc02f2b9..763fdab535 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14127,6 +14127,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuidv4</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuidv7</primary>
+  </indexterm>
+
   <indexterm>
    <primary>uuid_extract_timestamp</primary>
   </indexterm>
@@ -14136,12 +14144,17 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
   </indexterm>
 
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes several functions to generate a UUID.
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
+<function>uuidv4</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   These functions return a version 4 (random) UUID.
+<synopsis>
+<function>uuidv7</function> () <returnvalue>uuid</returnvalue>
 </synopsis>
-   This function returns a version 4 (random) UUID.  This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function returns a version 7 UUID (UNIX timestamp with 1ms precision +
+   randomly seeded counter + random).
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 45eb1b2fea..e3b42224e6 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,6 +13,8 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -407,6 +409,12 @@ uuid_hash_extended(PG_FUNCTION_ARGS)
 	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
 }
 
+/*
+ * Generate UUID version 4.
+ *
+ * All UUID bytes are filled with strong random numbers except version and
+ * variant 0b10 bits.
+ */
 Datum
 gen_random_uuid(PG_FUNCTION_ARGS)
 {
@@ -427,7 +435,127 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 	PG_RETURN_UUID_P(uuid);
 }
 
-#define UUIDV1_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
+/*
+ * Generate UUID version 7.
+ *
+ * Following description is taken from RFC draft and slightly extended to
+ * reflect implementation specific choices.
+ *
+ * "Fixed-Length Dedicated Counter Bits" (Method 1) can be implemented with
+ * arbitrary size of a counter. We choose size 18 to reuse all space of bytes
+ * that are touched by ver and var fields + rand_a bytes between them.
+ * Whenever timestamp unix_ts_ms is moving forward, this counter bits are
+ * reinitialized. Reinilialization always sets most significant bit to 0, other
+ * bits are initialized with random numbers. This gives as approximately 192K
+ * UUIDs within one millisecond without overflow. This ougth to be enough for
+ * most practical purposes. Whenever counter overflow happens, this overflow is
+ * translated to increment of unix_ts_ms. So generation of UUIDs ate rate
+ * higher than 128MHz might lead to using timestamps ahead of time.
+ *
+ * We're not using the "Replace Left-Most Random Bits with Increased Clock
+ * Precision" method Section 6.2 (Method 3), because of portability concerns.
+ * It's unclear if all supported platforms can provide reliable microsocond
+ * precision time.
+ *
+ * All UUID generator state is backend-local. For UUIDs generated in one
+ * backend we guarantee monotonicity. UUIDs generated on different backends
+ * will be mostly monotonic if they are generated at frequences less than 1KHz,
+ * but this monotonicity is not strictly guaranteed. UUIDs generated on
+ * different nodes are mostly monotonic with regards to possible clock drift.
+ * Uniqueness of UUIDs generated at the same timestamp across different
+ * backends and/or nodes is guaranteed by using random bits.
+ */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+	static uint32 sequence_counter;
+	static uint64 previous_timestamp = 0;
+
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	uint64	tms;
+	struct timeval tp;
+	bool		time_tick_forward;
+
+	gettimeofday(&tp, NULL);
+	tms = ((uint64) tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;
+	/* time from clock is protected from backward leaps */
+	time_tick_forward = (tms > previous_timestamp);
+
+	if (time_tick_forward)
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/*
+		 * Left-most counter bit is initialized as zero for the sole purpose
+		 * of guarding against counter rollovers. See section "Fixed-Length
+		 * Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		/* read randomly initialized bits of counter */
+		sequence_counter = ((uint32) uuid->data[8] & 0x3f) +
+			(((uint32) uuid->data[7]) << 6) +
+			(((uint32) uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+	else
+	{
+		/*
+		 * Time did not advance from the previous generation, we must
+		 * increment counter
+		 */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* We only have 18-bit counter */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* protection from leap backward */
+		tms = previous_timestamp;
+
+		/* fill everything after the timestamp and counter with random bytes */
+		if (!pg_strong_random(&uuid->data[9], UUID_LEN - 9))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char) (sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char) (sequence_counter >> 6);
+		/* least significant 6 bits */
+		uuid->data[8] = (unsigned char) (sequence_counter);
+	}
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char) (tms >> 40);
+	uuid->data[1] = (unsigned char) (tms >> 32);
+	uuid->data[2] = (unsigned char) (tms >> 24);
+	uuid->data[3] = (unsigned char) (tms >> 16);
+	uuid->data[4] = (unsigned char) (tms >> 8);
+	uuid->data[5] = (unsigned char) tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (pseudorandom) UUID, see
+	 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+#define GREGORIAN_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
 
 /*
  * Extract timestamp from UUID.
@@ -461,7 +589,40 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 
 		/* convert 100-ns intervals to us, then adjust */
 		ts = (TimestampTz) (tms / 10) -
-			((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 6)
+	{
+		tms = ((uint64) uuid->data[0]) << 52;
+		tms += ((uint64) uuid->data[1]) << 44;
+		tms += ((uint64) uuid->data[2]) << 36;
+		tms += ((uint64) uuid->data[3]) << 28;
+		tms += ((uint64) uuid->data[4]) << 20;
+		tms += ((uint64) uuid->data[5]) << 12;
+		tms += (((uint64) uuid->data[6]) & 0xf) << 8;
+		tms += ((uint64) uuid->data[7]);
+
+		/* convert 100-ns intervals to us, then adjust */
+		ts = (TimestampTz) (tms / 10) -
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 7)
+	{
+		tms = uuid->data[5];
+		tms += ((uint64) uuid->data[4]) << 8;
+		tms += ((uint64) uuid->data[3]) << 16;
+		tms += ((uint64) uuid->data[2]) << 24;
+		tms += ((uint64) uuid->data[1]) << 32;
+		tms += ((uint64) uuid->data[0]) << 40;
+
+		/* convert ms to us, then adjust */
+		ts = (TimestampTz) (tms * 1000) -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
 
 		PG_RETURN_TIMESTAMPTZ(ts);
 	}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 0d26e5b422..2d8bbd3137 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9173,6 +9173,12 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 4',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9896', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
 { oid => '9897', descr => 'extract timestamp from UUID',
   proname => 'uuid_extract_timestamp', proleakproof => 't',
   prorettype => 'timestamptz', proargtypes => 'uuid',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..2b5d67b7f0 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -872,6 +872,8 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+uuidv4()
+uuidv7()
 uuid_extract_timestamp(uuid)
 uuid_extract_version(uuid)
 -- restore normal output mode
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 6026e15ed3..4f010eed67 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,6 +168,26 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
 -- extract functions
 -- version
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
@@ -188,8 +208,32 @@ SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
                      
 (1 row)
 
+SELECT uuid_extract_version(uuidv4()); --4
+ uuid_extract_version 
+----------------------
+                    4
+(1 row)
+
+SELECT uuid_extract_version(uuidv7()); --7
+ uuid_extract_version 
+----------------------
+                    7
+(1 row)
+
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v1
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v6
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v7
  ?column? 
 ----------
  t
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index c88f6d087a..6fa039800a 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,6 +85,18 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
 
 -- extract functions
 
@@ -92,9 +104,13 @@ SELECT count(DISTINCT guid_field) FROM guid1;
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
 SELECT uuid_extract_version(gen_random_uuid());  -- 4
 SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
+SELECT uuid_extract_version(uuidv4()); --4
+SELECT uuid_extract_version(uuidv7()); --7
 
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v1
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v6
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 4122bis test vector for v7
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
-- 
2.37.1 (Apple Git-137.1)

