From d2306902956be14a76e37ae714072e6ac59024ca Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Wed, 20 Mar 2024 22:30:14 +0500
Subject: [PATCH v30] Implement UUID v7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds functions for UUID generation. The most important function
here is uuidv7(), which generates a new UUID according to the new standard.
For code readability this commit adds alias uuidv4() to function
gen_random_uuid().

Author: Andrey Borodin
Reviewed-by: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch
Reviewed-by: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev
Reviewed-by: Peter Eisentraut, Chris Travers, Lukas Fittl
Reviewed-by: Michael Paquier, Masahiko Sawada, Stepan Neretin
Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com
---
 doc/src/sgml/datatype.sgml               |   2 +-
 doc/src/sgml/func.sgml                   |  21 +++-
 src/backend/utils/adt/uuid.c             | 146 +++++++++++++++++++++--
 src/include/catalog/pg_proc.dat          |  11 +-
 src/test/regress/expected/opr_sanity.out |   3 +
 src/test/regress/expected/uuid.out       |  41 ++++++-
 src/test/regress/sql/uuid.sql            |  18 ++-
 7 files changed, 227 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index e0d33f12e1..3e6751d64c 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4380,7 +4380,7 @@ SELECT to_tsvector( 'postgraduate' ), to_tsquery( 'postgres:*' );
 
    <para>
     The data type <type>uuid</type> stores Universally Unique Identifiers
-    (UUID) as defined by <ulink url="https://datatracker.ietf.org/doc/html/rfc4122">RFC 4122</ulink>,
+    (UUID) as defined by <ulink url="https://datatracker.ietf.org/doc/html/rfc9562">RFC 9562</ulink>,
     ISO/IEC 9834-8:2005, and related standards.
     (Some systems refer to this data type as a globally unique identifier, or
     GUID,<indexterm><primary>GUID</primary></indexterm> instead.)  This
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 05f630c6a6..8f69f4d3b6 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14213,6 +14213,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuidv4</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuidv7</primary>
+  </indexterm>
+
   <indexterm>
    <primary>uuid_extract_timestamp</primary>
   </indexterm>
@@ -14222,12 +14230,17 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
   </indexterm>
 
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes several functions to generate a UUID.
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
+<function>uuidv4</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   These functions return a version 4 (random) UUID.
+<synopsis>
+<function>uuidv7</function> () <returnvalue>uuid</returnvalue>
 </synopsis>
-   This function returns a version 4 (random) UUID.  This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function returns a version 7 UUID (UNIX timestamp with millisecond
+   precision + sub-millisecond timestamp + random).
   </para>
 
   <para>
@@ -14251,7 +14264,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
 <function>uuid_extract_version</function> (uuid) <returnvalue>smallint</returnvalue>
 </synopsis>
    This function extracts the version from a UUID of the variant described by
-   <ulink url="https://datatracker.ietf.org/doc/html/rfc4122">RFC 4122</ulink>.  For
+   <ulink url="https://datatracker.ietf.org/doc/html/rfc9562">RFC 9562</ulink>.  For
    other variants, this function returns null.  For example, for a UUID
    generated by <function>gen_random_uuid</function>, this function will
    return 4.
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 5284d23dcc..2f45086237 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,6 +13,8 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -401,6 +403,12 @@ uuid_hash_extended(PG_FUNCTION_ARGS)
 	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
 }
 
+/*
+ * Generate UUID version 4.
+ *
+ * All UUID bytes are filled with strong random numbers except version and
+ * variant 0b10 bits.
+ */
 Datum
 gen_random_uuid(PG_FUNCTION_ARGS)
 {
@@ -413,7 +421,7 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 
 	/*
 	 * Set magic numbers for a "version 4" (pseudorandom) UUID, see
-	 * http://tools.ietf.org/html/rfc4122#section-4.4
+	 * https://datatracker.ietf.org/doc/html/rfc9562#section-5.4
 	 */
 	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;	/* time_hi_and_version */
 	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;	/* clock_seq_hi_and_reserved */
@@ -421,12 +429,121 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 	PG_RETURN_UUID_P(uuid);
 }
 
-#define UUIDV1_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
+/*
+ * Generate UUID version 7 per RFC 9562.
+ *
+ * Monotonicity (within a single backend) is ensured with the method "Replace
+ * Leftmost Random Bits with Increased Clock Precision (Method 3)": the top 10
+ * bits of "rand_a" hold the sub-millisecond (microsecond) part of the time.
+ * pg_test_timing indicates that such precision is available on most systems.
+ * If the clock has not advanced since the previous call, the previous
+ * timestamp plus one microsecond is used instead.  When called with an
+ * interval argument, the resulting timestamp is shifted by that interval.
+ */
+static Datum
+generate_uuidv7(FunctionCallInfo fcinfo)
+{
+	static uint64 previous_us = 0;
+
+	pg_uuid_t	*uuid = palloc(UUID_LEN);
+	uint64		 us;
+	uint64		 unix_ts_ms;
+	uint16 		 increased_clock_precision;
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	/* Unix time in microseconds; widen tv_sec before multiplying */
+	us = (uint64) tv.tv_sec * USECS_PER_SEC + tv.tv_usec;
+	if (previous_us >= us)
+		us = previous_us + 1;
+	previous_us = us;
+
+	if (PG_NARGS() > 0)
+	{
+		/*
+		 * We are given a time shift interval as an argument.
+		 * To make correct computations we call
+		 * timestamptz_pl_interval() with corresponding logic. This logic is
+		 * implemented on TimestampTz, so we have to convert there and back.
+		 */
+		Interval *span;
+		/* Convert time part of UUID to Timestamptz (us since Postgres epoch) */
+		TimestampTz ts = (TimestampTz) (us -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC);
+		span = PG_GETARG_INTERVAL_P(0);
+		/* Compute shifted time */
+		ts = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
+													 TimestampTzGetDatum(ts),
+													 IntervalPGetDatum(span)));
+		/* Convert TimestampTz back to microseconds since the Unix epoch. */
+		us = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC);
+	}
+
+	unix_ts_ms = us / 1000;
+	/* microsecond fraction (used to fill 10 bits) */
+	increased_clock_precision = us % 1000;
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char) (unix_ts_ms >> 40);
+	uuid->data[1] = (unsigned char) (unix_ts_ms >> 32);
+	uuid->data[2] = (unsigned char) (unix_ts_ms >> 24);
+	uuid->data[3] = (unsigned char) (unix_ts_ms >> 16);
+	uuid->data[4] = (unsigned char) (unix_ts_ms >> 8);
+	uuid->data[5] = (unsigned char) unix_ts_ms;
+
+	/* 10-bit fraction: high 4 bits -> data[6] low nibble, low 6 -> data[7] */
+	uuid->data[6] = (unsigned char) (increased_clock_precision >> 6);
+	uuid->data[7] = (unsigned char) (increased_clock_precision << 2);
+
+	/* fill everything after the increased clock precision with random bytes */
+	if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not generate random values")));
+
+	/* Reuse the 2 random bits of data[8] that the variant will overwrite */
+	uuid->data[7] = uuid->data[7] | ((uuid->data[8] >> 6) & 3);
+
+	/*
+	 * Set magic numbers for a "version 7" (time-ordered) UUID, see
+	 * https://www.rfc-editor.org/rfc/rfc9562#name-version-field
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+/*
+ * SQL-callable uuidv7(): generate a version 7 UUID from the current time.
+ */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+   return generate_uuidv7(fcinfo);
+}
+
+/*
+ * SQL-callable uuidv7(interval): timestamp is shifted by the argument.
+ */
+Datum
+uuidv7_interval(PG_FUNCTION_ARGS)
+{
+   return generate_uuidv7(fcinfo);
+}
+
+/*
+ * Start of a Gregorian epoch == date2j(1582,10,15)
+ * We cast it to 64-bit because it's used in overflow-prone computations
+ */
+#define GREGORIAN_EPOCH_JDATE  INT64CONST(2299161)
 
 /*
  * Extract timestamp from UUID.
  *
- * Returns null if not RFC 4122 variant or not a version that has a timestamp.
+ * Returns null if not RFC 9562 variant or not a version that has a timestamp.
  */
 Datum
 uuid_extract_timestamp(PG_FUNCTION_ARGS)
@@ -436,7 +553,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 	uint64		tms;
 	TimestampTz ts;
 
-	/* check if RFC 4122 variant */
+	/* check if RFC 9562 variant */
 	if ((uuid->data[8] & 0xc0) != 0x80)
 		PG_RETURN_NULL();
 
@@ -455,7 +572,22 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 
 		/* convert 100-ns intervals to us, then adjust */
 		ts = (TimestampTz) (tms / 10) -
-			((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 7)
+	{
+		tms = uuid->data[5];
+		tms += ((uint64) uuid->data[4]) << 8;
+		tms += ((uint64) uuid->data[3]) << 16;
+		tms += ((uint64) uuid->data[2]) << 24;
+		tms += ((uint64) uuid->data[1]) << 32;
+		tms += ((uint64) uuid->data[0]) << 40;
+
+		/* convert ms to us, then adjust */
+		ts = (TimestampTz) (tms * 1000) -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
 
 		PG_RETURN_TIMESTAMPTZ(ts);
 	}
@@ -467,7 +599,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 /*
  * Extract UUID version.
  *
- * Returns null if not RFC 4122 variant.
+ * Returns null if not RFC 9562 variant.
  */
 Datum
 uuid_extract_version(PG_FUNCTION_ARGS)
@@ -475,7 +607,7 @@ uuid_extract_version(PG_FUNCTION_ARGS)
 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
 	uint16		version;
 
-	/* check if RFC 4122 variant */
+	/* check if RFC 9562 variant */
 	if ((uuid->data[8] & 0xc0) != 0x80)
 		PG_RETURN_NULL();
 
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1ec0d6f6b5..3c426ca532 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9340,11 +9340,20 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 4',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9896', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
+{ oid => '9897', descr => 'generate UUID version 7 with a timestamp shifted by specified interval',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7_interval' },
 { oid => '6342', descr => 'extract timestamp from UUID',
   proname => 'uuid_extract_timestamp', proleakproof => 't',
   prorettype => 'timestamptz', proargtypes => 'uuid',
   prosrc => 'uuid_extract_timestamp' },
-{ oid => '6343', descr => 'extract version from RFC 4122 UUID',
+{ oid => '6343', descr => 'extract version from RFC 9562 UUID',
   proname => 'uuid_extract_version', proleakproof => 't', prorettype => 'int2',
   proargtypes => 'uuid', prosrc => 'uuid_extract_version' },
 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 34a32bd11d..43e7180a16 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -878,6 +878,9 @@ crc32(bytea)
 crc32c(bytea)
 bytea_larger(bytea,bytea)
 bytea_smaller(bytea,bytea)
+uuidv4()
+uuidv7()
+uuidv7(interval)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 6026e15ed3..aa6224e81b 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,6 +168,27 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7(INTERVAL '1 day'));
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     3
+(1 row)
+
 -- extract functions
 -- version
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
@@ -188,8 +209,26 @@ SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
                      
 (1 row)
 
+SELECT uuid_extract_version(uuidv4()); --4
+ uuid_extract_version 
+----------------------
+                    4
+(1 row)
+
+SELECT uuid_extract_version(uuidv7()); --7
+ uuid_extract_version 
+----------------------
+                    7
+(1 row)
+
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7
  ?column? 
 ----------
  t
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index c88f6d087a..eec7f160f8 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,6 +85,19 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7(INTERVAL '1 day'));
+SELECT count(DISTINCT guid_field) FROM guid1;
+
 
 -- extract functions
 
@@ -92,9 +105,12 @@ SELECT count(DISTINCT guid_field) FROM guid1;
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
 SELECT uuid_extract_version(gen_random_uuid());  -- 4
 SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
+SELECT uuid_extract_version(uuidv4()); --4
+SELECT uuid_extract_version(uuidv7()); --7
 
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
-- 
2.39.5 (Apple Git-154)

