From 25460815ccbfcd6f86d39beddc6c0aa7005fc0a4 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Sun, 20 Aug 2023 23:55:31 +0300
Subject: [PATCH v9] Implement UUID v7 as per IETF draft

This commit adds a function to generate UUID v7.
The function optionally accepts a timestamp that is used to generate
the next UUID.
We also add a function to extract the timestamp from a UUID v7.

Authors: Andrey Borodin, Sergey Prokhorenko
---
 doc/src/sgml/func.sgml                   |  18 ++-
 src/backend/utils/adt/pseudotypes.c      |  12 +-
 src/backend/utils/adt/uuid.c             | 157 +++++++++++++++++++++++
 src/include/catalog/pg_proc.dat          |  14 ++
 src/test/regress/expected/opr_sanity.out |   4 +
 src/test/regress/expected/uuid.out       |  43 +++++++
 src/test/regress/sql/uuid.sql            |  22 ++++
 7 files changed, 267 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 210c7c0b02..d6c83cb13f 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14130,13 +14130,29 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>gen_uuid_v7</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuid_v7_time</primary>
+  </indexterm>
+
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes two functions to generate a UUID:
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
 </synopsis>
    This function returns a version 4 (random) UUID.  This is the most commonly
    used type of UUID and is appropriate for most applications.
+<synopsis>
+<function>gen_uuid_v7</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   This function returns a version 7 (time-ordered + random) UUID.
+<synopsis>
+<function>uuid_v7_time</function> (uuid) <returnvalue>timestamptz</returnvalue>
+</synopsis>
+   This function extracts a <type>timestamptz</type> from a version 7 UUID.
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
index a3a991f634..82c2d1309d 100644
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -23,6 +23,7 @@
 #include "postgres.h"
 
 #include "libpq/pqformat.h"
+#include "miscadmin.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/rangetypes.h"
@@ -332,11 +333,18 @@ shell_out(PG_FUNCTION_ARGS)
  *
  * We must disallow input of pg_node_tree values because the SQL functions
  * that operate on the type are not secure against malformed input.
- * We do want to allow output, though.
+ * We do want to allow output, though. Also we need input during bootstrap.
  */
-PSEUDOTYPE_DUMMY_INPUT_FUNC(pg_node_tree);
 PSEUDOTYPE_DUMMY_RECEIVE_FUNC(pg_node_tree);
 
+Datum
+pg_node_tree_in(PG_FUNCTION_ARGS)
+{
+	if (!IsBootstrapProcessingMode())	/* input is accepted only during bootstrap */
+		elog(ERROR, "cannot accept a value of type pg_node_tree");
+	return textin(fcinfo);	/* bootstrap mode: hand the string to textin() */
+}
+
 Datum
 pg_node_tree_out(PG_FUNCTION_ARGS)
 {
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 73dfd711c7..66d9672dd1 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,6 +13,9 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
+#include "access/xlog.h"
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -20,6 +23,7 @@
 #include "utils/builtins.h"
 #include "utils/guc.h"
 #include "utils/sortsupport.h"
+#include "utils/timestamp.h"
 #include "utils/uuid.h"
 
 /* sortsupport for uuid */
@@ -421,3 +425,156 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 
 	PG_RETURN_UUID_P(uuid);
 }
+
+static uint32_t sequence_counter;	/* 18-bit counter carried over between calls */
+static uint64_t previous_timestamp = 0;	/* unix_ts_ms of the last generated UUID, 0 initially */
+
+/* gen_uuid_v7: build a version 7 UUID from the system clock or an optional timestamptz. */
+Datum
+gen_uuid_v7(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	TimestampTz ts;
+	uint64_t tms;
+	struct timeval tp;
+	bool increment_counter;
+
+	if (PG_NARGS() == 0 || PG_ARGISNULL(0))	/* no timestamp supplied: read the clock */
+	{
+		gettimeofday(&tp, NULL);
+		tms = ((uint64_t)tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;	/* milliseconds */
+		/* clock did not move past the previous timestamp (or leapt back): use the counter */
+		increment_counter = tms <= previous_timestamp;
+	}
+	else
+	{
+		ts = PG_GETARG_TIMESTAMPTZ(0);
+		tms = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC) / 1000;
+		/*
+		 * The time can leap backwards when provided by the user, so we use
+		 * counter only when called with exactly same unix_ts_ms argument.
+		 */
+		increment_counter = (tms == previous_timestamp);
+	}
+
+	if (increment_counter)
+	{
+		/* Time did not increment from the previous generation, we must increment counter */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* The counter is only 18 bits wide: restart it and advance the timestamp */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* emit the remembered timestamp, which may be ahead of the input time */
+		tms = previous_timestamp;
+
+		/* fill everything after the timestamp and counter with random bytes */
+		if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char)(sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char)(sequence_counter >> 6);
+		/* least significant 6 bits; the top two bits are replaced by the variant below */
+		uuid->data[8] = (unsigned char)(sequence_counter);
+	}
+	else
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/*
+		 * The left-most counter bit (bit 3 of octet 6) is initialized as zero
+		 * for the sole purpose of guarding against counter rollovers.
+		 * See section "Fixed-Length Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis-09#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		sequence_counter = ((uint32_t)uuid->data[8] & 0x3f) +	/* seed from the random bits */
+							(((uint32_t)uuid->data[7]) << 6) +
+							(((uint32_t)uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+
+	/* Fill in time part: low 48 bits of tms, most significant byte first */
+	uuid->data[0] = (unsigned char)(tms >> 40);
+	uuid->data[1] = (unsigned char)(tms >> 32);
+	uuid->data[2] = (unsigned char)(tms >> 24);
+	uuid->data[3] = (unsigned char)(tms >> 16);
+	uuid->data[4] = (unsigned char)(tms >> 8);
+	uuid->data[5] = (unsigned char)tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (time-ordered + random) UUID, see
+	 * the draft below (TODO: cite the final RFC once its number is known)
+	 * https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format#name-creating-a-uuidv7-value
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+/* uuid_v7_time: return the timestamp stored in a version 7 UUID as timestamptz. */
+Datum
+uuid_v7_time(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	uint64_t	tms = 0;
+	int			i;
+
+	/* version (high nibble of octet 6) must be 7, variant bits must be 1, 0 */
+	if ((uuid->data[6] & 0xf0) != 0x70 ||
+		(uuid->data[8] & 0xc0) != 0x80)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("uuid_v7_time() can only extract timestamp from UUID v7")));
+
+	/* octets 0..5 hold the millisecond timestamp, most significant byte first */
+	for (i = 0; i < 6; i++)
+		tms = (tms << 8) | uuid->data[i];
+
+	/* convert ms to us, then rebase from the Unix epoch to the Postgres epoch */
+	PG_RETURN_TIMESTAMPTZ((TimestampTz) (tms * 1000) -
+		(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC);
+}
+
+/* uuid_ver: return the version field of an RFC 4122-variant UUID as int2. */
+Datum
+uuid_ver(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+
+	if ((uuid->data[8] & 0xc0) != 0x80)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("uuid_ver() is only defined for RFC 4122 variants")));
+
+	/* the version is the high nibble of octet 6 */
+	PG_RETURN_INT16(uuid->data[6] >> 4);
+}
+
+/* uuid_var: return the top two bits of octet 8 (the variant field) as int2. */
+Datum
+uuid_var(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	int16		result;
+
+	result = uuid->data[8] >> 6;
+
+	PG_RETURN_INT16(result);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 58811a6530..c00cd1320f 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9174,6 +9174,20 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 7', proisstrict => 'f',
+  proname => 'gen_uuid_v7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => 'timestamptz', prosrc => 'gen_uuid_v7',
+  proargnames => '{unix_ts_ms}', pronargdefaults => 1, proargmodes => '{i}',
+  proargdefaults => '({CONST :consttype 1184 :consttypmod -1 :constcollid 0 :constlen 8 :constbyval true :constisnull true :location 46 :constvalue <>})' },
+{ oid => '9896', descr => 'extract timestamp from UUID version 7',
+  proname => 'uuid_v7_time', proleakproof => 't', provolatile => 'i',
+  prorettype => 'timestamptz', proargtypes => 'uuid', prosrc => 'uuid_v7_time' },
+{ oid => '9897', descr => 'extract version from RFC 4122 UUID',
+  proname => 'uuid_ver', proleakproof => 't', provolatile => 'i',
+  prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_ver' },
+{ oid => '9898', descr => 'extract variant from UUID',
+  proname => 'uuid_var', proleakproof => 't', provolatile => 'i',
+  prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_var' },
 
 # pg_lsn
 { oid => '3229', descr => 'I/O',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7610b011d6..163658f002 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -872,6 +872,10 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+gen_uuid_v7(timestamp with time zone)
+uuid_v7_time(uuid)
+uuid_ver(uuid)
+uuid_var(uuid)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 8e7f21910d..b8426b1f8e 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,5 +168,48 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7 with same unix_ts_ms
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7(now()));
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7(now()));
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- check that timestamp is extracted correctly
+SELECT uuid_v7_time(gen_uuid_v7(TIMESTAMP '2024-01-16 13:37:00')) - TIMESTAMP '2024-01-16 13:37:00';
+ ?column? 
+----------
+ @ 0
+(1 row)
+
+-- support functions for UUID versions and variants
+SELECT uuid_ver(gen_uuid_v7());
+ uuid_ver 
+----------
+        7
+(1 row)
+
+SELECT uuid_var(gen_uuid_v7());
+ uuid_var 
+----------
+        2
+(1 row)
+
+-- uuid_v7_time() must refuse to accept non-UUIDv7
+select uuid_v7_time(gen_random_uuid());
+ERROR:  uuid_v7_time() can only extract timestamp from UUID v7
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 9a8f437c7d..fb28766ece 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,5 +85,27 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7 with same unix_ts_ms
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7(now()));
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7(now()));
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- check that timestamp is extracted correctly
+SELECT uuid_v7_time(gen_uuid_v7(TIMESTAMP '2024-01-16 13:37:00')) - TIMESTAMP '2024-01-16 13:37:00';
+
+-- support functions for UUID versions and variants
+SELECT uuid_ver(gen_uuid_v7());
+SELECT uuid_var(gen_uuid_v7());
+
+-- uuid_v7_time() must refuse to accept non-UUIDv7
+select uuid_v7_time(gen_random_uuid());
+
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
-- 
2.37.1 (Apple Git-137.1)

