Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
On Fri, 2025-10-24 at 11:54 +1300, David Rowley wrote: > On Fri, 24 Oct 2025 at 09:38, Laurenz Albe wrote: > > I recommend that you create a primary key on each partition rather than > > having one > > on the partitioned table. > > It might be worth mentioning that doing that would forego having the > ability to reference the partitioned table in a foreign key > constraint. Right, but referencing a partitioned table with a foreign key is a mixed blessing anyway: you could no longer drop partitions from the partitioned table without scanning the referencing table to verify that the foreign key is not violated. Yours, Laurenz Albe
Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
I think from a practical standpoint, partitioning directly on uuidv7 is
going to cause problems. You can't directly see the partition constraints,
you have to do tricks like your floor function to make it work, and you
have to be super careful in how you construct your where clauses. However,
what if you partition by the extracted timestamp? That way, queries are
simplified, timestamps will not span multiple tables, partitions are
human-readable again, and you can use pg_partman once more. Untested for
large-scale performance, but something like this:
\set ON_ERROR_STOP on
drop schema if exists gregtest cascade;
create schema gregtest;
set search_path = gregtest;
create table message (
id uuid
-- ... plus other columns
) partition by range (uuid_extract_timestamp(id));
create table message_2025_10_22 partition of message for values from
('2025-10-22') to ('2025-10-23');
create table message_2025_10_23 partition of message for values from
('2025-10-23') to ('2025-10-24');
create table message_2025_10_24 partition of message for values from
('2025-10-24') to ('2025-10-25');
create index m_2025_10_22_id on message_2025_10_22
(uuid_extract_timestamp(id));
create index m_2025_10_23_id on message_2025_10_23
(uuid_extract_timestamp(id));
create index m_2025_10_24_id on message_2025_10_24
(uuid_extract_timestamp(id));
-- Today:
insert into message select uuidv7() from generate_series(1, 111_000);
-- Yesterday:
insert into message select uuidv7('-1 day') from generate_series(1,
222_000);
-- Tomorrow:
insert into message select uuidv7('+1 day') from generate_series(1,
333_000);
set random_page_cost = 1.1; -- SSD rulez
vacuum analyze message;
select count(id) from only message;
select count(id) from message_2025_10_22;
select count(id) from message_2025_10_23;
select count(id) from message_2025_10_24;
explain select * from message where uuid_extract_timestamp(id) =
'2025-10-23 10:23:45';
explain select * from message where uuid_extract_timestamp(id)
between '2025-10-23 23:00:00' and '2025-10-24 03:00:00';
Which gives this output when run:
count
---
0
count
222000
count
111000
count
333000
QUERY PLAN
-
Index Scan using m_2025_10_23_id on message_2025_10_23 message
(cost=0.29..5.29 rows=160)
Index Cond: (uuid_extract_timestamp(id) = '2025-10-23
10:23:45-04'::timestamptz)
QUERY PLAN
---
Append (cost=0.29..5.04 rows=2)
-> Index Scan using m_2025_10_23_id on message_2025_10_23 message_1
(cost=0.29..2.51 rows=1)
Index Cond: ((uuid_extract_timestamp(id) >= '2025-10-23
23:00:00-04'::timestamptz)
AND (uuid_extract_timestamp(id) <= '2025-10-24
03:00:00-04'::timestamptz))
-> Index Scan using m_2025_10_24_id on message_2025_10_24 message_2
(cost=0.30..2.52 rows=1)
Index Cond: ((uuid_extract_timestamp(id) >= '2025-10-23
23:00:00-04'::timestamptz)
AND (uuid_extract_timestamp(id) <= '2025-10-24
03:00:00-04'::timestamptz))
Cheers,
Greg
--
Crunchy Data - https://www.crunchydata.com
Enterprise Postgres Software Products & Tech Support
Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
Greg,
Thank you very much for your recommendations and your sample code. I
originally had it your way, but then I found out this is not possible
create table message (
id uuid PRIMARY KEY
-- ... plus other columns
) partition by range (uuid_extract_timestamp(id));
whereas, this is
create table message (
id uuid PRIMARY KEY
-- ... plus other columns
) partition by range (id);
Also, I had a misunderstanding that in this query
explain select * from message where uuid_extract_timestamp(id)
between '2025-10-23 23:00:00' and '2025-10-24 03:00:00';
that having uuid_extract_timestamp(id) on the left was SARGable, but
clearly it is based on your sample.
Thank you again, this was very helpful.
On Thu, Oct 23, 2025 at 9:52 AM Greg Sabino Mullane
wrote:
> I think from a practical standpoint, partitioning directly on uuidv7 is
> going to cause problems. You can't directly see the partition constraints,
> you have to do tricks like your floor function to make it work, and you
> have to be super careful in how you construct your where clauses. However,
> what if you partition by the extracted timestamp? That way, queries are
> simplified, timestamps will not span multiple tables, partitions are
> human-readable again, and you can use pg_partman once more. Untested for
> large-scale performance, but something like this:
>
> \set ON_ERROR_STOP on
>
> drop schema if exists gregtest cascade;
> create schema gregtest;
> set search_path = gregtest;
>
> create table message (
> id uuid
> -- ... plus other columns
> ) partition by range (uuid_extract_timestamp(id));
>
> create table message_2025_10_22 partition of message for values from
> ('2025-10-22') to ('2025-10-23');
> create table message_2025_10_23 partition of message for values from
> ('2025-10-23') to ('2025-10-24');
> create table message_2025_10_24 partition of message for values from
> ('2025-10-24') to ('2025-10-25');
>
> create index m_2025_10_22_id on message_2025_10_22
> (uuid_extract_timestamp(id));
> create index m_2025_10_23_id on message_2025_10_23
> (uuid_extract_timestamp(id));
> create index m_2025_10_24_id on message_2025_10_24
> (uuid_extract_timestamp(id));
>
> -- Today:
> insert into message select uuidv7() from generate_series(1, 111_000);
> -- Yesterday:
> insert into message select uuidv7('-1 day') from generate_series(1,
> 222_000);
> -- Tomorrow:
> insert into message select uuidv7('+1 day') from generate_series(1,
> 333_000);
>
> set random_page_cost = 1.1; -- SSD rulez
> vacuum analyze message;
>
> select count(id) from only message;
> select count(id) from message_2025_10_22;
> select count(id) from message_2025_10_23;
> select count(id) from message_2025_10_24;
>
> explain select * from message where uuid_extract_timestamp(id) =
> '2025-10-23 10:23:45';
>
> explain select * from message where uuid_extract_timestamp(id)
> between '2025-10-23 23:00:00' and '2025-10-24 03:00:00';
>
>
> Which gives this output when run:
>
> count
> ---
> 0
>
> count
>
> 222000
>
> count
>
> 111000
>
> count
>
> 333000
>
>
> QUERY PLAN
>
> -
> Index Scan using m_2025_10_23_id on message_2025_10_23 message
> (cost=0.29..5.29 rows=160)
>Index Cond: (uuid_extract_timestamp(id) = '2025-10-23
> 10:23:45-04'::timestamptz)
>
>
> QUERY PLAN
>
> ---
> Append (cost=0.29..5.04 rows=2)
>-> Index Scan using m_2025_10_23_id on message_2025_10_23 message_1
> (cost=0.29..2.51 rows=1)
> Index Cond: ((uuid_extract_timestamp(id) >= '2025-10-23
> 23:00:00-04'::timestamptz)
> AND (uuid_extract_timestamp(id) <= '2025-10-24
> 03:00:00-04'::timestamptz))
>-> Index Scan using m_2025_10_24_id on message_2025_10_24 message_2
> (cost=0.30..2.52 rows=1)
> Index Cond: ((uuid_extract_timestamp(id) >= '2025-10-23
> 23:00:00-04'::timestamptz)
> AND (uuid_extract_timestamp(id) <= '2025-10-24
> 03:00:00-04'::timestamptz))
>
>
>
> Cheers,
> Greg
>
> --
> Crunchy Data - https://www.crunchydata.com
> Enterprise Postgres Software Products & Tech Support
>
>
Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
On Thu, 2025-10-23 at 13:11 -0700, Jonathan Reis wrote: > Thank you very much for your recommendations and your sample code. I > originally had it your way, but then I found out this is not possible > > create table message ( > id uuid PRIMARY KEY > -- ... plus other columns > ) partition by range (uuid_extract_timestamp(id)); That's because you want to make "id" a primary key, but you can only create a primary key constraint on a partitioned table if the partitioning key is a subset of the primary key. I recommend that you create a primary key on each partition rather than having one on the partitioned table. Yours, Laurenz Albe
Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
On Fri, 24 Oct 2025 at 09:38, Laurenz Albe wrote: > I recommend that you create a primary key on each partition rather than > having one > on the partitioned table. It might be worth mentioning that doing that would forego having the ability to reference the partitioned table in a foreign key constraint. David
Re: Performance implications of partitioning by UUIDv7 range in PostgreSQL v18
Thank you all for your input on this. Here is a summary of what I have
learned from you all.
Approach 1: partition on uuid_extract_timestamp(id)
Pros: No need for custom function to convert from timestamptz to uuidv7
Partitions are human-readable
Can use pg_partman
Cons: Cannot have a primary key on id
Cannot use foreign keys
Approach 2: partition on id
Pros: Can have a primary key on id
Can have foreign key constraints
Cons Require custom function to convert from timestamptz to uuidv7
Partitions ranges must be decoded to be human-readable
Can't use pg_partman (this is true?)
>From a cursory glance, they both seem to perform the same
with partition pruning and lookups. So, I can't decide which approach is
better. I am planning on doing some load testing within our product to see
if either performs better.
The main thing I don't like about Approach 2 is the need for a custom
uuidv7_floor function to calculate a uuidv7 from a timestamptz. It would be
nice to have this baked in.
Approach 1: (thank you Greg for this)
create table message (
id uuid
-- ... plus other columns
) partition by range (uuid_extract_timestamp(id));
create table message_2025_10_22 partition of message for values from
('2025-10-22') to ('2025-10-23');
create table message_2025_10_23 partition of message for values from
('2025-10-23') to ('2025-10-24');
create table message_2025_10_24 partition of message for values from
('2025-10-24') to ('2025-10-25');
create index m_2025_10_22_id on message_2025_10_22
(uuid_extract_timestamp(id));
create index m_2025_10_23_id on message_2025_10_23
(uuid_extract_timestamp(id));
create index m_2025_10_24_id on message_2025_10_24
(uuid_extract_timestamp(id));
-- Today:
insert into message select uuidv7() from generate_series(1, 111_000);
-- Yesterday:
insert into message select uuidv7('-1 day') from generate_series(1,
222_000);
-- Tomorrow:
insert into message select uuidv7('+1 day') from generate_series(1,
333_000);
set random_page_cost = 1.1; -- SSD rulez
vacuum analyze message;
select count(id) from only message;
select count(id) from message_2025_10_22;
select count(id) from message_2025_10_23;
select count(id) from message_2025_10_24;
explain select * from message where uuid_extract_timestamp(id) =
'2025-10-23 10:23:45';
explain select * from message where uuid_extract_timestamp(id)
between '2025-10-23 23:00:00' and '2025-10-24 03:00:00';
Approach 2: (thank you Greg for this, too)
create table message2 (
id uuid PRIMARY KEY
-- ... plus other columns
) partition by range (id);
create table message_2025_10_22 partition of message2 for values from
(uuidv7_floor('2025-10-22')) to (uuidv7_floor('2025-10-23'));
create table message_2025_10_23 partition of message2 for values from
(uuidv7_floor('2025-10-23')) to (uuidv7_floor('2025-10-24'));
create table message_2025_10_24 partition of message2 for values from
(uuidv7_floor('2025-10-24')) to (uuidv7_floor('2025-10-25'));
--Primary key handles this
--create index m_2025_10_22_id on message_2025_10_22 (id);
--create index m_2025_10_23_id on message_2025_10_23 (id);
--create index m_2025_10_24_id on message_2025_10_24 (id);
-- Today:
insert into message2 select uuidv7() from generate_series(1, 111_000);
-- Yesterday:
insert into message2 select uuidv7('-1 day') from generate_series(1,
222_000);
-- Tomorrow:
insert into message2 select uuidv7('+1 day') from generate_series(1,
333_000);
set random_page_cost = 1.1; -- SSD rulez
vacuum analyze message2;
explain select * from message2 where id
between uuidv7_floor('2025-10-23 23:00:00') and uuidv7_floor('2025-10-23
23:59:59');
-- is there a standard function for this?
CREATE OR REPLACE FUNCTION uuidv7_floor(ts timestamptz)
RETURNS uuid
LANGUAGE sql
IMMUTABLE
AS $$
WITH ms AS (
SELECT floor(extract(epoch FROM ts) * 1000)::bigint AS ms
),
h AS (
SELECT lpad(to_hex(ms), 12, '0') AS h FROM ms
)
SELECT (
substr(h.h,1,8) || '-' ||
substr(h.h,9,4) || '-' ||
'7000' || '-' || -- version 7 + rand_a all zero
'8000' || '-' || -- variant '10' + rest zero
'' -- zero node
)::uuid
FROM h;
$$;
On Thu, Oct 23, 2025 at 3:54 PM David Rowley wrote:
> On Fri, 24 Oct 2025 at 09:38, Laurenz Albe
> wrote:
> > I recommend that you create a primary key on each partition rather than
> having one
> > on the partitioned table.
>
> It might be worth mentioning that doing that would forego having the
> ability to reference the partitioned table in a foreign key
> constraint.
>
> David
>
