Re: Query planning read a large amount of buffers for partitioned tables

2025-02-17 Thread bruno vieira da silva
Well, the query plans were generated with PG 17.3, and the buffer usage was
half. Did PG 17.3 have any fixes to reduce the planning buffer usage?

On Mon, Feb 17, 2025 at 3:18 PM bruno vieira da silva wrote:

> Hello, I did a more comprehensive test with different numbers of
> partitions, and this is what I found:
> [...]

Re: Query planning read a large amount of buffers for partitioned tables

2025-02-17 Thread bruno vieira da silva
Never mind; that query plan was generated on test data with 50 partitions.
Sorry for the confusion.

On Mon, Feb 17, 2025 at 3:25 PM bruno vieira da silva wrote:

> Well, the query plans were generated with PG 17.3, and the buffer usage
> was half.
> [...]

Re: Query planning read a large amount of buffers for partitioned tables

2025-02-17 Thread bruno vieira da silva
Hello, I did a more comprehensive test with different numbers of
partitions, and this is what I found:

Summary of buffer usage: first call vs. second call in the same session.

First call:
200 partitions: 12,828 (100 MB)
100 partitions:  9,329 (72 MB)
 50 partitions:  3,305 (25 MB)
 10 partitions:    875 (7 MB)

Second call, same session:
200 partitions:    205 (1.6 MB)
100 partitions:      5 (40 kB)
 50 partitions:      5 (40 kB)
 10 partitions:      5 (40 kB)
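
(The sizes follow from the default 8 kB block size: 12,828 buffers x 8 kB
= 102,624 kB, about 100 MB; 5 buffers x 8 kB = 40 kB.)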

I also tested on PG 17.3; no relevant changes.

The question is: does this behavior make sense?

*These are the steps to reproduce it:*

docker pull postgres:17.2
docker run -itd -e POSTGRES_USER=bruno -e POSTGRES_PASSWORD=bruno \
  -p 5500:5432 -v /home/bruno/pgdata17:/var/lib/postgresql/data \
  --name postgresql postgres:17.2
export PGHOST="localhost"
export PGPORT=5500
export PGDATABASE="postgres"
export PGUSER="bruno"
export PGPASSWORD="bruno"
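
(With those variables set, a bare psql reaches the container; a quick
illustrative check:)

psql -c 'SELECT version();'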

CREATE EXTENSION IF NOT EXISTS "pgcrypto"; -- not strictly required on PG 13+, where gen_random_uuid() is built in

CREATE TABLE dicom_series (
    series_uid UUID DEFAULT gen_random_uuid(),
    series_description VARCHAR(255),
    modality VARCHAR(16),
    body_part_examined VARCHAR(64),
    patient_id VARCHAR(64),
    study_uid UUID DEFAULT gen_random_uuid(),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Create the parent table
CREATE TABLE dicom_sops_100_part (
    sop_uid UUID NOT NULL,
    series_uid UUID NOT NULL,
    instance_number INT,
    image_position_patient TEXT,
    image_orientation_patient TEXT,
    slice_thickness DECIMAL(10, 2),
    slice_location DECIMAL(10, 2),
    pixel_spacing TEXT,
    rows INT,
    columns INT,
    acquisition_date DATE,
    acquisition_time TIME,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) PARTITION BY HASH (sop_uid);
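
(As an illustrative sanity check, the recorded partitioning strategy can
be confirmed from the catalog:)

SELECT partstrat, partnatts
FROM pg_partitioned_table
WHERE partrelid = 'dicom_sops_100_part'::regclass;
-- expected: partstrat = 'h' (hash), partnatts = 1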

-- Create 100 partitions
DO $$
DECLARE
    partition_number INT;
BEGIN
    FOR partition_number IN 0..99 LOOP
        EXECUTE format(
            'CREATE TABLE dicom_sops_100_p%1$s PARTITION OF dicom_sops_100_part
             FOR VALUES WITH (MODULUS 100, REMAINDER %1$s);',
            partition_number
        );
    END LOOP;
END $$;
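
(Another illustrative check: the partition count can be confirmed from
pg_inherits.)

SELECT count(*) AS partitions
FROM pg_inherits
WHERE inhparent = 'dicom_sops_100_part'::regclass;
-- expected: 100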

*Data population:*

DO $$
DECLARE
    series_count INT := 100; -- number of series to create
    sops_per_series INT := 20;
    i INT;
    j INT;
    series_id UUID;
    sop_id UUID;
BEGIN
    FOR i IN 1..series_count LOOP
        -- Insert into dicom_series with a generated UUID
        INSERT INTO dicom_series (
            series_description,
            modality,
            body_part_examined,
            patient_id
        ) VALUES (
            'Series Description ' || i,
            'CT',
            'Chest',
            'PATIENT-' || i
        )
        RETURNING series_uid INTO series_id;

        FOR j IN 1..sops_per_series LOOP
            -- Insert into dicom_sops_100_part with a generated UUID
            INSERT INTO dicom_sops_100_part (
                sop_uid,
                series_uid,
                instance_number,
                image_position_patient,
                image_orientation_patient,
                slice_thickness,
                slice_location,
                pixel_spacing,
                rows,
                columns,
                acquisition_date,
                acquisition_time
            ) VALUES (
                gen_random_uuid(),
                series_id,
                j,
                '(0.0, 0.0, ' || j || ')',
                '(1.0, 0.0, 0.0, 0.0, 1.0, 0.0)',
                1.0,
                j * 5.0,
                '1.0\1.0', -- single backslash: the DICOM multi-value separator
                512,
                512,
                CURRENT_DATE,
                CURRENT_TIME
            );
        END LOOP;
    END LOOP;
END $$;
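
(A quick illustrative check that the population matches the intent:
100 series x 20 SOPs = 2,000 rows, hashed across the partitions.)

SELECT count(*) FROM dicom_sops_100_part;                 -- expected: 2000
SELECT count(DISTINCT tableoid) FROM dicom_sops_100_part; -- partitions holding rows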

*Add indexes and vacuum analyze:*

CREATE UNIQUE INDEX idx_series_uid ON dicom_series (series_uid);
CREATE INDEX dicom_sops_100_part_sop_uid_idx ON dicom_sops_100_part (sop_uid);
CREATE INDEX dicom_sops_100_part_series_uid_idx ON dicom_sops_100_part (series_uid);

VACUUM FREEZE;
ANALYZE;
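
(For scale, an illustrative look at how many relations sit in the
partition tree, via pg_partition_tree, available since PG 12:)

SELECT count(*) AS relations_in_tree
FROM pg_partition_tree('dicom_sops_100_part');
-- expected: 101 (the parent plus 100 partitions); each partition also
-- carries two indexes for the planner to open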

*Testing:*
Disconnect and reconnect to the database with psql, so the session starts
with cold caches.

Query used for test:

DROP TABLE IF EXISTS temp_series_id;
CREATE TEMPORARY TABLE temp_series_id AS
    SELECT series_uid FROM dicom_series ORDER BY random() LIMIT 1;
ANALYZE temp_series_id;

EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM dicom_sops_100_part
WHERE series_uid = (SELECT series_uid FROM temp_series_id);
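
(As an aside, a minimal sketch for isolating just the planning-phase
counts: EXPLAIN (BUFFERS) without ANALYZE, accepted on recent releases,
plans the query without running it and reports the counts under
"Planning:". Since reconnecting drops the temp table, this variant takes
the series_uid straight from dicom_series:)

-- run in a fresh session, so the relation caches are cold
EXPLAIN (BUFFERS)
SELECT * FROM dicom_sops_100_part
WHERE series_uid = (SELECT series_uid FROM dicom_series LIMIT 1);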

Query plan:


                                  QUERY PLAN
------------------------------------------------------------------------------
 Append  (cost=1.43..423.26 rows=50 width=128) (actual time=2.565..27.216 rows=20 loops=1)
   Buffers: shared hit=50 read=118, local hit=1
   InitPlan 1
     ->  Seq Scan on temp_series_id  (cost=0.00..1.01 rows=1 width=16) (actual time=0.006..0.007 rows=1 loops=1)
           Buffers: local hit=1
   ->  Index Scan using dicom_sops_100_p0_series_uid_idx on dicom_sops_100_p0 dicom_sops_100_part_1  (cost=0.42..8.44 rows=1 width=128) (actual time=0.84