Query plan prefers hash join when nested loop is much faster

2020-08-21 Thread iulian dragos
Hi,

I am trying to understand why the query planner insists on using a hash
join, and how to make it choose the better option, which in this case would
be a nested loop. I have two tables:

-- about 200 million rows
CREATE TABLE module_result(
    id bigserial PRIMARY KEY,
    name_id bigint NOT NULL references result_name(id),
    run_id integer NOT NULL references run (id),
    logs text NOT NULL,
    status result_status NOT NULL
);
CREATE INDEX ON module_result (run_id);

-- 500 million rows
CREATE TABLE test_result(
    id bigserial PRIMARY KEY,
    name_id bigint NOT NULL references result_name(id),
    module_result_id bigint NOT NULL references module_result (id),
    seconds float NOT NULL,
    failure_msg text, -- Either a ... or an

Gather Merge  (cost=7771702.73..7771804.02 rows=6 width=12) (actual time=32341.970..32343.222 rows=6 loops=1)
  Workers Planned: 2
  Workers Launched: 2
  ->  Partial GroupAggregate  (cost=7770702.71..7770803.30 rows=3 width=12) (actual time=32340.278..32340.286 rows=2 loops=3)
        Group Key: test_result.status
        ->  Sort  (cost=7770702.71..7770736.23 rows=13408 width=4) (actual time=32339.698..32339.916 rows=4941 loops=3)
              Sort Key: test_result.status
              Sort Method: quicksort  Memory: 431kB
              Worker 0:  Sort Method: quicksort  Memory: 433kB
              Worker 1:  Sort Method: quicksort  Memory: 409kB
              ->  Hash Join  (cost=586.15..7769783.54 rows=13408 width=4) (actual time=18112.078..32339.011 rows=4941 loops=3)
                    Hash Cond: (test_result.module_result_id = module_result.id)
                    ->  Parallel Seq Scan on test_result  (cost=0.00..7145224.72 rows=237703872 width=12) (actual time=0.034..15957.894 rows=190207740 loops=3)
                    ->  Hash  (cost=438.41..438.41 rows=11819 width=8) (actual time=3.905..3.905 rows=14824 loops=3)
                          Buckets: 16384  Batches: 1  Memory Usage: 708kB
                          ->  Index Scan using module_result_run_id_idx on module_result  (cost=0.57..438.41 rows=11819 width=8) (actual time=0.017..2.197 rows=14824 loops=3)
                                Index Cond: (run_id = 158523)
Planning Time: 0.178 ms
Execution Time: 32343.330 ms

EXPLAIN
Time: 32.572s (32 seconds), executed in: 32.551s (32 seconds)

This plan takes about 30s to execute. If I turn off seqscan, I get a nested
loop join that takes about 0.02s to execute:

set enable_seqscan = off

SET
Time: 0.305s

> explain analyze select test_result.status, count(test_result.status) as "Count"
  from test_result inner join module_result ON module_result.id = test_result.module_result_id
  where module_result.run_id=158523
  group by test_result.status


Finalize GroupAggregate  (cost=34297042.16..34297143.50 rows=3 width=12) (actual time=15.014..15.015 rows=2 loops=1)
  Group Key: test_result.status
  ->  Gather Merge  (cost=34297042.16..34297143.44 rows=6 width=12) (actual time=15.005..15.850 rows=6 loops=1)
        Workers Planned: 2
        Workers Launched: 2
        ->  Partial GroupAggregate  (cost=34296042.13..34296142.72 rows=3 width=12) (actual time=12.937..12.940 rows=2 loops=3)
              Group Key: test_result.status
              ->  Sort  (cost=34296042.13..34296075.65 rows=13408 width=4) (actual time=12.339..12.559 rows=4941 loops=3)


Re: Query plan prefers hash join when nested loop is much faster

2020-08-24 Thread iulian dragos
Hi Michael,

Thanks for the answer. It's an RDS instance using SSD storage, with
`random_page_cost` left at the default of 4.0. I don't expect a lot of
repetitive queries here, so I think caching may not help much. I wonder if
the selectivity of the query is wrongly estimated (out of 500 million rows,
only a few thousand are returned).

I tried lowering the `random_page_cost` to 1.2 and it didn't make a
difference in the query plan.
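
(Roughly what I ran for that test, in a single session:)

SET random_page_cost = 1.2;
EXPLAIN ANALYZE
SELECT test_result.status, count(test_result.status) AS "Count"
FROM test_result
INNER JOIN module_result ON module_result.id = test_result.module_result_id
WHERE module_result.run_id = 158523
GROUP BY test_result.status;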

iulian


On Fri, Aug 21, 2020 at 6:30 PM Michael Lewis  wrote:

> Your system is preferring sequential scan to
> using test_result_module_result_id_idx in this case. What type of storage
> do you use, what type of cache hits do you expect, and what do you have
> random_page_cost set to? That comes to mind as a significant factor in
> choosing index scans based on costs.
>


Re: Query plan prefers hash join when nested loop is much faster

2020-08-24 Thread iulian dragos
On Mon, Aug 24, 2020 at 4:21 PM iulian dragos wrote:

> Hi Michael,
>
> Thanks for the answer. It's an RDS instance using SSD storage, with
> `random_page_cost` left at the default of 4.0. I don't expect a lot of
> repetitive queries here, so I think caching may not help much. I wonder if
> the selectivity of the query is wrongly estimated (out of 500 million rows,
> only a few thousand are returned).
>
> I tried lowering the `random_page_cost` to 1.2 and it didn't make a
> difference in the query plan.
>

I experimented a bit more with different values for this setting. The only
way I could make it use the index was to use a value strictly less than
`seq_page_cost` (0.8 for instance). That doesn't sound right, though.

The effective_cache_size is fairly high as well (32 GB) for an
instance with 64 GB of RAM (db.m5.4xlarge).
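
(For reference, the relevant planner settings can be checked in one go, with something like:)

SELECT name, setting
FROM pg_settings
WHERE name IN ('random_page_cost', 'seq_page_cost', 'effective_cache_size');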

iulian

>


Re: Query plan prefers hash join when nested loop is much faster

2020-08-25 Thread iulian dragos
On Tue, Aug 25, 2020 at 12:27 AM David Rowley  wrote:

> On Sat, 22 Aug 2020 at 00:35, iulian dragos wrote:
> > I am trying to understand why the query planner insists on using a hash
> > join, and how to make it choose the better option, which in this case would
> > be a nested loop.
>
> > |   ->  Index Scan using test_result_module_result_id_idx on test_result  (cost=0.57..6911.17 rows=4331 width=12) (actual time=0.002..0.002 rows=1 loops=14824) |
> > | Index Cond: (module_result_id = module_result.id) |
>
> You might want to check if the pg_stats view reports a realistic
> n_distinct value for test_result.module_result_id.  If
> pg_class.reltuples is correct for that relation then that would
> indicate the n_distinct estimate is about 115000. Going by the number
> of rows you've mentioned, a more realistic value for that
> would be -0.4, which is 0 - 1 / (5 / 2.0).
> However, that's assuming each module_result has a test_result. You
> could run a SELECT COUNT(DISTINCT module_result_id) FROM test_result;
> to get a better idea.
>
> If ANALYZE is not getting you a good value for n_distinct, then you
> can overwrite it. See [1], search for n_distinct.
>

Thanks for the tip! Indeed, `n_distinct` isn't right. I found it in
pg_stats set at 131736.0, but the actual number is much higher: 210104361.
I tried to set it manually, but the plan is still the same (I tried both the
actual number and a fraction, -0.4, as you suggested):

> ALTER TABLE test_result ALTER COLUMN module_result_id SET (n_distinct=210104361)

You're about to run a destructive command.
Do you want to proceed? (y/n): y
Your call!
ALTER TABLE
Time: 0.205s
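
(The 131736.0 estimate mentioned above was read from pg_stats, with something like:)

SELECT n_distinct
FROM pg_stats
WHERE tablename = 'test_result' AND attname = 'module_result_id';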



>
> David
>
> [1] https://www.postgresql.org/docs/current/sql-altertable.html
>


Re: Query plan prefers hash join when nested loop is much faster

2020-08-25 Thread iulian dragos
On Tue, Aug 25, 2020 at 12:36 PM David Rowley  wrote:

> On Tue, 25 Aug 2020 at 22:10, iulian dragos wrote:
> > Thanks for the tip! Indeed, `n_distinct` isn't right. I found it in
> > pg_stats set at 131736.0, but the actual number is much higher: 210104361.
> > I tried to set it manually, but the plan is still the same (I tried both the
> > actual number and a fraction, -0.4, as you suggested):
>
> You'll need to run ANALYZE on the table after doing the ALTER TABLE to
> change the n_distinct.  The ANALYZE writes the value to pg_statistic.
> ALTER TABLE only takes it as far as pg_attribute's attoptions.
> ANALYZE reads that column to see if the n_distinct estimate should be
> overwritten before writing out pg_statistic.
>

Ah, rookie mistake. Thanks for clarifying this. Indeed, after I ran ANALYZE
the faster plan was selected! Yay!


> Just remember if you're hardcoding a positive value that it'll stay
> fixed until you change it. If the table is likely to grow, then you
> might want to reconsider using a positive value and consider using a
> negative value as mentioned in the doc link.
>

Good point, I went for -0.4 and that seems to be doing the trick!
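
(For the archives, the sequence that ended up working here, roughly:)

ALTER TABLE test_result ALTER COLUMN module_result_id SET (n_distinct = -0.4);
ANALYZE test_result;  -- needed so the override is actually written to pg_statistic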

Thanks a lot for helping out!


>
> David
>


How to reduce query planning time (10s)

2021-12-20 Thread iulian dragos
Hi,

I was analyzing the query performance in a certain code path and noticed
that practically all of the query time is spent planning (11s planning,
200ms execution time). Here is the output of EXPLAIN ANALYZE. I tried using
a prepared statement with three parameters (run_id, and the two text
comparisons) and the performance is similar, even when run multiple times
with the same parameters. Could someone give me some hints on how to speed
this query up?

explain analyze SELECT x13.run_id, x19.text, x13.id, x17.status
FROM module_result x13
INNER JOIN result_name x14 ON x13.name_id = x14.id
INNER JOIN test_result x17 ON x13.id = x17.module_result_id
INNER JOIN result_name x19 ON x17.name_id = x19.id
WHERE x19.text IN ('devtools/devbox/devbox_test_scalastyle')
  AND x13.run_id IN (3769263)
  AND x14.text = 'Lint-Pr'

Nested Loop  (cost=993.29..160107.01 rows=1 width=557) (actual time=0.147..0.148 rows=0 loops=1)
  Join Filter: (x13.id = x17.module_result_id)
  ->  Nested Loop  (cost=2.27..18.32 rows=1 width=12) (actual time=0.147..0.147 rows=0 loops=1)
        ->  Index Scan using result_name_text_key on result_name x14  (cost=1.69..9.71 rows=1 width=8) (actual time=0.146..0.147 rows=0 loops=1)
              Index Cond: (text = 'Lint-Pr'::text)
        ->  Index Scan using module_result_run_id_name_id_idx on module_result x13  (cost=0.57..8.59 rows=1 width=20) (never executed)
              Index Cond: ((run_id = 3769263) AND (name_id = x14.id))
  ->  Nested Loop  (cost=991.02..160087.73 rows=77 width=553) (never executed)
        ->  Index Scan using result_name_text_key on result_name x19  (cost=1.69..9.71 rows=1 width=549) (never executed)
              Index Cond: (text = 'devtools/devbox/devbox_test_scalastyle'::text)
        ->  Bitmap Heap Scan on test_result x17  (cost=989.33..159674.48 rows=40354 width=20) (never executed)
              Recheck Cond: (name_id = x19.id)
              ->  Bitmap Index Scan on test_result_name_id_idx  (cost=0.00..979.24 rows=40354 width=0) (never executed)
                    Index Cond: (name_id = x19.id)
Planning Time: 11257.713 ms
Execution Time: 0.204 ms

EXPLAIN
Time: 11.505s (11 seconds), executed in: 11.496s (11 seconds)
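
(The prepared statement itself isn't shown in the thread; this is my reconstruction from
the query above and the execute call below, with the parameter types assumed:)

PREPARE test_history_prep2 (integer, text, text) AS
SELECT x13.run_id, x19.text, x13.id, x17.status
FROM module_result x13
INNER JOIN result_name x14 ON x13.name_id = x14.id
INNER JOIN test_result x17 ON x13.id = x17.module_result_id
INNER JOIN result_name x19 ON x17.name_id = x19.id
WHERE x19.text IN ($3) AND x13.run_id IN ($1) AND x14.text = $2;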



explain analyze execute test_history_prep2(3769263, 'Lint-Pr', 'devtools/devbox/devbox_test_scalastyle')

Nested Loop  (cost=993.30..160114.89 rows=1 width=557) (actual time=0.173..0.173 rows=0 loops=1)
  Join Filter: (x13.id = x17.module_result_id)
  ->  Nested Loop  (cost=2.27..18.32 rows=1 width=12) (actual time=0.172..0.173 rows=0 loops=1)
        ->  Index Scan using result_name_text_key on result_name x14  (cost=1.69..9.71 rows=1 width=8) (actual time=0.172..0.172 rows=0 loops=1)
              Index Cond: (text = 'Lint-Pr'::text)
        ->  Index Scan using module_result_run_id_name_id_idx on module_result x13  (cost=0.57..8.59 rows=1 width=20) (never executed)
              Index Cond: ((run_id = 3769263) AND (name_id = x14.id))
  ->  Nested Loop  (cost=991.03..160095.61 rows=77 width=553) (never executed)
        ->  Index Scan using result_n

Re: How to reduce query planning time (10s)

2021-12-20 Thread iulian dragos
Thanks for the suggestion!

Reading through the docs, it looks like this might involve some downtime
(locking writes to the tables whose indexes are being rebuilt), and this is a
running system. I may need some time to set up a database copy and try to
reproduce the issue (the dev deployment doesn't exhibit the performance
problem, so it must be related to table contents).

Is there any way to validate the bloated index hypothesis before I invest
too much in this direction?
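
(One check I'm considering, assuming the pgstattuple extension can be installed on RDS;
a sketch, not something I've run yet:)

CREATE EXTENSION IF NOT EXISTS pgstattuple;
-- low avg_leaf_density and high leaf_fragmentation would hint at a bloated index
SELECT avg_leaf_density, leaf_fragmentation
FROM pgstatindex('test_result_name_id_idx');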

iulian


On Mon, Dec 20, 2021 at 1:42 PM Pavel Stehule wrote:

> Hi
>
> On Mon, 20 Dec 2021 at 13:31, iulian dragos <iulian.dra...@databricks.com> wrote: