Possible hang in 10.1 with JSON query over partially indexed partitions

2018-01-17 Thread Paul Jones

Version 10.1, Community version from PGDG repo
OS  RHEL 7.3

I may have discovered a situation in 10.1 where EXECUTEing a PREPARED
statement acting on JSON data in partitioned tables hangs in an
infinite loop for a particular set of data.  Unfortunately, the data is
proprietary, so I did the best I could below to describe what happened.

1) Partitioning is done with an (I hope) IMMUTABLE FUNC
2) Each partition has a partial index on IP address
3) The query looks for logins within a certain small time frame
   from the same IP

I compiled a debug version and took 3 backtraces.  When I first discovered
this, I allowed it to sit for 10-15 minutes before interrupting it.
Similar queries for different addresses return within seconds.

 Setup --

CREATE FUNCTION sesstime(tin JSONB)
RETURNS DATE
AS $$
SELECT (substring(tin->>'timestamp' from 1 for 8))::DATE;
$$ LANGUAGE SQL IMMUTABLE;

DROP TABLE sessparts;
CREATE TABLE sessparts
(
session JSONB
) PARTITION BY RANGE (sesstime(session));

CREATE TABLE sessparts_20171116
PARTITION OF sessparts
FOR VALUES FROM ('2017-11-16') TO ('2017-11-17');

CREATE TABLE sessparts_20171117
PARTITION OF sessparts
FOR VALUES FROM ('2017-11-17') TO ('2017-11-18');


.OTHER PARTITIONS ELIDED FOR BREVITY.


CREATE INDEX ON sessparts_20171116 (CAST(session->>'requestIP' AS INET))
WHERE session->>'requestIP' != ''
AND session->>'requestIP' != '0';

CREATE INDEX ON sessparts_20171117 (CAST(session->>'requestIP' AS INET))
WHERE session->>'requestIP' != ''
AND session->>'requestIP' != '0';


.OTHER PARTITIONS ELIDED FOR BREVITY.


PREPARE fanalq1(INET, TIMESTAMPTZ, INTERVAL) AS
SELECT inetad, aid, count(aid), stddev(td)
FROM (
SELECT $1, aid,
lid,
tstmp,
EXTRACT(EPOCH FROM (tstmp - lag(tstmp) OVER (ORDER BY tstmp)))
FROM (
SELECT session->>'authID' AUTHID,
session->>'loginID' LOGINID,
to_timestamp(session->>'timestamp', 'MMDDHH24MISS') 
tt
FROM sessparts
WHERE sesstime(session) >= ($2 - $3)::DATE
AND sesstime(session) <= $2::DATE
AND to_timestamp(session->>'timestamp', 
'MMDDHH24MISS')
<@ tstzrange($2 - $3, 
$2)
AND session->>'requestIP' != ''
AND session->>'requestIP' != '0'
AND (session->>'requestIP')::INET = $1
AND session->>'isAuthenticationSuccessful' = 'false'
) attempts(aid, lid, tstmp)
) tdiffs(inetad, aid, lid, ts, td)
GROUP BY aid, 1;

 Query --

sessions=# explain EXECUTE fanalq1('206.108.41.102', '2017-11-17 16:23:31-05', 
'60 seconds');



QUERY PLAN  

 
---
 GroupAggregate  (cost=1175004.52..1175004.72 rows=8 width=80)
   Group Key: tdiffs.aid, tdiffs.inetad
   ->  Sort  (cost=1175004.52..1175004.54 rows=8 width=72)
 Sort Key: tdiffs.aid, tdiffs.inetad
 ->  Subquery Scan on tdiffs  (cost=1175004.00..1175004.40 rows=8 
width=72)
   ->  WindowAgg  (cost=1175004.00..1175004.32 rows=8 width=112)
 ->  Sort  (cost=1175004.00..1175004.02 rows=8 width=1400)
   Sort Key: (to_timestamp((sessparts_20171110.session 
->> 'timestamp'::text), 'MMDDHH24MISS'::text))
   ->  Result  (cost=74.23..1175003.88 rows=8 
width=1400)
 ->  Append  (cost=74.23..1175003.76 rows=8 
width=1392)
   ->  Bitmap Heap Scan

Possible hang in 10.1 with JSON query over partially indexed partitions

2018-01-18 Thread Paul Jones
Version   10.1, Community version from PGDG repo
OSRHEL 7.3

I may have discovered a situation in 10.1 where EXECUTEing a PREPARED
statement acting on JSON data in partitioned tables hangs in an
infinite loop for a particular set of data.  Unfortunately, the data is
proprietary, so I did the best I could below to describe what happened.

1) Partitioning is done with an (I hope) IMMUTABLE FUNC
2) Each partition has a partial index on IP address
3) The query looks for logins within a certain small time frame
   from the same IP

I compiled a debug version and took 3 backtraces.  When I first discovered
this, I allowed it to sit for 10-15 minutes before interrupting it.
Similar queries for different addresses return within seconds.

 Setup --

CREATE FUNCTION sesstime(tin JSONB)
RETURNS DATE
AS $$
SELECT (substring(tin->>'timestamp' from 1 for 8))::DATE;
$$ LANGUAGE SQL IMMUTABLE;

DROP TABLE sessparts;
CREATE TABLE sessparts
(
sessionJSONB
) PARTITION BY RANGE (sesstime(session));

CREATE TABLE sessparts_20171116
PARTITION OF sessparts
FOR VALUES FROM ('2017-11-16') TO ('2017-11-17');

CREATE TABLE sessparts_20171117
PARTITION OF sessparts
FOR VALUES FROM ('2017-11-17') TO ('2017-11-18');


.OTHER PARTITIONS ELIDED FOR BREVITY.


CREATE INDEX ON sessparts_20171116 (CAST(session->>'requestIP' AS INET))
WHERE session->>'requestIP' != ''
AND session->>'requestIP' != '0';

CREATE INDEX ON sessparts_20171117 (CAST(session->>'requestIP' AS INET))
WHERE session->>'requestIP' != ''
AND session->>'requestIP' != '0';


.OTHER PARTITIONS ELIDED FOR BREVITY.


PREPARE fanalq1(INET, TIMESTAMPTZ, INTERVAL) AS
SELECT inetad, aid, count(aid), stddev(td)
FROM (
SELECT $1, aid,
lid,
tstmp,
EXTRACT(EPOCH FROM (tstmp - lag(tstmp) OVER (ORDER BY tstmp)))
FROM (
SELECT session->>'authID' AUTHID,
session->>'loginID' LOGINID,
to_timestamp(session->>'timestamp', 'MMDDHH24MISS') tt
FROM sessparts
WHERE sesstime(session) >= ($2 - $3)::DATE
AND sesstime(session) <= $2::DATE
AND to_timestamp(session->>'timestamp', 'MMDDHH24MISS')
<@ tstzrange($2 - $3, $2)
AND session->>'requestIP' != ''
AND session->>'requestIP' != '0'
AND (session->>'requestIP')::INET = $1
AND session->>'isAuthenticationSuccessful' = 'false'
) attempts(aid, lid, tstmp)
) tdiffs(inetad, aid, lid, ts, td)
GROUP BY aid, 1;

 Query --

sessions=# explain EXECUTE fanalq1('206.108.41.102', '2017-11-17 16:23:31-05', 
'60 seconds');



QUERY PLAN  

 
---
 GroupAggregate  (cost=1175004.52..1175004.72 rows=8 width=80)
   Group Key: tdiffs.aid, tdiffs.inetad
   ->  Sort  (cost=1175004.52..1175004.54 rows=8 width=72)
 Sort Key: tdiffs.aid, tdiffs.inetad
 ->  Subquery Scan on tdiffs  (cost=1175004.00..1175004.40 rows=8 
width=72)
   ->  WindowAgg  (cost=1175004.00..1175004.32 rows=8 width=112)
 ->  Sort  (cost=1175004.00..1175004.02 rows=8 width=1400)
   Sort Key: (to_timestamp((sessparts_20171110.session 
->> 'timestamp'::text), 'MMDDHH24MISS'::text))
   ->  Result  (cost=74.23..1175003.88 rows=8 
width=1400)
 ->  Append  (cost=74.23..1175003.76 rows=8 
width=1392)
   ->  Bitmap Heap Scan on 
sessparts_20171110  (cost=74.23..16077.23 rows=1 width=1380)
 Recheck Cond: session ->> 
'requestIP'::text))::inet = '206.108.41.102'::inet) AND ((session ->> 
'requestIP'::text) <> ''::text) AND ((sessio