Hi Andrey
I've been reviewing the patch and ran some concurrent tests against it.
I found an issue with false positive corruption reports under concurrent
VACUUM.
+ slot = table_slot_create(state->heaprel, NULL);
+ found = table_tuple_fetch_row_version(state->heaprel, tid,
+ SnapshotAny, slot);
+ if (!found)
+ {
+ ExecDropSingleTupleTableSlot(slot);
+ ereport(ERROR,
bt_verify_index_tuple_points_to_heap uses SnapshotAny with
table_tuple_fetch_row_version to distinguish "tuple doesn't exist" from
"tuple exists but is dead". However, bt_index_check only holds
AccessShareLock which compatible with VACUUM's ShareUpdateExclusiveLock.
A concurrent VACUUM Phase 1 can prune heap pages(ItemIdSetDead) between
Bloom filter probe and the heap lookup. Causing
table_tuple_fetch_row_version to return false for a legal
dead-but-now-pruned tuple and a false positive corruption report.
The table_tuple_satisfies_snapshot check does not help, it only runs
when the fetch succeeds(LP_NORMAL). Once VACUUM sets LP_DEAD,
heapam_fetch sees !ItemIdIsNormal(lp) and returns false before any
snapshot checks.
The reproduce should be: DELETE all rows form a big table, then launch
VACUUM and bt_index_check concurrently. A POC test script attached.
# POC: indexallkeysmatch false positive under concurrent VACUUM
use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use IPC::Run ();
my $node = PostgreSQL::Test::Cluster->new('race');
$node->init;
$node->append_conf('postgresql.conf', q{
autovacuum = off
shared_buffers = 32MB
});
$node->start;
$node->safe_psql('postgres', 'CREATE EXTENSION amcheck');
my $port = $node->port;
my $host = $node->host;
# 1M rows x ~230 bytes => ~30K heap pages => VACUUM Phase 1 takes real time
$node->safe_psql('postgres', q{
CREATE TABLE race(id int, pad text);
INSERT INTO race SELECT i, repeat('x', 200)
FROM generate_series(1, 1000000) i;
CREATE INDEX race_idx ON race(id);
ANALYZE race;
});
my $false_positive = 0;
for my $attempt (1..8)
{
# Delete all rows. After commit, tuples are LP_NORMAL with xmax set.
$node->safe_psql('postgres', 'DELETE FROM race');
# Start both VACUUM and bt_index_check
my ($v_out, $v_err, $c_out, $c_err) = ('', '', '', '');
my $vacuum_h = IPC::Run::start(
['psql', '-X', '-h', $host, '-p', $port,
'-d', 'postgres', '-c', 'VACUUM race'],
\my $v_in, \$v_out, \$v_err);
my $check_h = IPC::Run::start(
['psql', '-X', '-h', $host, '-p', $port,
'-d', 'postgres', '-c',
"SELECT bt_index_check('race_idx', false, false, true)"],
\my $c_in, \$c_out, \$c_err);
$check_h->finish;
$vacuum_h->finish;
if ($c_err =~ /points to non-existent heap tuple/)
{
$false_positive = 1;
diag("Race triggered on attempt $attempt");
last;
}
diag("Attempt $attempt: no race (check_err='$c_err')");
# Restore data for next round
$node->safe_psql('postgres', q{
TRUNCATE race;
INSERT INTO race SELECT i, repeat('x', 200)
FROM generate_series(1, 1000000) i;
REINDEX INDEX race_idx;
});
}
ok($false_positive,
'FALSE POSITIVE: indexallkeysmatch reports corruption during concurrent VACUUM');
$node->stop;
done_testing();