Hi Andrey
I've been reviewing the patch and ran some concurrent tests against it.
I found an issue with false positive corruption reports under concurrent
VACUUM.

+ slot = table_slot_create(state->heaprel, NULL);
+ found = table_tuple_fetch_row_version(state->heaprel, tid,
+                         SnapshotAny, slot);
+ if (!found)
+ {
+         ExecDropSingleTupleTableSlot(slot);
+         ereport(ERROR,

bt_verify_index_tuple_points_to_heap uses SnapshotAny with
table_tuple_fetch_row_version to distinguish "tuple doesn't exist" from
"tuple exists but is dead". However, bt_index_check only holds
AccessShareLock which compatible with VACUUM's ShareUpdateExclusiveLock.
A concurrent VACUUM Phase 1 can prune heap pages(ItemIdSetDead) between
Bloom filter probe and the heap lookup. Causing
table_tuple_fetch_row_version to return false for a legal
dead-but-now-pruned tuple and a false positive corruption report.

The table_tuple_satisfies_snapshot check does not help, it only runs
when the fetch succeeds(LP_NORMAL). Once VACUUM sets LP_DEAD,
heapam_fetch sees !ItemIdIsNormal(lp) and returns false before any
snapshot checks.

The reproduce should be: DELETE all rows form a big table, then launch
VACUUM and bt_index_check concurrently. A POC test script attached.

# POC: indexallkeysmatch false positive under concurrent VACUUM

use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use IPC::Run ();

my $node = PostgreSQL::Test::Cluster->new('race');
$node->init;
$node->append_conf('postgresql.conf', q{
autovacuum = off
shared_buffers = 32MB
});
$node->start;

$node->safe_psql('postgres', 'CREATE EXTENSION amcheck');

my $port = $node->port;
my $host = $node->host;

# 1M rows x ~230 bytes => ~30K heap pages => VACUUM Phase 1 takes real time
$node->safe_psql('postgres', q{
    CREATE TABLE race(id int, pad text);
    INSERT INTO race SELECT i, repeat('x', 200)
        FROM generate_series(1, 1000000) i;
    CREATE INDEX race_idx ON race(id);
    ANALYZE race;
});

my $false_positive = 0;

for my $attempt (1..8)
{
    # Delete all rows. After commit, tuples are LP_NORMAL with xmax set.
    $node->safe_psql('postgres', 'DELETE FROM race');

    # Start both VACUUM and bt_index_check
    my ($v_out, $v_err, $c_out, $c_err) = ('', '', '', '');

    my $vacuum_h = IPC::Run::start(
        ['psql', '-X', '-h', $host, '-p', $port,
         '-d', 'postgres', '-c', 'VACUUM race'],
        \my $v_in, \$v_out, \$v_err);

    my $check_h = IPC::Run::start(
        ['psql', '-X', '-h', $host, '-p', $port,
         '-d', 'postgres', '-c',
         "SELECT bt_index_check('race_idx', false, false, true)"],
        \my $c_in, \$c_out, \$c_err);

    $check_h->finish;
    $vacuum_h->finish;

    if ($c_err =~ /points to non-existent heap tuple/)
    {
        $false_positive = 1;
        diag("Race triggered on attempt $attempt");
        last;
    }

    diag("Attempt $attempt: no race (check_err='$c_err')");

    # Restore data for next round
    $node->safe_psql('postgres', q{
        TRUNCATE race;
        INSERT INTO race SELECT i, repeat('x', 200)
            FROM generate_series(1, 1000000) i;
        REINDEX INDEX race_idx;
    });
}

ok($false_positive,
   'FALSE POSITIVE: indexallkeysmatch reports corruption during concurrent VACUUM');

$node->stop;
done_testing();

Reply via email to