Package: glusterfs-server Version: 3.2.7-3 Severity: grave Tags: upstream fixed-upstream patch Control: fixed -1 3.4.0-4
Hi Patrick Note: I have choosen the severity as 'grave' as it makes GlusterFS not usable under Debian Wheezy when GlusterFS used on ext4 bricks. I'm also Cc'ing Jonathan Nieder and Ben Hutchings (at least to have both informed). Linux Kernel package linux/3.2.46-1 introduced the following change: [ Jonathan Nieder ] * ext3,ext4,nfsd: dir_index: Return 64-bit readdir cookies for NFSv3 and 4 (Closes: #685407) These are the patches: # 64-bit NFS readdir cookies on ext3/ext4 with dir_index bugfix/all/fs-add-new-FMODE-flags-FMODE_32bithash-and-FMODE_64b.patch bugfix/all/ext4-return-32-64-bit-dir-name-hash-according-to-usa.patch bugfix/all/nfsd-rename-int-access-to-int-may_flags-in-nfsd_open.patch bugfix/all/nfsd-vfs_llseek-with-32-or-64-bit-offsets-hashes.patch bugfix/all/ext3-return-32-64-bit-dir-name-hash-according-to-usa.patch This kernel change and it's consequences for GlusterFS are mentioned in [1], where RedHat hat also this issue which reflects in the bugreport[2]. There is a patch backported to 3.3 which I have applied to test on 3.2.7 in wheezy[3,4,5]. Now after updating the kernel on the servers holding the glusterfs from linux/3.2.41-2+deb7u2 to linux/3.2.46-1+deb7u1 in consequence I cannot list anymore the GlusterFS content; a find over it loops infinitively on getdents system calls and consumes memory. I have used the attached patch to test, but as it's not my knowledge aerea, so needs some review if something is missing. I have tested the following setups: wheezy + glusterfs/3.2.7-3 + linux/3.2.41-2+deb7u2 -> ok wheezy + glusterfs/3.2.7-3 + linux/3.2.46-1+deb7u1 -> broken wheezy + glusterfs/3.2.7-3 + patch + linux/3.2.46-1+deb7u1 -> ok unstable + glusterfs/3.4.0-4 + linux/3.10.7-1 -> ok So right now with current given kernel in Wheezy GlusterFS on ext4 bricks is broken, so needs an update trough a stable proposed update if possible. [1] http://lwn.net/Articles/544298/ [2] https://bugzilla.redhat.com/show_bug.cgi?id=838784 [3] https://bugzilla.redhat.com/show_bug.cgi?id=838784#c24 [4] http://review.gluster.org/#/c/4822/ [5] http://review.gluster.org/#/c/4822/2/xlators/cluster/dht/src/dht-helper.c,unified Regards, Salvatore
--- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -49,6 +40,43 @@ } +static uint64_t +dht_bits_for (uint64_t num) +{ + uint64_t bits = 0, ctrl = 1; + + while (ctrl < num) { + ctrl *= 2; + bits ++; + } + + return bits; +} + +/* + * A slightly "updated" version of the algorithm described in the commit log + * is used here. + * + * The only enhancement is that: + * + * - The number of bits used by the backend filesystem for HUGE d_off which + * is described as 63, and + * - The number of bits used by the d_off presented by the transformation + * upwards which is described as 64, are both made "configurable." + */ + + +#define BACKEND_D_OFF_BITS 63 +#define PRESENT_D_OFF_BITS 63 + +#define ONE 1ULL +#define MASK (~0ULL) +#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) +#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) + +#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) +#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) + int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) { @@ -56,6 +84,9 @@ int cnt = 0; int max = 0; uint64_t y = 0; + uint64_t hi_mask = 0; + uint64_t off_mask = 0; + int max_bits = 0; if (x == ((uint64_t) -1)) { y = (uint64_t) -1; @@ -69,7 +100,23 @@ max = conf->subvolume_cnt; cnt = dht_subvol_cnt (this, subvol); - y = ((x * max) + cnt); + if (max == 1) { + y = x; + goto out; + } + + max_bits = dht_bits_for (max); + + hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); + + if (x & hi_mask) { + /* HUGE d_off */ + off_mask = MASK << max_bits; + y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; + } else { + /* small d_off */ + y = ((x * max) + cnt); + } out: if (y_p) @@ -147,16 +193,38 @@ int max = 0; uint64_t x = 0; xlator_t *subvol = 0; + int max_bits = 0; + uint64_t off_mask = 0; + uint64_t host_mask = 0; if (!this->private) - goto out; + return -1; conf = this->private; max = conf->subvolume_cnt; - cnt = y % max; - x = y / max; + if (max == 1) { + x = y; + cnt = 0; + goto out; + } + + if (y & TOP_BIT) { + /* HUGE d_off */ + max_bits = dht_bits_for (max); + off_mask = (MASK << max_bits); + host_mask = ~(off_mask); + + x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; + + cnt = y & host_mask; + } else { + /* small d_off */ + cnt = y % max; + x = y / max; + } +out: subvol = conf->subvolumes[cnt]; if (subvol_p) @@ -165,7 +233,6 @@ if (x_p) *x_p = x; -out: return 0; }
signature.asc
Description: Digital signature