The bswap pass deals with 3 possibly different byte sizes: host, target and the size a byte marker occupies in the symbolic_number structure [1]. However, as of now the code mixes the three sizes. This works in practice as the pass is only enabled for targets with BITS_PER_UNIT == 8 and nobody runs GCC on a host with CHAR_BIT != 8. As prompted by Jakub Jelinek, this patch fixes this mess. Byte markers are 8-bit quantities (they could be made 4-bit quantities but I preferred to keep the code working the same as before) for which a new macro is introduced (BITS_PER_MARKER); anything related to storing the value of a byte marker in a variable should check for the host byte size or wide integer size and anything aimed at manipulating the target value should check for BITS_PER_UNIT.
[1] Although the comment for this structure implies that a byte marker has the same size as the host byte, the way it is used in the code (even before any of my patches) shows that it uses a fixed size of 8 [2]. [2] Note that since the pass is only active for targets with BITS_PER_UNIT == 8, it might be using the target byte size. gcc/ChangeLog: 2014-07-04 Thomas Preud'homme <thomas.preudho...@arm.com> * tree-ssa-math-opts.c (struct symbolic_number): Clarify comment about the size of byte markers. (do_shift_rotate): Fix confusion between host, target and marker byte size. (verify_symbolic_number_p): Likewise. (find_bswap_or_nop_1): Likewise. (find_bswap_or_nop): Likewise. diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index ca2b30d..55c5df7 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1602,11 +1602,10 @@ make_pass_cse_sincos (gcc::context *ctxt) /* A symbolic number is used to detect byte permutation and selection patterns. Therefore the field N contains an artificial number - consisting of byte size markers: + consisting of octet sized markers: - 0 - byte has the value 0 - 1..size - byte contains the content of the byte - number indexed with that value minus one. + 0 - target byte has the value 0 + 1..size - marker value is the target byte index minus one. To detect permutations on memory sources (arrays and structures), a symbolic number is also associated a base address (the array or structure the load is @@ -1631,6 +1630,8 @@ struct symbolic_number { unsigned HOST_WIDE_INT range; }; +#define BITS_PER_MARKER 8 + /* The number which the find_bswap_or_nop_1 result should match in order to have a nop. The number is masked according to the size of the symbolic number before using it. 
*/ @@ -1652,15 +1653,16 @@ do_shift_rotate (enum tree_code code, struct symbolic_number *n, int count) { - int bitsize = TYPE_PRECISION (n->type); + int size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - if (count % 8 != 0) + if (count % BITS_PER_UNIT != 0) return false; + count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; /* Zero out the extra bits of N in order to avoid them being shifted into the significant bits. */ - if (bitsize < 8 * (int)sizeof (int64_t)) - n->n &= ((uint64_t)1 << bitsize) - 1; + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; switch (code) { @@ -1670,22 +1672,22 @@ do_shift_rotate (enum tree_code code, case RSHIFT_EXPR: /* Arithmetic shift of signed type: result is dependent on the value. */ if (!TYPE_UNSIGNED (n->type) - && (n->n & ((uint64_t) 0xff << (bitsize - 8)))) + && (n->n & ((uint64_t) 0xff << ((size - 1) * BITS_PER_MARKER)))) return false; n->n >>= count; break; case LROTATE_EXPR: - n->n = (n->n << count) | (n->n >> (bitsize - count)); + n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); break; case RROTATE_EXPR: - n->n = (n->n >> count) | (n->n << (bitsize - count)); + n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); break; default: return false; } /* Zero unused bits for size. 
*/ - if (bitsize < 8 * (int)sizeof (int64_t)) - n->n &= ((uint64_t)1 << bitsize) - 1; + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; return true; } @@ -1726,13 +1728,13 @@ init_symbolic_number (struct symbolic_number *n, tree src) if (size % BITS_PER_UNIT != 0) return false; size /= BITS_PER_UNIT; - if (size > (int)sizeof (uint64_t)) + if (size > 64 / BITS_PER_MARKER) return false; n->range = size; n->n = CMPNOP; - if (size < (int)sizeof (int64_t)) - n->n &= ((uint64_t)1 << (size * BITS_PER_UNIT)) - 1; + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; return true; } @@ -1870,15 +1872,17 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) case BIT_AND_EXPR: { int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - uint64_t val = int_cst_value (rhs2); - uint64_t tmp = val; + uint64_t val = int_cst_value (rhs2), mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; /* Only constants masking full bytes are allowed. */ - for (i = 0; i < size; i++, tmp >>= BITS_PER_UNIT) - if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff) + for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) + if ((val & tmp) != 0 && (val & tmp) != tmp) return NULL; + else if (val & tmp) + mask |= (uint64_t) 0xff << (i * BITS_PER_MARKER); - n->n &= val; + n->n &= mask; } break; case LSHIFT_EXPR: @@ -1897,25 +1901,27 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) type_size = TYPE_PRECISION (type); if (type_size % BITS_PER_UNIT != 0) return NULL; - if (type_size > (int)sizeof (uint64_t) * 8) + type_size /= BITS_PER_UNIT; + if (type_size > 64 / BITS_PER_MARKER) return NULL; /* Sign extension: result is dependent on the value. 
*/ - old_type_size = TYPE_PRECISION (n->type); + old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size - && n->n & ((uint64_t) 0xff << (old_type_size - 8))) + && n->n & ((uint64_t) 0xff << ((old_type_size - 1) + * BITS_PER_MARKER))) return NULL; - if (type_size / BITS_PER_UNIT < (int)(sizeof (int64_t))) + if (type_size < 64 / BITS_PER_MARKER) { /* If STMT casts to a smaller type mask out the bits not belonging to the target type. */ - n->n &= ((uint64_t)1 << type_size) - 1; + n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; } n->type = type; if (!n->base_addr) - n->range = type_size / BITS_PER_UNIT; + n->range = type_size; } break; default: @@ -1965,7 +1971,6 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) != gimple_assign_rhs1 (source_stmt2)) { int64_t inc, mask; - unsigned i; HOST_WIDE_INT off_sub; struct symbolic_number *n_ptr; @@ -1989,21 +1994,23 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) off_sub = n2.bytepos - n1.bytepos; - /* Check that the range of memory covered < biggest int size. */ - if (off_sub + n2.range > (int) sizeof (int64_t)) + /* Check that the range of memory covered can be represented by + a symbolic number. */ + if (off_sub + n2.range > 64 / BITS_PER_MARKER) return NULL; n->range = n2.range + off_sub; /* Reinterpret byte marks in symbolic number holding the value of bigger weight according to target endianness. */ inc = BYTES_BIG_ENDIAN ? 
off_sub + n2.range - n1.range : off_sub; - mask = 0xFF; + size = TYPE_PRECISION (n1.type) / BITS_PER_UNIT; + mask = 0xff; if (BYTES_BIG_ENDIAN) n_ptr = &n1; else n_ptr = &n2; - for (i = 0; i < sizeof (int64_t); i++, inc <<= 8, - mask <<= 8) + for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER, + mask <<= BITS_PER_MARKER) { if (n_ptr->n & mask) n_ptr->n += inc; @@ -2023,7 +2030,7 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) n->bytepos = n1.bytepos; n->type = n1.type; size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - for (i = 0, mask = 0xff; i < size; i++, mask <<= BITS_PER_UNIT) + for (i = 0, mask = 0xff; i < size; i++, mask <<= BITS_PER_MARKER) { uint64_t masked1, masked2; @@ -2084,17 +2091,17 @@ find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap) int rsize; uint64_t tmpn; - for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_UNIT, rsize++); + for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); n->range = rsize; } /* Zero out the extra bits of N and CMP*. */ - if (n->range < (int)sizeof (int64_t)) + if (n->range < (int) sizeof (int64_t)) { uint64_t mask; - mask = ((uint64_t)1 << (n->range * BITS_PER_UNIT)) - 1; - cmpxchg >>= (sizeof (int64_t) - n->range) * BITS_PER_UNIT; + mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; + cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; cmpnop &= mask; } Ok for trunk? Best regards, Thomas