Ping. FWIW, the SLP test failures it fixes were ICEs rather than code-quality tests, so this is a correctness fix rather than an optimisation.
Thanks, Richard Richard Sandiford <richard.sandif...@linaro.org> writes: > The SVE support for the new CONST_VECTOR encoding needs to be able > to extract the first N bits of the vector and duplicate it. This patch > adds a simplify_subreg rule for that. > > The code is covered by the gcc.target/aarch64/sve_slp_*.c tests. > > Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. > Also tested by comparing the before and after assembly output for at > least one target per CPU directory. OK to install? > > Richard > > > 2018-01-04 Richard Sandiford <richard.sandif...@linaro.org> > > gcc/ > * simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes > parameter and use it instead of GET_MODE_SIZE (innermode). Use > inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode). > Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of > GET_MODE_NUNITS (innermode). Also add a first_elem parameter. > Change innermode from fixed_mode_size to machine_mode. > (simplify_subreg): Update call accordingly. Handle a constant-sized > subreg of a variable-length CONST_VECTOR. > > Index: gcc/simplify-rtx.c > =================================================================== > --- gcc/simplify-rtx.c 2018-01-03 21:42:44.569646782 +0000 > +++ gcc/simplify-rtx.c 2018-01-04 17:35:25.747473457 +0000 > @@ -5971,13 +5971,16 @@ simplify_ternary_operation (enum rtx_cod > or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or > CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. > > - Works by unpacking OP into a collection of 8-bit values > + Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit > values > represented as a little-endian array of 'unsigned char', selecting by > BYTE, > - and then repacking them again for OUTERMODE. */ > + and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, > + FIRST_ELEM is the number of the first element to extract, otherwise > + FIRST_ELEM is ignored. */ > > static rtx > simplify_immed_subreg (fixed_size_mode outermode, rtx op, > - fixed_size_mode innermode, unsigned int byte) > + machine_mode innermode, unsigned int byte, > + unsigned int first_elem, unsigned int inner_bytes) > { > enum { > value_bit = 8, > @@ -6007,13 +6010,13 @@ simplify_immed_subreg (fixed_size_mode o > > /* We support any size mode. */ > max_bitsize = MAX (GET_MODE_BITSIZE (outermode), > - GET_MODE_BITSIZE (innermode)); > + inner_bytes * BITS_PER_UNIT); > > /* Unpack the value. */ > > if (GET_CODE (op) == CONST_VECTOR) > { > - num_elem = GET_MODE_NUNITS (innermode); > + num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); > elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); > } > else > @@ -6030,7 +6033,7 @@ simplify_immed_subreg (fixed_size_mode o > { > unsigned char * vp; > rtx el = (GET_CODE (op) == CONST_VECTOR > - ? CONST_VECTOR_ELT (op, elem) > + ? CONST_VECTOR_ELT (op, first_elem + elem) > : op); > > /* Vectors are kept in target memory order. (This is probably > @@ -6157,10 +6160,9 @@ simplify_immed_subreg (fixed_size_mode o > /* Renumber BYTE so that the least-significant byte is byte 0. A special > case is paradoxical SUBREGs, which shouldn't be adjusted since they > will already have offset 0. */ > - if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode)) > + if (inner_bytes >= GET_MODE_SIZE (outermode)) > { > - unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode) > - - byte); > + unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte; > unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; > unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; > byte = (subword_byte % UNITS_PER_WORD > @@ -6169,7 +6171,7 @@ simplify_immed_subreg (fixed_size_mode o > > /* BYTE should still be inside OP. (Note that BYTE is unsigned, > so if it's become negative it will instead be very large.) */ > - gcc_assert (byte < GET_MODE_SIZE (innermode)); > + gcc_assert (byte < inner_bytes); > > /* Convert from bytes to chunks of size value_bit. */ > value_start = byte * (BITS_PER_UNIT / value_bit); > @@ -6358,7 +6360,18 @@ simplify_subreg (machine_mode outermode, > if (is_a <fixed_size_mode> (outermode, &fs_outermode) > && is_a <fixed_size_mode> (innermode, &fs_innermode) > && byte.is_constant (&cbyte)) > - return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte); > + return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, > + 0, GET_MODE_SIZE (fs_innermode)); > + > + /* Handle constant-sized outer modes and variable-sized inner modes. > */ > + unsigned HOST_WIDE_INT first_elem; > + if (GET_CODE (op) == CONST_VECTOR > + && is_a <fixed_size_mode> (outermode, &fs_outermode) > + && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), > + &first_elem)) > + return simplify_immed_subreg (fs_outermode, op, innermode, 0, > + first_elem, > + GET_MODE_SIZE (fs_outermode)); > > return NULL_RTX; > }