Ping.

FWIW, the SLP test failures it fixes were ICEs rather than code-quality
tests, so this is a correctness fix rather than an optimisation.

Thanks,
Richard

Richard Sandiford <richard.sandif...@linaro.org> writes:
> The SVE support for the new CONST_VECTOR encoding needs to be able
> to extract the first N bits of the vector and duplicate it.  This patch
> adds a simplify_subreg rule for that.
>
> The code is covered by the gcc.target/aarch64/sve_slp_*.c tests.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also tested by comparing the before and after assembly output for at
> least one target per CPU directory.  OK to install?
>
> Richard
>
>
> 2018-01-04  Richard Sandiford  <richard.sandif...@linaro.org>
>
> gcc/
>       * simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes
>       parameter and use it instead of GET_MODE_SIZE (innermode).  Use
>       inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode).
>       Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of
>       GET_MODE_NUNITS (innermode).  Also add a first_elem parameter.
>       Change innermode from fixed_mode_size to machine_mode.
>       (simplify_subreg): Update call accordingly.  Handle a constant-sized
>       subreg of a variable-length CONST_VECTOR.
>
> Index: gcc/simplify-rtx.c
> ===================================================================
> --- gcc/simplify-rtx.c        2018-01-03 21:42:44.569646782 +0000
> +++ gcc/simplify-rtx.c        2018-01-04 17:35:25.747473457 +0000
> @@ -5971,13 +5971,16 @@ simplify_ternary_operation (enum rtx_cod
>     or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
>     CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
>  
> -   Works by unpacking OP into a collection of 8-bit values
> +   Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit 
> values
>     represented as a little-endian array of 'unsigned char', selecting by 
> BYTE,
> -   and then repacking them again for OUTERMODE.  */
> +   and then repacking them again for OUTERMODE.  If OP is a CONST_VECTOR,
> +   FIRST_ELEM is the number of the first element to extract, otherwise
> +   FIRST_ELEM is ignored.  */
>  
>  static rtx
>  simplify_immed_subreg (fixed_size_mode outermode, rtx op,
> -                    fixed_size_mode innermode, unsigned int byte)
> +                    machine_mode innermode, unsigned int byte,
> +                    unsigned int first_elem, unsigned int inner_bytes)
>  {
>    enum {
>      value_bit = 8,
> @@ -6007,13 +6010,13 @@ simplify_immed_subreg (fixed_size_mode o
>  
>    /* We support any size mode.  */
>    max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
> -                  GET_MODE_BITSIZE (innermode));
> +                  inner_bytes * BITS_PER_UNIT);
>  
>    /* Unpack the value.  */
>  
>    if (GET_CODE (op) == CONST_VECTOR)
>      {
> -      num_elem = GET_MODE_NUNITS (innermode);
> +      num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
>        elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
>      }
>    else
> @@ -6030,7 +6033,7 @@ simplify_immed_subreg (fixed_size_mode o
>      {
>        unsigned char * vp;
>        rtx el = (GET_CODE (op) == CONST_VECTOR
> -             ? CONST_VECTOR_ELT (op, elem)
> +             ? CONST_VECTOR_ELT (op, first_elem + elem)
>               : op);
>  
>        /* Vectors are kept in target memory order.  (This is probably
> @@ -6157,10 +6160,9 @@ simplify_immed_subreg (fixed_size_mode o
>    /* Renumber BYTE so that the least-significant byte is byte 0.  A special
>       case is paradoxical SUBREGs, which shouldn't be adjusted since they
>       will already have offset 0.  */
> -  if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode))
> +  if (inner_bytes >= GET_MODE_SIZE (outermode))
>      {
> -      unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode)
> -                     - byte);
> +      unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
>        unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
>        unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
>        byte = (subword_byte % UNITS_PER_WORD
> @@ -6169,7 +6171,7 @@ simplify_immed_subreg (fixed_size_mode o
>  
>    /* BYTE should still be inside OP.  (Note that BYTE is unsigned,
>       so if it's become negative it will instead be very large.)  */
> -  gcc_assert (byte < GET_MODE_SIZE (innermode));
> +  gcc_assert (byte < inner_bytes);
>  
>    /* Convert from bytes to chunks of size value_bit.  */
>    value_start = byte * (BITS_PER_UNIT / value_bit);
> @@ -6358,7 +6360,18 @@ simplify_subreg (machine_mode outermode,
>        if (is_a <fixed_size_mode> (outermode, &fs_outermode)
>         && is_a <fixed_size_mode> (innermode, &fs_innermode)
>         && byte.is_constant (&cbyte))
> -     return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte);
> +     return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
> +                                   0, GET_MODE_SIZE (fs_innermode));
> +
> +      /* Handle constant-sized outer modes and variable-sized inner modes.  
> */
> +      unsigned HOST_WIDE_INT first_elem;
> +      if (GET_CODE (op) == CONST_VECTOR
> +       && is_a <fixed_size_mode> (outermode, &fs_outermode)
> +       && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
> +                               &first_elem))
> +     return simplify_immed_subreg (fs_outermode, op, innermode, 0,
> +                                   first_elem,
> +                                   GET_MODE_SIZE (fs_outermode));
>  
>        return NULL_RTX;
>      }

Reply via email to