Jeff Law <[email protected]> writes:
> On 7/12/19 1:44 AM, Richard Sandiford wrote:
>> Richard Sandiford <[email protected]> writes:
>>> This patch rewrites the way simplify_subreg handles constants.
>>> It uses similar native_encode/native_decode routines to the
>>> tree-level handling of VIEW_CONVERT_EXPR, meaning that we can
>>> move between rtx constants and the target memory image of them.
>>>
>>> The main point of this patch is to support subregs of constant-length
>>> vectors for VLA vectors, beyond the very simple cases that were already
>>> handled. Many of the new tests failed before the patch for variable-
>>> length vectors.
>>>
>>> The boolean side is tested more by the upcoming SVE ACLE work.
>>>
>>> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
>>> OK to install?
>> I made a last-minute change after testing, to use uintNN_t types
>> for target_unit rather than the original unsigned char/short/int.
>> Of course, that doesn't survive a libgcc build since <stdint.h>
>> isn't included there.
>>
>> Fixed below, and posted as tested this time.
>>
>> Richard
>>
>>
>> 2019-07-12 Richard Sandiford <[email protected]>
>>
>> gcc/
>> * defaults.h (TARGET_UNIT): New macro.
>> (target_unit): New type.
>> * rtl.h (native_encode_rtx, native_decode_rtx)
>> (native_decode_vector_rtx, subreg_size_lsb): Declare.
>> (subreg_lsb_1): Turn into an inline wrapper around subreg_size_lsb.
>> * rtlanal.c (subreg_lsb_1): Delete.
>> (subreg_size_lsb): New function.
>> * simplify-rtx.c: Include rtx-vector-builder.h
>> (simplify_immed_subreg): Delete.
>> (native_encode_rtx, native_decode_vector_rtx, native_decode_rtx)
>> (simplify_const_vector_byte_offset, simplify_const_vector_subreg): New
>> functions.
>> (simplify_subreg): Use them.
>> (test_vector_subregs_modes, test_vector_subregs_repeating)
>> (test_vector_subregs_fore_back, test_vector_subregs_stepped)
>> (test_vector_subregs): New functions.
>> (test_vector_ops): Call test_vector_subregs for integer vector
>> modes with at least 2 elements.
> This just turns out to be amazingly painful to work through and I don't
> particularly see any good breakdown which would make it obvious where
> the behavioral changes are vs just refactoring.
>
> Given your long history with GCC and your expertise in RTL as well as
> the SVE space I'm inclined to say go for it and we'll cope with any fallout.
Thanks. Here's what I (very) belatedly applied. Further cross-target
testing showed I needed some tweaks:
(1) Keep:
/* Some ports misuse CCmode. */
if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op))
return op;
which unfortunately is still needed.
(2) The old version filled the undefined upper bits of a paradoxical
subreg with zeros, but some ports expected it to be sign-extended
for integers.
(3) In the self tests, skip over non-IEEE floating-point modes,
since bitcasting to and from others can drop bits.
Richard
2019-09-19 Richard Sandiford <[email protected]>
gcc/
* defaults.h (TARGET_UNIT): New macro.
(target_unit): New type.
* rtl.h (native_encode_rtx, native_decode_rtx)
(native_decode_vector_rtx, subreg_size_lsb): Declare.
(subreg_lsb_1): Turn into an inline wrapper around subreg_size_lsb.
* rtlanal.c (subreg_lsb_1): Delete.
(subreg_size_lsb): New function.
* simplify-rtx.c: Include rtx-vector-builder.h
(simplify_immed_subreg): Delete.
(native_encode_rtx, native_decode_vector_rtx, native_decode_rtx)
(simplify_const_vector_byte_offset, simplify_const_vector_subreg): New
functions.
(simplify_subreg): Use them.
(test_vector_subregs_modes, test_vector_subregs_repeating)
(test_vector_subregs_fore_back, test_vector_subregs_stepped)
(test_vector_subregs): New functions.
(test_vector_ops): Call test_vector_subregs for integer vector
modes with at least 2 elements.
Index: gcc/defaults.h
===================================================================
*** gcc/defaults.h 2019-07-12 08:53:06.000000000 +0100
--- gcc/defaults.h 2019-09-19 09:56:43.873352025 +0100
*************** #define TARGET_VTABLE_USES_DESCRIPTORS 0
*** 1459,1462 ****
--- 1459,1476 ----
#define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB
#endif
+ #ifndef USED_FOR_TARGET
+ /* Done this way to keep gengtype happy. */
+ #if BITS_PER_UNIT == 8
+ #define TARGET_UNIT uint8_t
+ #elif BITS_PER_UNIT == 16
+ #define TARGET_UNIT uint16_t
+ #elif BITS_PER_UNIT == 32
+ #define TARGET_UNIT uint32_t
+ #else
+ #error Unknown BITS_PER_UNIT
+ #endif
+ typedef TARGET_UNIT target_unit;
+ #endif
+
#endif /* ! GCC_DEFAULTS_H */
Index: gcc/rtl.h
===================================================================
*** gcc/rtl.h 2019-09-12 10:52:56.000000000 +0100
--- gcc/rtl.h 2019-09-19 09:56:43.877351995 +0100
*************** extern int rtx_cost (rtx, machine_mode,
*** 2406,2417 ****
extern int address_cost (rtx, machine_mode, addr_space_t, bool);
extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int,
struct full_rtx_costs *);
extern poly_uint64 subreg_lsb (const_rtx);
! extern poly_uint64 subreg_lsb_1 (machine_mode, machine_mode, poly_uint64);
extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64,
poly_uint64);
extern bool read_modify_subreg_p (const_rtx);
/* Return the subreg byte offset for a subreg whose outer mode is
OUTER_MODE, whose inner mode is INNER_MODE, and where there are
LSB_SHIFT *bits* between the lsb of the outer value and the lsb of
--- 2406,2435 ----
extern int address_cost (rtx, machine_mode, addr_space_t, bool);
extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int,
struct full_rtx_costs *);
+ extern bool native_encode_rtx (machine_mode, rtx, vec<target_unit> &,
+ unsigned int, unsigned int);
+ extern rtx native_decode_rtx (machine_mode, vec<target_unit>,
+ unsigned int);
+ extern rtx native_decode_vector_rtx (machine_mode, vec<target_unit>,
+ unsigned int, unsigned int, unsigned int);
extern poly_uint64 subreg_lsb (const_rtx);
! extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64);
extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64,
poly_uint64);
extern bool read_modify_subreg_p (const_rtx);
+ /* Given a subreg's OUTER_MODE, INNER_MODE, and SUBREG_BYTE, return the
+ bit offset at which the subreg begins (counting from the least significant
+ bit of the operand). */
+
+ inline poly_uint64
+ subreg_lsb_1 (machine_mode outer_mode, machine_mode inner_mode,
+ poly_uint64 subreg_byte)
+ {
+ return subreg_size_lsb (GET_MODE_SIZE (outer_mode),
+ GET_MODE_SIZE (inner_mode), subreg_byte);
+ }
+
/* Return the subreg byte offset for a subreg whose outer mode is
OUTER_MODE, whose inner mode is INNER_MODE, and where there are
LSB_SHIFT *bits* between the lsb of the outer value and the lsb of
Index: gcc/rtlanal.c
===================================================================
*** gcc/rtlanal.c 2019-09-12 10:53:49.029892836 +0100
--- gcc/rtlanal.c 2019-09-19 09:56:43.877351995 +0100
*************** loc_mentioned_in_p (rtx *loc, const_rtx
*** 3637,3659 ****
return 0;
}
! /* Helper function for subreg_lsb. Given a subreg's OUTER_MODE, INNER_MODE,
! and SUBREG_BYTE, return the bit offset where the subreg begins
! (counting from the least significant bit of the operand). */
poly_uint64
! subreg_lsb_1 (machine_mode outer_mode,
! machine_mode inner_mode,
! poly_uint64 subreg_byte)
{
poly_uint64 subreg_end, trailing_bytes, byte_pos;
/* A paradoxical subreg begins at bit position 0. */
! if (paradoxical_subreg_p (outer_mode, inner_mode))
! return 0;
! subreg_end = subreg_byte + GET_MODE_SIZE (outer_mode);
! trailing_bytes = GET_MODE_SIZE (inner_mode) - subreg_end;
if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
byte_pos = trailing_bytes;
else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
--- 3637,3667 ----
return 0;
}
! /* Reinterpret a subreg as a bit extraction from an integer and return
! the position of the least significant bit of the extracted value.
! In other words, if the extraction were performed as a shift right
! and mask, return the number of bits to shift right.
!
! The outer value of the subreg has OUTER_BYTES bytes and starts at
! byte offset SUBREG_BYTE within an inner value of INNER_BYTES bytes. */
poly_uint64
! subreg_size_lsb (poly_uint64 outer_bytes,
! poly_uint64 inner_bytes,
! poly_uint64 subreg_byte)
{
poly_uint64 subreg_end, trailing_bytes, byte_pos;
/* A paradoxical subreg begins at bit position 0. */
! gcc_checking_assert (ordered_p (outer_bytes, inner_bytes));
! if (maybe_gt (outer_bytes, inner_bytes))
! {
! gcc_checking_assert (known_eq (subreg_byte, 0U));
! return 0;
! }
! subreg_end = subreg_byte + outer_bytes;
! trailing_bytes = inner_bytes - subreg_end;
if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
byte_pos = trailing_bytes;
else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
Index: gcc/simplify-rtx.c
===================================================================
*** gcc/simplify-rtx.c 2019-09-18 08:37:24.000000000 +0100
--- gcc/simplify-rtx.c 2019-09-19 09:56:43.881351964 +0100
*************** simplify_ternary_operation (enum rtx_cod
*** 6130,6471 ****
return 0;
}
! /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
! or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
! CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
!
! Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values
! represented as a little-endian array of 'unsigned char', selecting by BYTE,
! and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR,
! FIRST_ELEM is the number of the first element to extract, otherwise
! FIRST_ELEM is ignored. */
!
! static rtx
! simplify_immed_subreg (fixed_size_mode outermode, rtx op,
! machine_mode innermode, unsigned int byte,
! unsigned int first_elem, unsigned int inner_bytes)
{
! enum {
! value_bit = 8,
! value_mask = (1 << value_bit) - 1
! };
! unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit];
! int value_start;
! int i;
! int elem;
!
! int num_elem;
! rtx * elems;
! int elem_bitsize;
! rtx result_s = NULL;
! rtvec result_v = NULL;
! enum mode_class outer_class;
! scalar_mode outer_submode;
! int max_bitsize;
! /* Some ports misuse CCmode. */
! if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op))
! return op;
! /* We have no way to represent a complex constant at the rtl level. */
! if (COMPLEX_MODE_P (outermode))
! return NULL_RTX;
! /* We support any size mode. */
! max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
! inner_bytes * BITS_PER_UNIT);
! /* Unpack the value. */
! if (GET_CODE (op) == CONST_VECTOR)
{
! num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
! elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
}
else
{
! num_elem = 1;
! elem_bitsize = max_bitsize;
}
! /* If this asserts, it is too complicated; reducing value_bit may help. */
! gcc_assert (BITS_PER_UNIT % value_bit == 0);
! /* I don't know how to handle endianness of sub-units. */
! gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
!
! for (elem = 0; elem < num_elem; elem++)
! {
! unsigned char * vp;
! rtx el = (GET_CODE (op) == CONST_VECTOR
! ? CONST_VECTOR_ELT (op, first_elem + elem)
! : op);
!
! /* Vectors are kept in target memory order. (This is probably
! a mistake.) */
! {
! unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
! unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
! / BITS_PER_UNIT);
! unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
! unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
! unsigned bytele = (subword_byte % UNITS_PER_WORD
! + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
! vp = value + (bytele * BITS_PER_UNIT) / value_bit;
! }
! switch (GET_CODE (el))
{
! case CONST_INT:
! for (i = 0;
! i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
! i += value_bit)
! *vp++ = INTVAL (el) >> i;
! /* CONST_INTs are always logically sign-extended. */
! for (; i < elem_bitsize; i += value_bit)
! *vp++ = INTVAL (el) < 0 ? -1 : 0;
! break;
!
! case CONST_WIDE_INT:
! {
! rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode));
! unsigned char extend = wi::sign_mask (val);
! int prec = wi::get_precision (val);
!
! for (i = 0; i < prec && i < elem_bitsize; i += value_bit)
! *vp++ = wi::extract_uhwi (val, i, value_bit);
! for (; i < elem_bitsize; i += value_bit)
! *vp++ = extend;
! }
! break;
! case CONST_DOUBLE:
! if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode)
! {
! unsigned char extend = 0;
! /* If this triggers, someone should have generated a
! CONST_INT instead. */
! gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);
!
! for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
! *vp++ = CONST_DOUBLE_LOW (el) >> i;
! while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize)
! {
! *vp++
! = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
! i += value_bit;
! }
! if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1))
! extend = -1;
! for (; i < elem_bitsize; i += value_bit)
! *vp++ = extend;
! }
! else
! {
! /* This is big enough for anything on the platform. */
! long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32];
! scalar_float_mode el_mode;
!
! el_mode = as_a <scalar_float_mode> (GET_MODE (el));
! int bitsize = GET_MODE_BITSIZE (el_mode);
!
! gcc_assert (bitsize <= elem_bitsize);
! gcc_assert (bitsize % value_bit == 0);
!
! real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el),
! GET_MODE (el));
!
! /* real_to_target produces its result in words affected by
! FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
! and use WORDS_BIG_ENDIAN instead; see the documentation
! of SUBREG in rtl.texi. */
! for (i = 0; i < bitsize; i += value_bit)
! {
! int ibase;
! if (WORDS_BIG_ENDIAN)
! ibase = bitsize - 1 - i;
! else
! ibase = i;
! *vp++ = tmp[ibase / 32] >> i % 32;
! }
! /* It shouldn't matter what's done here, so fill it with
! zero. */
! for (; i < elem_bitsize; i += value_bit)
! *vp++ = 0;
! }
! break;
! case CONST_FIXED:
! if (elem_bitsize <= HOST_BITS_PER_WIDE_INT)
! {
! for (i = 0; i < elem_bitsize; i += value_bit)
! *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
! }
else
! {
! for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
! *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
! for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize;
! i += value_bit)
! *vp++ = CONST_FIXED_VALUE_HIGH (el)
! >> (i - HOST_BITS_PER_WIDE_INT);
! for (; i < elem_bitsize; i += value_bit)
! *vp++ = 0;
! }
! break;
!
! default:
! gcc_unreachable ();
}
}
! /* Now, pick the right byte to start with. */
! /* Renumber BYTE so that the least-significant byte is byte 0. A special
! case is paradoxical SUBREGs, which shouldn't be adjusted since they
! will already have offset 0. */
! if (inner_bytes >= GET_MODE_SIZE (outermode))
{
! unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
! unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
! unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
! byte = (subword_byte % UNITS_PER_WORD
! + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
}
! /* BYTE should still be inside OP. (Note that BYTE is unsigned,
! so if it's become negative it will instead be very large.) */
! gcc_assert (byte < inner_bytes);
! /* Convert from bytes to chunks of size value_bit. */
! value_start = byte * (BITS_PER_UNIT / value_bit);
! /* Re-pack the value. */
! num_elem = GET_MODE_NUNITS (outermode);
! if (VECTOR_MODE_P (outermode))
{
! result_v = rtvec_alloc (num_elem);
! elems = &RTVEC_ELT (result_v, 0);
}
! else
! elems = &result_s;
! outer_submode = GET_MODE_INNER (outermode);
! outer_class = GET_MODE_CLASS (outer_submode);
! elem_bitsize = GET_MODE_BITSIZE (outer_submode);
!
! gcc_assert (elem_bitsize % value_bit == 0);
! gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize);
!
! for (elem = 0; elem < num_elem; elem++)
! {
! unsigned char *vp;
!
! /* Vectors are stored in target memory order. (This is probably
! a mistake.) */
! {
! unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
! unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
! / BITS_PER_UNIT);
! unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
! unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
! unsigned bytele = (subword_byte % UNITS_PER_WORD
! + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
! vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit;
! }
! switch (outer_class)
! {
! case MODE_INT:
! case MODE_PARTIAL_INT:
! {
! int u;
! int base = 0;
! int units
! = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1)
! / HOST_BITS_PER_WIDE_INT;
! HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
! wide_int r;
!
! if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT)
! return NULL_RTX;
! for (u = 0; u < units; u++)
! {
! unsigned HOST_WIDE_INT buf = 0;
! for (i = 0;
! i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize;
! i += value_bit)
! buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
!
! tmp[u] = buf;
! base += HOST_BITS_PER_WIDE_INT;
! }
! r = wide_int::from_array (tmp, units,
! GET_MODE_PRECISION (outer_submode));
! #if TARGET_SUPPORTS_WIDE_INT == 0
! /* Make sure r will fit into CONST_INT or CONST_DOUBLE. */
! if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT)
! return NULL_RTX;
! #endif
! elems[elem] = immed_wide_int_const (r, outer_submode);
! }
! break;
!
! case MODE_FLOAT:
! case MODE_DECIMAL_FLOAT:
! {
! REAL_VALUE_TYPE r;
! long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 };
!
! /* real_from_target wants its input in words affected by
! FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
! and use WORDS_BIG_ENDIAN instead; see the documentation
! of SUBREG in rtl.texi. */
! for (i = 0; i < elem_bitsize; i += value_bit)
! {
! int ibase;
! if (WORDS_BIG_ENDIAN)
! ibase = elem_bitsize - 1 - i;
! else
! ibase = i;
! tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32;
! }
!
! real_from_target (&r, tmp, outer_submode);
! elems[elem] = const_double_from_real_value (r, outer_submode);
! }
! break;
!
! case MODE_FRACT:
! case MODE_UFRACT:
! case MODE_ACCUM:
! case MODE_UACCUM:
! {
! FIXED_VALUE_TYPE f;
! f.data.low = 0;
! f.data.high = 0;
! f.mode = outer_submode;
!
! for (i = 0;
! i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
! i += value_bit)
! f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
! for (; i < elem_bitsize; i += value_bit)
! f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask)
! << (i - HOST_BITS_PER_WIDE_INT));
!
! elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode);
! }
! break;
! default:
! gcc_unreachable ();
! }
}
- if (VECTOR_MODE_P (outermode))
- return gen_rtx_CONST_VECTOR (outermode, result_v);
else
! return result_s;
}
/* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
--- 6130,6595 ----
return 0;
}
! /* Try to calculate NUM_BYTES bytes of the target memory image of X,
! starting at byte FIRST_BYTE. Return true on success and add the
! bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
! that the bytes follow target memory order. Leave BYTES unmodified
! on failure.
!
! MODE is the mode of X. The caller must reserve NUM_BYTES bytes in
! BYTES before calling this function. */
!
! bool
! native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
! unsigned int first_byte, unsigned int num_bytes)
{
! /* Check the mode is sensible. */
! gcc_assert (GET_MODE (x) == VOIDmode
! ? is_a <scalar_int_mode> (mode)
! : mode == GET_MODE (x));
! if (GET_CODE (x) == CONST_VECTOR)
! {
! /* CONST_VECTOR_ELT follows target memory order, so no shuffling
! is necessary. The only complication is that MODE_VECTOR_BOOL
! vectors can have several elements per byte. */
! unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
! GET_MODE_NUNITS (mode));
! unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
! if (elt_bits < BITS_PER_UNIT)
! {
! /* This is the only case in which elements can be smaller than
! a byte. */
! gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
! for (unsigned int i = 0; i < num_bytes; ++i)
! {
! target_unit value = 0;
! for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
! {
! value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
! elt += 1;
! }
! bytes.quick_push (value);
! }
! return true;
! }
! unsigned int start = bytes.length ();
! unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
! /* Make FIRST_BYTE relative to ELT. */
! first_byte %= elt_bytes;
! while (num_bytes > 0)
! {
! /* Work out how many bytes we want from element ELT. */
! unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
! if (!native_encode_rtx (GET_MODE_INNER (mode),
! CONST_VECTOR_ELT (x, elt), bytes,
! first_byte, chunk_bytes))
! {
! bytes.truncate (start);
! return false;
! }
! elt += 1;
! first_byte = 0;
! num_bytes -= chunk_bytes;
! }
! return true;
! }
! /* All subsequent cases are limited to scalars. */
! scalar_mode smode;
! if (!is_a <scalar_mode> (mode, &smode))
! return false;
!
! /* Make sure that the region is in range. */
! unsigned int end_byte = first_byte + num_bytes;
! unsigned int mode_bytes = GET_MODE_SIZE (smode);
! gcc_assert (end_byte <= mode_bytes);
! if (CONST_SCALAR_INT_P (x))
! {
! /* The target memory layout is affected by both BYTES_BIG_ENDIAN
! and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb
! position of each byte. */
! rtx_mode_t value (x, smode);
! wide_int_ref value_wi (value);
! for (unsigned int byte = first_byte; byte < end_byte; ++byte)
! {
! /* Always constant because the inputs are. */
! unsigned int lsb
! = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
! /* Operate directly on the encoding rather than using
! wi::extract_uhwi, so that we preserve the sign or zero
! extension for modes that are not a whole number of bits in
! size. (Zero extension is only used for the combination of
! innermode == BImode && STORE_FLAG_VALUE == 1). */
! unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
! unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
! unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
! bytes.quick_push (uhwi >> shift);
! }
! return true;
! }
! if (CONST_DOUBLE_P (x))
{
! /* real_to_target produces an array of integers in target memory order.
! All integers before the last one have 32 bits; the last one may
! have 32 bits or fewer, depending on whether the mode bitsize
! is divisible by 32. Each of these integers is then laid out
! in target memory as any other integer would be. */
! long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
! real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);
!
! /* The (maximum) number of target bytes per element of el32. */
! unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
! gcc_assert (bytes_per_el32 != 0);
!
! /* Build up the integers in a similar way to the CONST_SCALAR_INT_P
! handling above. */
! for (unsigned int byte = first_byte; byte < end_byte; ++byte)
! {
! unsigned int index = byte / bytes_per_el32;
! unsigned int subbyte = byte % bytes_per_el32;
! unsigned int int_bytes = MIN (bytes_per_el32,
! mode_bytes - index * bytes_per_el32);
! /* Always constant because the inputs are. */
! unsigned int lsb
! = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
! bytes.quick_push ((unsigned long) el32[index] >> lsb);
! }
! return true;
! }
!
! if (GET_CODE (x) == CONST_FIXED)
! {
! for (unsigned int byte = first_byte; byte < end_byte; ++byte)
! {
! /* Always constant because the inputs are. */
! unsigned int lsb
! = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
! unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
! if (lsb >= HOST_BITS_PER_WIDE_INT)
! {
! lsb -= HOST_BITS_PER_WIDE_INT;
! piece = CONST_FIXED_VALUE_HIGH (x);
! }
! bytes.quick_push (piece >> lsb);
! }
! return true;
! }
!
! return false;
! }
!
! /* Read a vector of mode MODE from the target memory image given by BYTES,
! starting at byte FIRST_BYTE. The vector is known to be encodable using
! NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
! and BYTES is known to have enough bytes to supply NPATTERNS *
! NELTS_PER_PATTERN vector elements. Each element of BYTES contains
! BITS_PER_UNIT bits and the bytes are in target memory order.
!
! Return the vector on success, otherwise return NULL_RTX. */
!
! rtx
! native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
! unsigned int first_byte, unsigned int npatterns,
! unsigned int nelts_per_pattern)
! {
! rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);
!
! unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
! GET_MODE_NUNITS (mode));
! if (elt_bits < BITS_PER_UNIT)
! {
! /* This is the only case in which elements can be smaller than a byte.
! Element 0 is always in the lsb of the containing byte. */
! gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
! for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
! {
! unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
! unsigned int byte_index = bit_index / BITS_PER_UNIT;
! unsigned int lsb = bit_index % BITS_PER_UNIT;
! builder.quick_push (bytes[byte_index] & (1 << lsb)
! ? CONST1_RTX (BImode)
! : CONST0_RTX (BImode));
! }
}
else
{
! for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
! {
! rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
! if (!x)
! return NULL_RTX;
! builder.quick_push (x);
! first_byte += elt_bits / BITS_PER_UNIT;
! }
}
! return builder.build ();
! }
!
! /* Read an rtx of mode MODE from the target memory image given by BYTES,
! starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT
! bits and the bytes are in target memory order. The image has enough
! values to specify all bytes of MODE.
! Return the rtx on success, otherwise return NULL_RTX. */
!
! rtx
! native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
! unsigned int first_byte)
! {
! if (VECTOR_MODE_P (mode))
! {
! /* If we know at compile time how many elements there are,
! pull each element directly from BYTES. */
! unsigned int nelts;
! if (GET_MODE_NUNITS (mode).is_constant (&nelts))
! return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
! return NULL_RTX;
! }
!
! scalar_int_mode imode;
! if (is_a <scalar_int_mode> (mode, &imode)
! && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
! {
! /* Pull the bytes msb first, so that we can use simple
! shift-and-insert wide_int operations. */
! unsigned int size = GET_MODE_SIZE (imode);
! wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
! for (unsigned int i = 0; i < size; ++i)
{
! unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
! /* Always constant because the inputs are. */
! unsigned int subbyte
! = subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
! result <<= BITS_PER_UNIT;
! result |= bytes[first_byte + subbyte];
! }
! return immed_wide_int_const (result, imode);
! }
! scalar_float_mode fmode;
! if (is_a <scalar_float_mode> (mode, &fmode))
! {
! /* We need to build an array of integers in target memory order.
! All integers before the last one have 32 bits; the last one may
! have 32 bits or fewer, depending on whether the mode bitsize
! is divisible by 32. */
! long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
! unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
! memset (el32, 0, num_el32 * sizeof (long));
!
! /* The (maximum) number of target bytes per element of el32. */
! unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
! gcc_assert (bytes_per_el32 != 0);
! unsigned int mode_bytes = GET_MODE_SIZE (fmode);
! for (unsigned int byte = 0; byte < mode_bytes; ++byte)
! {
! unsigned int index = byte / bytes_per_el32;
! unsigned int subbyte = byte % bytes_per_el32;
! unsigned int int_bytes = MIN (bytes_per_el32,
! mode_bytes - index * bytes_per_el32);
! /* Always constant because the inputs are. */
! unsigned int lsb
! = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
! el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
! }
! REAL_VALUE_TYPE r;
! real_from_target (&r, el32, fmode);
! return const_double_from_real_value (r, fmode);
! }
! if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
! {
! scalar_mode smode = as_a <scalar_mode> (mode);
! FIXED_VALUE_TYPE f;
! f.data.low = 0;
! f.data.high = 0;
! f.mode = smode;
! unsigned int mode_bytes = GET_MODE_SIZE (smode);
! for (unsigned int byte = 0; byte < mode_bytes; ++byte)
! {
! /* Always constant because the inputs are. */
! unsigned int lsb
! = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
! unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
! if (lsb >= HOST_BITS_PER_WIDE_INT)
! f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
else
! f.data.low |= unit << lsb;
}
+ return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
}
! return NULL_RTX;
! }
!
! /* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose
! is to convert a runtime BYTE value into a constant one. */
!
! static poly_uint64
! simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
! {
! /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
! machine_mode mode = GET_MODE (x);
! unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
! GET_MODE_NUNITS (mode));
! /* The number of bits needed to encode one element from each pattern. */
! unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;
!
! /* Identify the start point in terms of a sequence number and a byte offset
! within that sequence. */
! poly_uint64 first_sequence;
! unsigned HOST_WIDE_INT subbit;
! if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
! &first_sequence, &subbit))
{
! unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
! if (nelts_per_pattern == 1)
! /* This is a duplicated vector, so the value of FIRST_SEQUENCE
! doesn't matter. */
! byte = subbit / BITS_PER_UNIT;
! else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
! {
! /* The subreg drops the first element from each pattern and
! only uses the second element. Find the first sequence
! that starts on a byte boundary. */
! subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
! byte = subbit / BITS_PER_UNIT;
! }
}
+ return byte;
+ }
+
+ /* Subroutine of simplify_subreg in which:
! - X is known to be a CONST_VECTOR
! - OUTERMODE is known to be a vector mode
! Try to handle the subreg by operating on the CONST_VECTOR encoding
! rather than on each individual element of the CONST_VECTOR.
! Return the simplified subreg on success, otherwise return NULL_RTX. */
! static rtx
! simplify_const_vector_subreg (machine_mode outermode, rtx x,
! machine_mode innermode, unsigned int first_byte)
! {
! /* Paradoxical subregs of vectors have dubious semantics. */
! if (paradoxical_subreg_p (outermode, innermode))
! return NULL_RTX;
!
! /* We can only preserve the semantics of a stepped pattern if the new
! vector element is the same as the original one. */
! if (CONST_VECTOR_STEPPED_P (x)
! && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
! return NULL_RTX;
!
! /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
! unsigned int x_elt_bits
! = vector_element_size (GET_MODE_BITSIZE (innermode),
! GET_MODE_NUNITS (innermode));
! unsigned int out_elt_bits
! = vector_element_size (GET_MODE_BITSIZE (outermode),
! GET_MODE_NUNITS (outermode));
!
! /* The number of bits needed to encode one element from every pattern
! of the original vector. */
! unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;
!
! /* The number of bits needed to encode one element from every pattern
! of the result. */
! unsigned int out_sequence_bits
! = least_common_multiple (x_sequence_bits, out_elt_bits);
!
! /* Work out the number of interleaved patterns in the output vector
! and the number of encoded elements per pattern. */
! unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
! unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
!
! /* The encoding scheme requires the number of elements to be a multiple
! of the number of patterns, so that each pattern appears at least once
! and so that the same number of elements appear from each pattern. */
! bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
! unsigned int const_nunits;
! if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
! && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
{
! /* Either the encoding is invalid, or applying it would give us
! more elements than we need. Just encode each element directly. */
! out_npatterns = const_nunits;
! nelts_per_pattern = 1;
}
! else if (!ok_p)
! return NULL_RTX;
! /* Get enough bytes of X to form the new encoding. */
! unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
! unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
! auto_vec<target_unit, 128> buffer (buffer_bytes);
! if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
! return NULL_RTX;
! /* Reencode the bytes as OUTERMODE. */
! return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
! nelts_per_pattern);
! }
! /* Try to simplify a subreg of a constant by encoding the subreg region
! as a sequence of target bytes and reading them back in the new mode.
! Return the new value on success, otherwise return null.
!
! The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
! and byte offset FIRST_BYTE. */
!
! static rtx
! simplify_immed_subreg (fixed_size_mode outermode, rtx x,
! machine_mode innermode, unsigned int first_byte)
! {
! unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
! auto_vec<target_unit, 128> buffer (buffer_bytes);
!
! /* Some ports misuse CCmode. */
! if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
! return x;
!
! /* Paradoxical subregs read undefined values for bytes outside of the
! inner value. However, we have traditionally always sign-extended
! integer constants and zero-extended others. */
! unsigned int inner_bytes = buffer_bytes;
! if (paradoxical_subreg_p (outermode, innermode))
! {
! if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
! return NULL_RTX;
!
! target_unit filler = 0;
! if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
! filler = -1;
!
! /* Add any leading bytes due to big-endian layout. The number of
! bytes must be constant because both modes have constant size. */
! unsigned int leading_bytes
! = -byte_lowpart_offset (outermode, innermode).to_constant ();
! for (unsigned int i = 0; i < leading_bytes; ++i)
! buffer.quick_push (filler);
!
! if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
! return NULL_RTX;
!
! /* Add any trailing bytes due to little-endian layout. */
! while (buffer.length () < buffer_bytes)
! buffer.quick_push (filler);
}
else
! {
! if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
! return NULL_RTX;
! }
! return native_decode_rtx (outermode, buffer, 0);
}
/* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
*************** simplify_subreg (machine_mode outermode,
*** 6494,6499 ****
--- 6618,6626 ----
if (outermode == innermode && known_eq (byte, 0U))
return op;
+ if (GET_CODE (op) == CONST_VECTOR)
+ byte = simplify_const_vector_byte_offset (op, byte);
+
if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
{
rtx elt;
*************** simplify_subreg (machine_mode outermode,
*** 6513,6542 ****
|| CONST_FIXED_P (op)
|| GET_CODE (op) == CONST_VECTOR)
{
- /* simplify_immed_subreg deconstructs OP into bytes and constructs
- the result from bytes, so it only works if the sizes of the modes
- and the value of the offset are known at compile time. Cases that
- that apply to general modes and offsets should be handled here
- before calling simplify_immed_subreg. */
- fixed_size_mode fs_outermode, fs_innermode;
unsigned HOST_WIDE_INT cbyte;
! if (is_a <fixed_size_mode> (outermode, &fs_outermode)
! && is_a <fixed_size_mode> (innermode, &fs_innermode)
! && byte.is_constant (&cbyte))
! return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
! 0, GET_MODE_SIZE (fs_innermode));
!
! /* Handle constant-sized outer modes and variable-sized inner modes. */
! unsigned HOST_WIDE_INT first_elem;
! if (GET_CODE (op) == CONST_VECTOR
! && is_a <fixed_size_mode> (outermode, &fs_outermode)
! && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
! &first_elem))
! return simplify_immed_subreg (fs_outermode, op, innermode, 0,
! first_elem,
! GET_MODE_SIZE (fs_outermode));
! return NULL_RTX;
}
/* Changing mode twice with SUBREG => just change it once,
--- 6640,6660 ----
|| CONST_FIXED_P (op)
|| GET_CODE (op) == CONST_VECTOR)
{
unsigned HOST_WIDE_INT cbyte;
! if (byte.is_constant (&cbyte))
! {
! if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
! {
! rtx tmp = simplify_const_vector_subreg (outermode, op,
! innermode, cbyte);
! if (tmp)
! return tmp;
! }
! fixed_size_mode fs_outermode;
! if (is_a <fixed_size_mode> (outermode, &fs_outermode))
! return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
! }
}
/* Changing mode twice with SUBREG => just change it once,
*************** test_vec_merge (machine_mode mode)
*** 7179,7184 ****
--- 7297,7461 ----
simplify_rtx (nvm));
}
+ /* Test subregs of integer vector constant X, trying elements in
+ the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
+ where NELTS is the number of elements in X. Subregs involving
+ elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */
+
+ static void
+ test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
+ unsigned int first_valid = 0)
+ {
+ machine_mode inner_mode = GET_MODE (x);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+
+ for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
+ {
+ machine_mode outer_mode = (machine_mode) modei;
+ if (!VECTOR_MODE_P (outer_mode))
+ continue;
+
+ unsigned int outer_nunits;
+ if (GET_MODE_INNER (outer_mode) == int_mode
+ && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
+ && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
+ {
+ /* Test subregs in which the outer mode is a smaller,
+ constant-sized vector of the same element type. */
+ unsigned int limit
+ = constant_lower_bound (GET_MODE_NUNITS (inner_mode));
+ for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
+ {
+ rtx expected = NULL_RTX;
+ if (elt >= first_valid)
+ {
+ rtx_vector_builder builder (outer_mode, outer_nunits, 1);
+ for (unsigned int i = 0; i < outer_nunits; ++i)
+ builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
+ expected = builder.build ();
+ }
+ poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
+ ASSERT_RTX_EQ (expected,
+ simplify_subreg (outer_mode, x,
+ inner_mode, byte));
+ }
+ }
+ else if (known_eq (GET_MODE_SIZE (outer_mode),
+ GET_MODE_SIZE (inner_mode))
+ && known_eq (elt_bias, 0U)
+ && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
+ || known_eq (GET_MODE_BITSIZE (outer_mode),
+ GET_MODE_NUNITS (outer_mode)))
+ && (!FLOAT_MODE_P (outer_mode)
+ || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
+ == GET_MODE_UNIT_PRECISION (outer_mode)))
+ && (GET_MODE_SIZE (inner_mode).is_constant ()
+ || !CONST_VECTOR_STEPPED_P (x)))
+ {
+ /* Try converting to OUTER_MODE and back. */
+ rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
+ ASSERT_TRUE (outer_x != NULL_RTX);
+ ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
+ outer_mode, 0));
+ }
+ }
+
+ if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
+ {
+ /* Test each byte in the element range. */
+ unsigned int limit
+ = constant_lower_bound (GET_MODE_SIZE (inner_mode));
+ for (unsigned int i = 0; i < limit; ++i)
+ {
+ unsigned int elt = i / GET_MODE_SIZE (int_mode);
+ rtx expected = NULL_RTX;
+ if (elt >= first_valid)
+ {
+ unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
+ if (BYTES_BIG_ENDIAN)
+ byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
+ rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
+ wide_int shifted_elt
+ = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
+ expected = immed_wide_int_const (shifted_elt, QImode);
+ }
+ poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
+ ASSERT_RTX_EQ (expected,
+ simplify_subreg (QImode, x, inner_mode, byte));
+ }
+ }
+ }
+
+ /* Test constant subregs of integer vector mode INNER_MODE, using 1
+ element per pattern. */
+
+ static void
+ test_vector_subregs_repeating (machine_mode inner_mode)
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+ unsigned int min_nunits = constant_lower_bound (nunits);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ unsigned int count = gcd (min_nunits, 8);
+
+ rtx_vector_builder builder (inner_mode, count, 1);
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (8 - i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+ if (!nunits.is_constant ())
+ test_vector_subregs_modes (x, nunits - min_nunits);
+ }
+
+ /* Test constant subregs of integer vector mode INNER_MODE, using 2
+ elements per pattern. */
+
+ static void
+ test_vector_subregs_fore_back (machine_mode inner_mode)
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+ unsigned int min_nunits = constant_lower_bound (nunits);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ unsigned int count = gcd (min_nunits, 4);
+
+ rtx_vector_builder builder (inner_mode, count, 2);
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (i, int_mode));
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (-(int) i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+ if (!nunits.is_constant ())
+ test_vector_subregs_modes (x, nunits - min_nunits, count);
+ }
+
+ /* Test constant subregs of integer vector mode INNER_MODE, using 3
+ elements per pattern. */
+
+ static void
+ test_vector_subregs_stepped (machine_mode inner_mode)
+ {
+ /* Build { 0, 1, 2, 3, ... }. */
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ rtx_vector_builder builder (inner_mode, 1, 3);
+ for (unsigned int i = 0; i < 3; ++i)
+ builder.quick_push (gen_int_mode (i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+ }
+
+ /* Test constant subregs of integer vector mode INNER_MODE. */
+
+ static void
+ test_vector_subregs (machine_mode inner_mode)
+ {
+ test_vector_subregs_repeating (inner_mode);
+ test_vector_subregs_fore_back (inner_mode);
+ test_vector_subregs_stepped (inner_mode);
+ }
+
/* Verify some simplifications involving vectors. */
static void
*************** test_vector_ops ()
*** 7193,7199 ****
test_vector_ops_duplicate (mode, scalar_reg);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& maybe_gt (GET_MODE_NUNITS (mode), 2))
! test_vector_ops_series (mode, scalar_reg);
test_vec_merge (mode);
}
}
--- 7470,7479 ----
test_vector_ops_duplicate (mode, scalar_reg);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& maybe_gt (GET_MODE_NUNITS (mode), 2))
! {
! test_vector_ops_series (mode, scalar_reg);
! test_vector_subregs (mode);
! }
test_vec_merge (mode);
}
}