This patch adds an rtl representation of a vector linear series of the form:
a[I] = BASE + I * STEP Like vec_duplicate: - the new rtx can be used for both constant and non-constant vectors - when used for constant vectors it is wrapped in a (const ...) - the constant form is only used for variable-length vectors; fixed-length vectors still use CONST_VECTOR At the moment the code is restricted to integer elements, to avoid concerns over floating-point rounding. 2017-10-23 Richard Sandiford <richard.sandif...@linaro.org> Alan Hayward <alan.hayw...@arm.com> David Sherwood <david.sherw...@arm.com> gcc/ * doc/rtl.texi (vec_series): Document. (const): Say that the operand can be a vec_series. * rtl.def (VEC_SERIES): New rtx code. * rtl.h (const_vec_series_p_1): Declare. (const_vec_series_p, vec_series_p): New functions. * emit-rtl.h (gen_const_vec_series): Declare. (gen_vec_series): Likewise. * emit-rtl.c (const_vec_series_p_1, gen_const_vec_series) (gen_vec_series): New functions. * optabs.c (expand_mult_highpart): Use gen_const_vec_series. * simplify-rtx.c (simplify_unary_operation_1): Handle negations of vector series. (simplify_binary_operation_series): New function. (simplify_binary_operation_1): Use it. Handle VEC_SERIES. (test_vector_ops_series): New function. (test_vector_ops): Call it. * config/powerpcspe/altivec.md (altivec_lvsl): Use gen_const_vec_series. (altivec_lvsr): Likewise. * config/rs6000/altivec.md (altivec_lvsl, altivec_lvsr): Likewise. Index: gcc/doc/rtl.texi =================================================================== --- gcc/doc/rtl.texi 2017-10-23 11:41:39.185050437 +0100 +++ gcc/doc/rtl.texi 2017-10-23 11:41:41.547050496 +0100 @@ -1677,7 +1677,8 @@ are target-specific and typically repres operator. @var{m} should be a valid address mode. The second use of @code{const} is to wrap a vector operation. -In this case @var{exp} must be a @code{vec_duplicate} expression. +In this case @var{exp} must be a @code{vec_duplicate} or +@code{vec_series} expression. 
@findex high @item (high:@var{m} @var{exp}) @@ -2722,6 +2723,10 @@ the same submodes as the input vector mo number of output parts must be an integer multiple of the number of input parts. +@findex vec_series +@item (vec_series:@var{m} @var{base} @var{step}) +This operation creates a vector in which element @var{i} is equal to +@samp{@var{base} + @var{i}*@var{step}}. @var{m} must be a vector integer mode. @end table @node Conversions Index: gcc/rtl.def =================================================================== --- gcc/rtl.def 2017-10-23 11:40:11.378243915 +0100 +++ gcc/rtl.def 2017-10-23 11:41:41.549050496 +0100 @@ -710,6 +710,11 @@ DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", " an integer multiple of the number of input parts. */ DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY) +/* Creation of a vector in which element I has the value BASE + I * STEP, + where BASE is the first operand and STEP is the second. The result + must have a vector integer mode. */ +DEF_RTL_EXPR(VEC_SERIES, "vec_series", "ee", RTX_BIN_ARITH) + /* Addition with signed saturation */ DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH) Index: gcc/rtl.h =================================================================== --- gcc/rtl.h 2017-10-23 11:41:39.188050437 +0100 +++ gcc/rtl.h 2017-10-23 11:41:41.549050496 +0100 @@ -2816,6 +2816,51 @@ unwrap_const_vec_duplicate (T x) return x; } +/* In emit-rtl.c. */ +extern bool const_vec_series_p_1 (const_rtx, rtx *, rtx *); + +/* Return true if X is a constant vector that contains a linear series + of the form: + + { B, B + S, B + 2 * S, B + 3 * S, ... } + + for a nonzero S. Store B and S in *BASE_OUT and *STEP_OUT on success. 
*/ + +inline bool +const_vec_series_p (const_rtx x, rtx *base_out, rtx *step_out) +{ + if (GET_CODE (x) == CONST_VECTOR + && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) + return const_vec_series_p_1 (x, base_out, step_out); + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == VEC_SERIES) + { + *base_out = XEXP (XEXP (x, 0), 0); + *step_out = XEXP (XEXP (x, 0), 1); + return true; + } + return false; +} + +/* Return true if X is a vector that contains a linear series of the + form: + + { B, B + S, B + 2 * S, B + 3 * S, ... } + + where B and S are constant or nonconstant. Store B and S in + *BASE_OUT and *STEP_OUT on success. */ + +inline bool +vec_series_p (const_rtx x, rtx *base_out, rtx *step_out) +{ + if (GET_CODE (x) == VEC_SERIES) + { + *base_out = XEXP (x, 0); + *step_out = XEXP (x, 1); + return true; + } + return const_vec_series_p (x, base_out, step_out); +} + /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X. */ inline scalar_int_mode Index: gcc/emit-rtl.h =================================================================== --- gcc/emit-rtl.h 2017-10-23 11:41:32.369050264 +0100 +++ gcc/emit-rtl.h 2017-10-23 11:41:41.548050496 +0100 @@ -441,6 +441,9 @@ get_max_uid (void) extern rtx gen_const_vec_duplicate (machine_mode, rtx); extern rtx gen_vec_duplicate (machine_mode, rtx); +extern rtx gen_const_vec_series (machine_mode, rtx, rtx); +extern rtx gen_vec_series (machine_mode, rtx, rtx); + extern void set_decl_incoming_rtl (tree, rtx, bool); /* Return a memory reference like MEMREF, but with its mode changed Index: gcc/emit-rtl.c =================================================================== --- gcc/emit-rtl.c 2017-10-23 11:41:39.186050437 +0100 +++ gcc/emit-rtl.c 2017-10-23 11:41:41.548050496 +0100 @@ -5796,6 +5796,69 @@ gen_vec_duplicate (machine_mode mode, rt return gen_rtx_VEC_DUPLICATE (mode, x); } +/* A subroutine of const_vec_series_p that handles the case in which + X is known to be an integer CONST_VECTOR. 
*/ + +bool +const_vec_series_p_1 (const_rtx x, rtx *base_out, rtx *step_out) +{ + unsigned int nelts = CONST_VECTOR_NUNITS (x); + if (nelts < 2) + return false; + + scalar_mode inner = GET_MODE_INNER (GET_MODE (x)); + rtx base = CONST_VECTOR_ELT (x, 0); + rtx step = simplify_binary_operation (MINUS, inner, + CONST_VECTOR_ELT (x, 1), base); + if (rtx_equal_p (step, CONST0_RTX (inner))) + return false; + + for (unsigned int i = 2; i < nelts; ++i) + { + rtx diff = simplify_binary_operation (MINUS, inner, + CONST_VECTOR_ELT (x, i), + CONST_VECTOR_ELT (x, i - 1)); + if (!rtx_equal_p (step, diff)) + return false; + } + + *base_out = base; + *step_out = step; + return true; +} + +/* Generate a vector constant of mode MODE in which element I has + the value BASE + I * STEP. */ + +rtx +gen_const_vec_series (machine_mode mode, rtx base, rtx step) +{ + gcc_assert (CONSTANT_P (base) && CONSTANT_P (step)); + + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + scalar_mode inner_mode = GET_MODE_INNER (mode); + RTVEC_ELT (v, 0) = base; + for (int i = 1; i < nunits; ++i) + RTVEC_ELT (v, i) = simplify_gen_binary (PLUS, inner_mode, + RTVEC_ELT (v, i - 1), step); + return gen_rtx_raw_CONST_VECTOR (mode, v); +} + +/* Generate a vector of mode MODE in which element I has the value + BASE + I * STEP. The result will be a constant if BASE and STEP + are both constants. */ + +rtx +gen_vec_series (machine_mode mode, rtx base, rtx step) +{ + if (step == const0_rtx) + return gen_vec_duplicate (mode, base); + if (CONSTANT_P (base) && CONSTANT_P (step)) + return gen_const_vec_series (mode, base, step); + return gen_rtx_VEC_SERIES (mode, base, step); +} + /* Generate a new vector constant for mode MODE and constant value CONSTANT. 
*/ Index: gcc/optabs.c =================================================================== --- gcc/optabs.c 2017-10-23 11:41:32.369050264 +0100 +++ gcc/optabs.c 2017-10-23 11:41:41.549050496 +0100 @@ -5784,13 +5784,13 @@ expand_mult_highpart (machine_mode mode, for (i = 0; i < nunits; ++i) RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0)); + perm = gen_rtx_CONST_VECTOR (mode, v); } else { - for (i = 0; i < nunits; ++i) - RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); + int base = BYTES_BIG_ENDIAN ? 0 : 1; + perm = gen_const_vec_series (mode, GEN_INT (base), GEN_INT (2)); } - perm = gen_rtx_CONST_VECTOR (mode, v); return expand_vec_perm (mode, m1, m2, perm, target); } Index: gcc/simplify-rtx.c =================================================================== --- gcc/simplify-rtx.c 2017-10-23 11:41:36.309050364 +0100 +++ gcc/simplify-rtx.c 2017-10-23 11:41:41.550050496 +0100 @@ -927,7 +927,7 @@ exact_int_to_float_conversion_p (const_r simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) { enum rtx_code reversed; - rtx temp, elt; + rtx temp, elt, base, step; scalar_int_mode inner, int_mode, op_mode, op0_mode; switch (code) @@ -1185,6 +1185,22 @@ simplify_unary_operation_1 (enum rtx_cod return simplify_gen_unary (TRUNCATE, int_mode, temp, inner); } } + + if (vec_series_p (op, &base, &step)) + { + /* Only create a new series if we can simplify both parts. In other + cases this isn't really a simplification, and it's not necessarily + a win to replace a vector operation with a scalar operation. 
*/ + scalar_mode inner_mode = GET_MODE_INNER (mode); + base = simplify_unary_operation (NEG, inner_mode, base, inner_mode); + if (base) + { + step = simplify_unary_operation (NEG, inner_mode, + step, inner_mode); + if (step) + return gen_vec_series (mode, base, step); + } + } break; case TRUNCATE: @@ -2153,6 +2169,46 @@ simplify_binary_operation (enum rtx_code return NULL_RTX; } +/* Subroutine of simplify_binary_operation_1 that looks for cases in + which OP0 and OP1 are both vector series or vector duplicates + (which are really just series with a step of 0). If so, try to + form a new series by applying CODE to the bases and to the steps. + Return null if no simplification is possible. + + MODE is the mode of the operation and is known to be a vector + integer mode. */ + +static rtx +simplify_binary_operation_series (rtx_code code, machine_mode mode, + rtx op0, rtx op1) +{ + rtx base0, step0; + if (vec_duplicate_p (op0, &base0)) + step0 = const0_rtx; + else if (!vec_series_p (op0, &base0, &step0)) + return NULL_RTX; + + rtx base1, step1; + if (vec_duplicate_p (op1, &base1)) + step1 = const0_rtx; + else if (!vec_series_p (op1, &base1, &step1)) + return NULL_RTX; + + /* Only create a new series if we can simplify both parts. In other + cases this isn't really a simplification, and it's not necessarily + a win to replace a vector operation with a scalar operation. */ + scalar_mode inner_mode = GET_MODE_INNER (mode); + rtx new_base = simplify_binary_operation (code, inner_mode, base0, base1); + if (!new_base) + return NULL_RTX; + + rtx new_step = simplify_binary_operation (code, inner_mode, step0, step1); + if (!new_step) + return NULL_RTX; + + return gen_vec_series (mode, new_base, new_step); +} + /* Subroutine of simplify_binary_operation. Simplify a binary operation CODE with result mode MODE, operating on OP0 and OP1. 
If OP0 and/or OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the @@ -2333,6 +2389,14 @@ simplify_binary_operation_1 (enum rtx_co if (tem) return tem; } + + /* Handle vector series. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + tem = simplify_binary_operation_series (code, mode, op0, op1); + if (tem) + return tem; + } break; case COMPARE: @@ -2544,6 +2608,14 @@ simplify_binary_operation_1 (enum rtx_co || plus_minus_operand_p (op1)) && (tem = simplify_plus_minus (code, mode, op0, op1)) != 0) return tem; + + /* Handle vector series. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + tem = simplify_binary_operation_series (code, mode, op0, op1); + if (tem) + return tem; + } break; case MULT: @@ -3495,6 +3567,11 @@ simplify_binary_operation_1 (enum rtx_co /* ??? There are simplifications that can be done. */ return 0; + case VEC_SERIES: + if (op1 == CONST0_RTX (GET_MODE_INNER (mode))) + return gen_vec_duplicate (mode, op0); + return 0; + case VEC_SELECT: if (!VECTOR_MODE_P (mode)) { @@ -6490,6 +6567,60 @@ test_vector_ops_duplicate (machine_mode } } +/* Test vector simplifications involving VEC_SERIES in which the + operands and result have vector mode MODE. SCALAR_REG is a pseudo + register that holds one element of MODE. */ + +static void +test_vector_ops_series (machine_mode mode, rtx scalar_reg) +{ + /* Test unary cases with VEC_SERIES arguments. 
*/ + scalar_mode inner_mode = GET_MODE_INNER (mode); + rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg); + rtx neg_scalar_reg = gen_rtx_NEG (inner_mode, scalar_reg); + rtx series_0_r = gen_rtx_VEC_SERIES (mode, const0_rtx, scalar_reg); + rtx series_0_nr = gen_rtx_VEC_SERIES (mode, const0_rtx, neg_scalar_reg); + rtx series_nr_1 = gen_rtx_VEC_SERIES (mode, neg_scalar_reg, const1_rtx); + rtx series_r_m1 = gen_rtx_VEC_SERIES (mode, scalar_reg, constm1_rtx); + rtx series_r_r = gen_rtx_VEC_SERIES (mode, scalar_reg, scalar_reg); + rtx series_nr_nr = gen_rtx_VEC_SERIES (mode, neg_scalar_reg, + neg_scalar_reg); + ASSERT_RTX_EQ (series_0_r, + simplify_unary_operation (NEG, mode, series_0_nr, mode)); + ASSERT_RTX_EQ (series_r_m1, + simplify_unary_operation (NEG, mode, series_nr_1, mode)); + ASSERT_RTX_EQ (series_r_r, + simplify_unary_operation (NEG, mode, series_nr_nr, mode)); + + /* Test that a VEC_SERIES with a zero step is simplified away. */ + ASSERT_RTX_EQ (duplicate, + simplify_binary_operation (VEC_SERIES, mode, + scalar_reg, const0_rtx)); + + /* Test PLUS and MINUS with VEC_SERIES. */ + rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx); + rtx series_0_m1 = gen_const_vec_series (mode, const0_rtx, constm1_rtx); + rtx series_r_1 = gen_rtx_VEC_SERIES (mode, scalar_reg, const1_rtx); + ASSERT_RTX_EQ (series_r_r, + simplify_binary_operation (PLUS, mode, series_0_r, + duplicate)); + ASSERT_RTX_EQ (series_r_1, + simplify_binary_operation (PLUS, mode, duplicate, + series_0_1)); + ASSERT_RTX_EQ (series_r_m1, + simplify_binary_operation (PLUS, mode, duplicate, + series_0_m1)); + ASSERT_RTX_EQ (series_0_r, + simplify_binary_operation (MINUS, mode, series_r_r, + duplicate)); + ASSERT_RTX_EQ (series_r_m1, + simplify_binary_operation (MINUS, mode, duplicate, + series_0_1)); + ASSERT_RTX_EQ (series_r_1, + simplify_binary_operation (MINUS, mode, duplicate, + series_0_m1)); +} + /* Verify some simplifications involving vectors. 
*/ static void @@ -6502,6 +6633,9 @@ test_vector_ops () { rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode)); test_vector_ops_duplicate (mode, scalar_reg); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && GET_MODE_NUNITS (mode) > 2) + test_vector_ops_series (mode, scalar_reg); } } } Index: gcc/config/powerpcspe/altivec.md =================================================================== --- gcc/config/powerpcspe/altivec.md 2017-10-23 11:41:32.366050264 +0100 +++ gcc/config/powerpcspe/altivec.md 2017-10-23 11:41:41.546050496 +0100 @@ -2456,13 +2456,10 @@ (define_expand "altivec_lvsl" emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); else { - int i; - rtx mask, perm[16], constv, vperm; + rtx mask, constv, vperm; mask = gen_reg_rtx (V16QImode); emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); - for (i = 0; i < 16; ++i) - perm[i] = GEN_INT (i); - constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = gen_const_vec_series (V16QImode, const0_rtx, const1_rtx); constv = force_reg (V16QImode, constv); vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), UNSPEC_VPERM); @@ -2488,13 +2485,10 @@ (define_expand "altivec_lvsr" emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); else { - int i; - rtx mask, perm[16], constv, vperm; + rtx mask, constv, vperm; mask = gen_reg_rtx (V16QImode); emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); - for (i = 0; i < 16; ++i) - perm[i] = GEN_INT (i); - constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = gen_const_vec_series (V16QImode, const0_rtx, const1_rtx); constv = force_reg (V16QImode, constv); vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), UNSPEC_VPERM); Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md 2017-10-23 11:41:32.366050264 +0100 +++ gcc/config/rs6000/altivec.md 2017-10-23 11:41:41.547050496 +0100 @@ 
-2573,13 +2573,10 @@ (define_expand "altivec_lvsl" emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); else { - int i; - rtx mask, perm[16], constv, vperm; + rtx mask, constv, vperm; mask = gen_reg_rtx (V16QImode); emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); - for (i = 0; i < 16; ++i) - perm[i] = GEN_INT (i); - constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = gen_const_vec_series (V16QImode, const0_rtx, const1_rtx); constv = force_reg (V16QImode, constv); vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), UNSPEC_VPERM); @@ -2614,13 +2611,10 @@ (define_expand "altivec_lvsr" emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); else { - int i; - rtx mask, perm[16], constv, vperm; + rtx mask, constv, vperm; mask = gen_reg_rtx (V16QImode); emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); - for (i = 0; i < 16; ++i) - perm[i] = GEN_INT (i); - constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = gen_const_vec_series (V16QImode, const0_rtx, const1_rtx); constv = force_reg (V16QImode, constv); vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), UNSPEC_VPERM);