Hi! This patch allows us to vectorize the attached testcases again: rot1.c and rot3.c e.g. with -O3 -mavx, rot2.c only with -O3 -mavx2.
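For reference, the scalar identity the expansion relies on can be sanity-checked with a small standalone program (an illustration only, not part of the patch): for unsigned 32-bit x, (x << j) | (x >> ((-j) & 31)) equals x rotated left by j for every j in [0, 31], and for nonzero j the two shifted halves have no overlapping bits, so the ^ used in the testcases agrees with |.

/* Illustration only, not part of the patch: check the rotate
   identity used by the expansion for all counts of a 32-bit type.  */
static unsigned int
rotl (unsigned int x, unsigned int j)
{
  /* Reference rotate; the j == 0 case avoids a shift by 32.  */
  return j ? (x << j) | (x >> (32 - j)) : x;
}

int
main ()
{
  unsigned int x = 0xdeadbeefu, j;
  for (j = 0; j < 32; j++)
    {
      unsigned int lo = x << j, hi = x >> ((-j) & 31);
      if ((lo | hi) != rotl (x, j))
	__builtin_abort ();
      /* The halves are disjoint for nonzero counts, so ^ matches |.  */
      if (j != 0 && (lo ^ hi) != (lo | hi))
	__builtin_abort ();
    }
  return 0;
}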
The problem with rot2.c is that the rotation count there is vect_external_def, but when we insert some stmts into the pattern def seq (the (-j) & 31 computation), we turn it into vect_internal_def; right now we have no easy way to say "if you want to vectorize this, just push these statements before the loop as scalars and use those as the external def".  Well, perhaps the pattern recognizer could in that case emit the stmts before the loop directly, and if vectorization isn't successful, just let them be DCEd again.  Perhaps something to be handled as a follow-up.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

I'm not sure whether something should go into the testsuite/, and in what form (just runtime testcases with verification, or vect_shift target tests?  Note I couldn't find any target-supports stuff that would check whether vector-by-vector shifts are supported).

2013-05-10  Jakub Jelinek  <ja...@redhat.com>

	* tree-vectorizer.h (NUM_PATTERNS): Increment.
	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
	vect_recog_rotate_pattern.
	(vect_recog_rotate_pattern): New function.

--- gcc/tree-vectorizer.h.jj	2013-04-24 12:07:12.000000000 +0200
+++ gcc/tree-vectorizer.h	2013-05-10 13:38:14.658985201 +0200
@@ -1005,7 +1005,7 @@ extern void vect_slp_transform_bb (basic
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 10
+#define NUM_PATTERNS 11
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */
--- gcc/tree-vect-patterns.c.jj	2013-01-11 09:03:01.000000000 +0100
+++ gcc/tree-vect-patterns.c	2013-05-10 14:59:35.756019753 +0200
@@ -50,6 +50,7 @@ static gimple vect_recog_over_widening_p
 						 tree *);
 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
 	tree *, tree *);
+static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
@@ -64,6 +65,7 @@ static vect_recog_func_ptr vect_vect_rec
 	vect_recog_pow_pattern,
 	vect_recog_widen_shift_pattern,
 	vect_recog_over_widening_pattern,
+	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
 	vect_recog_mixed_size_cond_pattern,
@@ -1446,6 +1448,218 @@ vect_recog_widen_shift_pattern (vec<gimp
   if (dump_enabled_p ())
     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
+
+  stmts->safe_push (last_stmt);
+  return pattern_stmt;
+}
+
+/* Detect a rotate pattern that wouldn't otherwise be vectorized:
+
+   type a_t, b_t, c_t;
+
+   S0 a_t = b_t r<< c_t;
+
+  Input/Output:
+
+  * STMTS: Contains a stmt from which the pattern search begins,
+    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
+    with a sequence:
+
+   S1 d_t = -c_t;
+   S2 e_t = d_t & (B - 1);
+   S3 f_t = b_t << c_t;
+   S4 g_t = b_t >> e_t;
+   S0 a_t = f_t | g_t;
+
+    where B is the element bitsize of type.
+
+  Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the rotate
+    S0 stmt.  */
+
+static gimple
+vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
+  gimple pattern_stmt, def_stmt;
+  enum tree_code rhs_code;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+  enum vect_def_type dt;
+  optab optab1, optab2;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  switch (rhs_code)
+    {
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+    return NULL;
+
+  lhs = gimple_assign_lhs (last_stmt);
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  type = TREE_TYPE (oprnd0);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
+      || !INTEGRAL_TYPE_P (type)
+      || !TYPE_UNSIGNED (type))
+    return NULL;
+
+  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
+			   &def, &dt))
+    return NULL;
+
+  if (dt != vect_internal_def
+      && dt != vect_constant_def
+      && dt != vect_external_def)
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If vector/vector or vector/scalar rotate is supported by the target,
+     don't do anything here.  */
+  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
+  if (optab1
+      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
+    return NULL;
+
+  if (bb_vinfo != NULL || dt != vect_internal_def)
+    {
+      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
+      if (optab2
+	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If vector/vector or vector/scalar shifts aren't supported by the target,
+     don't do anything here either.  */
+  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
+  if (!optab1
+      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+      || !optab2
+      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    {
+      if (bb_vinfo == NULL && dt == vect_internal_def)
+	return NULL;
+      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
+      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
+      if (!optab1
+	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+	  || !optab2
+	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	return NULL;
+    }
+
+  *type_in = vectype;
+  *type_out = vectype;
+  if (*type_in == NULL_TREE)
+    return NULL;
+
+  def = NULL_TREE;
+  if (TREE_CODE (oprnd1) == INTEGER_CST
+      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
+    def = oprnd1;
+  else if (def_stmt && gimple_assign_cast_p (def_stmt))
+    {
+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
+      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
+	  && TYPE_PRECISION (TREE_TYPE (rhs1))
+	     == TYPE_PRECISION (type))
+	def = rhs1;
+    }
+
+  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
+  if (def == NULL_TREE)
+    {
+      def = vect_recog_temp_ssa_var (type, NULL);
+      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
+					       NULL_TREE);
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+    }
+  stype = TREE_TYPE (def);
+
+  if (TREE_CODE (def) == INTEGER_CST)
+    {
+      if (!host_integerp (def, 1)
+	  || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
+	     >= GET_MODE_PRECISION (TYPE_MODE (type))
+	  || integer_zerop (def))
+	return NULL;
+      def2 = build_int_cst (stype,
+			    GET_MODE_PRECISION (TYPE_MODE (type))
+			    - tree_low_cst (def, 1));
+    }
+  else
+    {
+      tree vecstype = get_vectype_for_scalar_type (stype);
+      stmt_vec_info def_stmt_vinfo;
+
+      if (vecstype == NULL_TREE)
+	return NULL;
+      def2 = vect_recog_temp_ssa_var (stype, NULL);
+      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
+					       NULL_TREE);
+      def_stmt_vinfo
+	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+      def2 = vect_recog_temp_ssa_var (stype, NULL);
+      tree mask
+	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
+      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
+					       gimple_assign_lhs (def_stmt),
+					       mask);
+      def_stmt_vinfo
+	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+    }
+
+  var1 = vect_recog_temp_ssa_var (type, NULL);
+  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
+					   ? LSHIFT_EXPR : RSHIFT_EXPR,
+					   var1, oprnd0, def);
+  append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+  var2 = vect_recog_temp_ssa_var (type, NULL);
+  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
+					   ? RSHIFT_EXPR : LSHIFT_EXPR,
+					   var2, oprnd0, def2);
+  append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+  /* Pattern detected.  */
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
+		     "vect_recog_rotate_pattern: detected: ");
+
+  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
+  var = vect_recog_temp_ssa_var (type, NULL);
+  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
+
+  if (dump_enabled_p ())
+    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
 
   stmts->safe_push (last_stmt);
   return pattern_stmt;

	Jakub
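To make the constant-count branch concrete (a made-up example, not from the patch): for a rotate of unsigned int by the constant 3, the INTEGER_CST path computes def2 = 32 - 3 = 29 at compile time, so S1/S2 never need to be emitted and the pattern sequence corresponds to the following scalar code:

/* Hypothetical scalar equivalent of the emitted sequence for
   b r<< 3 with a 32-bit element type (def == 3, def2 == 29).  */
unsigned int
rotl3 (unsigned int b)
{
  unsigned int f = b << 3;	/* S3: LSHIFT_EXPR by def.  */
  unsigned int g = b >> 29;	/* S4: RSHIFT_EXPR by def2.  */
  return f | g;			/* Replacement stmt: BIT_IOR_EXPR.  */
}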
/* rot1.c: the rotate count varies from iteration to iteration
   (vect_internal_def).  */
unsigned int a[1024] = { 0, 1, 2, 3, 4, 5, 6 };

__attribute__((noinline, noclone)) void
foo (void)
{
  int i;
  for (i = 0; i < 1024; i++)
    {
      int j = i & 31;
      a[i] = (a[i] << j) ^ (a[i] >> ((-j) & 31));
    }
}

int
main ()
{
  int i;
  for (i = 0; i < 1000000; i++)
    foo ();
  return 0;
}
/* rot2.c: the rotate count is a function argument, i.e.
   vect_external_def; vectorizes only with -O3 -mavx2.  */
unsigned int a[1024] = { 0, 1, 2, 3, 4, 5, 6 };

__attribute__((noinline, noclone)) void
foo (int j)
{
  int i;
  for (i = 0; i < 1024; i++)
    a[i] = (a[i] << j) ^ (a[i] >> ((-j) & 31));
}

int
main ()
{
  int i;
  for (i = 0; i < 1000000; i++)
    foo (3);
  return 0;
}
/* rot3.c: the rotate count is a loop-invariant local variable.  */
unsigned int a[1024] = { 0, 1, 2, 3, 4, 5, 6 };

__attribute__((noinline, noclone)) void
foo (void)
{
  int i, j = 3;
  for (i = 0; i < 1024; i++)
    a[i] = (a[i] << j) ^ (a[i] >> ((-j) & 31));
}

int
main ()
{
  int i;
  for (i = 0; i < 1000000; i++)
    foo ();
  return 0;
}
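Regarding the testsuite question above, a runtime testcase with verification could look something like the sketch below (purely an illustration; the dg- directives are omitted, and the use of the optimize attribute to keep the scalar reference out of the vectorizer's reach, as well as the initialization values, are made up for the example):

/* Sketch of a possible runtime verification test (hypothetical, not
   part of the patch): run the rotate loop the vectorizer should
   handle and compare against a scalar reference.  */
unsigned int a[1024], b[1024];

__attribute__((noinline, noclone)) void
foo (int j)
{
  int i;
  for (i = 0; i < 1024; i++)
    a[i] = (a[i] << j) | (a[i] >> ((-j) & 31));
}

__attribute__((noinline, noclone, optimize ("O0"))) unsigned int
ref_rotl (unsigned int x, int j)
{
  return (x << j) | (x >> ((-j) & 31));
}

int
main ()
{
  int i;
  for (i = 0; i < 1024; i++)
    a[i] = b[i] = i * 0x9e3779b9u;
  foo (3);
  for (i = 0; i < 1024; i++)
    if (a[i] != ref_rotl (b[i], 3))
      __builtin_abort ();
  return 0;
}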