Hi, As reported in PR69848, the GCC vectorizer now generates the comparison outside of VEC_COND_EXPR for the COND_REDUCTION case, as below:
_20 = vect__1.6_8 != { 0, 0, 0, 0 }; vect_c_2.8_16 = VEC_COND_EXPR <_20, { 0, 0, 0, 0 }, vect_c_2.7_13>; _21 = VEC_COND_EXPR <_20, ivtmp_17, _19>; This results in inefficient expansion. With IR like: vect_c_2.8_16 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, vect_c_2.7_13>; _21 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, ivtmp_17, _19>; we can do: 1) Expansion-time optimization, for example, inverting the comparison operator by switching the VEC_COND_EXPR operands. This is useful when the backend only supports some comparison operators. 2) For backends not supporting vcond_mask patterns, saving one LT_EXPR instruction which is introduced by expand_vec_cond_expr. This patch fixes this by propagating the comparison into VEC_COND_EXPR even if it's used multiple times. For now, GCC does single_use_only propagation. Ideally, we may duplicate the comparison before each use statement just before expanding, so that TER can successfully backtrack it from each VEC_COND_EXPR. Unfortunately I didn't find a good pass to do this. tree-vect-generic.c looks like a good candidate, but it's so early that following CSE could undo the transform. Another possible fix is to generate the comparison inside VEC_COND_EXPR directly in function vectorizable_reduction. As for possible comparison CSE opportunities, I checked that it's simple enough to be handled by RTL CSE. Bootstrapped and tested on x86_64 and AArch64. Any comments? Thanks, bin 2016-05-12 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/69848 * optabs-tree.c (expand_vcond_mask_p, expand_vcond_p): New. (expand_vec_cond_expr_p): Call above functions. * optabs-tree.h (expand_vcond_mask_p, expand_vcond_p): New. * tree-ssa-forwprop.c (optabs-tree.h): Include header file. (forward_propagate_into_cond): Propagate multiple uses for VEC_COND_EXPR.
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index faac087..0ccdbdb 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -314,25 +314,50 @@ expand_vec_cmp_expr_p (tree value_type, tree mask_type) } /* Return TRUE iff, appropriate vector insns are available - for vector cond expr with vector type VALUE_TYPE and a comparison + for VCOND_MASK pattern with vector type VALUE_TYPE and a comparison with operand vector types in CMP_OP_TYPE. */ bool -expand_vec_cond_expr_p (tree value_type, tree cmp_op_type) +expand_vcond_mask_p (tree value_type, tree cmp_op_type) { - machine_mode value_mode = TYPE_MODE (value_type); - machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type); if (VECTOR_BOOLEAN_TYPE_P (cmp_op_type) && get_vcond_mask_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type)) != CODE_FOR_nothing) return true; - if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode) - || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode) - || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), - TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing) - return false; - return true; + return false; +} + +/* Return TRUE iff, appropriate vector insns are available + for VCOND pattern with vector type VALUE_TYPE and a comparison + with operand vector types in CMP_OP_TYPE. */ + +bool +expand_vcond_p (tree value_type, tree cmp_op_type) +{ + machine_mode value_mode = TYPE_MODE (value_type); + machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type); + if (GET_MODE_SIZE (value_mode) == GET_MODE_SIZE (cmp_op_mode) + && GET_MODE_NUNITS (value_mode) == GET_MODE_NUNITS (cmp_op_mode) + && get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), + TYPE_UNSIGNED (cmp_op_type)) != CODE_FOR_nothing) + return true; + + return false; +} + +/* Return TRUE iff, appropriate vector insns are available + for vector cond expr with vector type VALUE_TYPE and a comparison + with operand vector types in CMP_OP_TYPE. 
*/ + +bool +expand_vec_cond_expr_p (tree value_type, tree cmp_op_type) +{ + if (expand_vcond_mask_p (value_type, cmp_op_type) + || expand_vcond_p (value_type, cmp_op_type)) + return true; + + return false; } /* Use the current target and options to initialize diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h index c3b9280..feab40f 100644 --- a/gcc/optabs-tree.h +++ b/gcc/optabs-tree.h @@ -39,6 +39,8 @@ optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype); bool supportable_convert_operation (enum tree_code, tree, tree, tree *, enum tree_code *); bool expand_vec_cmp_expr_p (tree, tree); +bool expand_vcond_mask_p (tree, tree); +bool expand_vcond_p (tree, tree); bool expand_vec_cond_expr_p (tree, tree); void init_tree_optimization_optabs (tree); diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index c40f9e2..40f023f 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "expmed.h" +#include "optabs-tree.h" #include "optabs-query.h" #include "gimple-pretty-print.h" #include "fold-const.h" @@ -585,7 +586,10 @@ forward_propagate_into_cond (gimple_stmt_iterator *gsi_p) { enum tree_code def_code; tree name = cond; - gimple *def_stmt = get_prop_source_stmt (name, true, NULL); + bool single_use_only = (code != VEC_COND_EXPR + || !expand_vcond_p (gimple_expr_type (stmt), + TREE_TYPE (cond))); + gimple *def_stmt = get_prop_source_stmt (name, single_use_only, NULL); if (!def_stmt || !can_propagate_from (def_stmt)) return 0;