Hi! Since Richard's recent changes to allow different modes in vcond patterns (so far on i?86/x86_64 only, I think) we can vectorize more COND_EXPRs than before, and this patch improves it a tiny bit more - even i?86/x86_64 support vconds only if the sizes of the vector element modes are the same. With this patch we can optimize even if the result type is wider or narrower than the comparison operands, by vectorizing it as a COND_EXPR in the integer mode matching the size of the comparison operands, followed by a cast.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-10-06 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/50596 * tree-vectorizer.h (vect_is_simple_cond): New prototype. (NUM_PATTERNS): Change to 6. * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New function. (vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern. (vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt if it already has one, and don't set STMT_VINFO_VECTYPE in it if it is already set. * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle COND_EXPR and VEC_COND_EXPR in pattern stmts. (vect_is_simple_cond): No longer static. * lib/target-supports.exp (check_effective_target_vect_cond_mixed): New. * gcc.dg/vect/vect-cond-8.c: New test. --- gcc/tree-vectorizer.h.jj 2011-09-26 14:06:52.000000000 +0200 +++ gcc/tree-vectorizer.h 2011-10-06 10:04:03.000000000 +0200 @@ -1,5 +1,5 @@ /* Vectorizer - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Dorit Naishlos <do...@il.ibm.com> @@ -818,6 +818,7 @@ extern bool vect_transform_stmt (gimple, bool *, slp_tree, slp_instance); extern void vect_remove_stores (gimple); extern bool vect_analyze_stmt (gimple, bool *, slp_tree); +extern bool vect_is_simple_cond (tree, loop_vec_info, tree *); extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, tree, int); extern void vect_get_load_cost (struct data_reference *, int, bool, @@ -902,7 +903,7 @@ extern void vect_slp_transform_bb (basic Additional pattern recognition functions can (and will) be added in the future. */ typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); -#define NUM_PATTERNS 5 +#define NUM_PATTERNS 6 void vect_pattern_recog (loop_vec_info); /* In tree-vectorizer.c. 
*/ --- gcc/tree-vect-patterns.c.jj 2011-10-06 09:14:17.000000000 +0200 +++ gcc/tree-vect-patterns.c 2011-10-06 14:37:12.000000000 +0200 @@ -49,12 +49,15 @@ static gimple vect_recog_dot_prod_patter static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, tree *); +static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, + tree *, tree *); static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, vect_recog_dot_prod_pattern, vect_recog_pow_pattern, - vect_recog_over_widening_pattern}; + vect_recog_over_widening_pattern, + vect_recog_mixed_size_cond_pattern}; /* Function widened_name_p @@ -1218,6 +1214,120 @@ vect_recog_over_widening_pattern (VEC (g } +/* Function vect_recog_mixed_size_cond_pattern + + Try to find the following pattern: + + type x_t, y_t; + TYPE a_T, b_T, c_T; + loop: + S1 a_T = x_t CMP y_t ? b_T : c_T; + + where type 'TYPE' is an integral type which has different size + from 'type'. b_T and c_T are constants and if 'TYPE' is wider + than 'type', the constants need to fit into an integer type + with the same width as 'type'. + + Input: + + * LAST_STMT: A stmt from which the pattern search begins. + + Output: + + * TYPE_IN: The type of the input arguments to the pattern. + + * TYPE_OUT: The type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the pattern. + Additionally a def_stmt is added. + + a_it = x_t CMP y_t ? 
b_it : c_it; + a_T = (TYPE) a_it; */ + +static gimple +vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, + tree *type_out) +{ + gimple last_stmt = VEC_index (gimple, *stmts, 0); + tree cond_expr, then_clause, else_clause; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; + tree type, vectype, comp_vectype, itype, vecitype; + enum machine_mode cmpmode; + gimple pattern_stmt, def_stmt; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + + if (!is_gimple_assign (last_stmt) + || gimple_assign_rhs_code (last_stmt) != COND_EXPR + || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) + return NULL; + + cond_expr = gimple_assign_rhs1 (last_stmt); + then_clause = gimple_assign_rhs2 (last_stmt); + else_clause = gimple_assign_rhs3 (last_stmt); + + if (TREE_CODE (then_clause) != INTEGER_CST + || TREE_CODE (else_clause) != INTEGER_CST) + return NULL; + + if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype) + || !comp_vectype) + return NULL; + + type = gimple_expr_type (last_stmt); + cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); + + if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) + return NULL; + + vectype = get_vectype_for_scalar_type (type); + if (vectype == NULL_TREE) + return NULL; + + if (expand_vec_cond_expr_p (vectype, comp_vectype)) + return NULL; + + itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode), + TYPE_UNSIGNED (type)); + if (itype == NULL_TREE + || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode)) + return NULL; + + vecitype = get_vectype_for_scalar_type (itype); + if (vecitype == NULL_TREE) + return NULL; + + if (!expand_vec_cond_expr_p (vecitype, comp_vectype)) + return NULL; + + if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) + { + if (!int_fits_type_p (then_clause, itype) + || !int_fits_type_p (else_clause, itype)) + return NULL; + } + + def_stmt + = gimple_build_assign_with_ops3 (COND_EXPR, + 
vect_recog_temp_ssa_var (itype, NULL), + unshare_expr (cond_expr), + fold_convert (itype, then_clause), + fold_convert (itype, else_clause)); + pattern_stmt + = gimple_build_assign_with_ops (NOP_EXPR, + vect_recog_temp_ssa_var (type, NULL), + gimple_assign_lhs (def_stmt), NULL_TREE); + + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + STMT_VINFO_VECTYPE (def_stmt_info) = vecitype; + *type_in = vecitype; + *type_out = vectype; + + return pattern_stmt; +} + + /* Mark statements that are involved in a pattern. */ static inline void @@ -1245,14 +1355,18 @@ vect_mark_pattern_stmts (gimple orig_stm if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) { def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); - set_vinfo_for_stmt (def_stmt, - new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); - gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); def_stmt_info = vinfo_for_stmt (def_stmt); + if (def_stmt_info == NULL) + { + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + } + gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; STMT_VINFO_DEF_TYPE (def_stmt_info) = STMT_VINFO_DEF_TYPE (orig_stmt_info); - STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; + if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) + STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; } } --- gcc/tree-vect-stmts.c.jj 2011-09-29 14:25:46.000000000 +0200 +++ gcc/tree-vect-stmts.c 2011-10-06 12:16:43.000000000 +0200 @@ -652,9 +652,26 @@ vect_mark_stmts_to_be_vectorized (loop_v have to scan the RHS or function arguments instead. 
*/ if (is_gimple_assign (stmt)) { - for (i = 1; i < gimple_num_ops (stmt); i++) + enum tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree op = gimple_assign_rhs1 (stmt); + + i = 1; + if ((rhs_code == COND_EXPR || rhs_code == VEC_COND_EXPR) + && COMPARISON_CLASS_P (op)) + { + if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo, + live_p, relevant, &worklist) + || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo, + live_p, relevant, &worklist)) + { + VEC_free (gimple, heap, worklist); + return false; + } + i = 2; + } + for (; i < gimple_num_ops (stmt); i++) { - tree op = gimple_op (stmt, i); + op = gimple_op (stmt, i); if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) { @@ -4682,7 +4699,7 @@ vectorizable_load (gimple stmt, gimple_s Returns whether a COND can be vectorized. Checks whether condition operands are supportable using vec_is_simple_use. */ -static bool +bool vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype) { tree lhs, rhs; --- gcc/testsuite/lib/target-supports.exp.jj 2011-10-04 10:18:31.000000000 +0200 +++ gcc/testsuite/lib/target-supports.exp 2011-10-06 15:18:28.000000000 +0200 @@ -3234,6 +3234,26 @@ proc check_effective_target_vect_conditi return $et_vect_cond_saved } +# Return 1 if the target supports vector conditional operations where +# the comparison has different type from the lhs, 0 otherwise. + +proc check_effective_target_vect_cond_mixed { } { + global et_vect_cond_mixed_saved + + if [info exists et_vect_cond_mixed_saved] { + verbose "check_effective_target_vect_cond_mixed: using cached result" 2 + } else { + set et_vect_cond_mixed_saved 0 + if { [istarget i?86-*-*] + || [istarget x86_64-*-*] } { + set et_vect_cond_mixed_saved 1 + } + } + + verbose "check_effective_target_vect_cond_mixed: returning $et_vect_cond_mixed_saved" 2 + return $et_vect_cond_mixed_saved +} + # Return 1 if the target supports vector char multiplication, 0 otherwise. 
proc check_effective_target_vect_char_mult { } { --- gcc/testsuite/gcc.dg/vect/vect-cond-8.c.jj 2011-10-06 14:50:25.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-cond-8.c 2011-10-06 15:17:12.000000000 +0200 @@ -0,0 +1,122 @@ +/* { dg-require-effective-target vect_cond_mixed } */ + +#include "tree-vect.h" + +#define N 1024 +float a[N], b[N], c[N]; +int d[N], e[N], f[N]; +unsigned char k[N]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = a[i] < b[i] ? 17 : 0; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = a[i] < b[i] ? 0 : 24; +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = a[i] < b[i] ? 51 : 12; +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int d2 = d[i], e2 = e[i]; + f[i] = a[i] < b[i] ? d2 : e2; + } +} + +__attribute__((noinline, noclone)) void +f5 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + float a2 = a[i], b2 = b[i]; + c[i] = d[i] < e[i] ? a2 : b2; + } +} + +int +main () +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + switch (i % 9) + { + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; + case 1: a[i] = 0; b[i] = 0; break; + case 2: a[i] = i + 1; b[i] = - i - 1; break; + case 3: a[i] = i; b[i] = i + 7; break; + case 4: a[i] = i; b[i] = i; break; + case 5: a[i] = i + 16; b[i] = i + 3; break; + case 6: a[i] = - i - 5; b[i] = - i; break; + case 7: a[i] = - i; b[i] = - i; break; + case 8: a[i] = - i; b[i] = - i - 7; break; + } + d[i] = i; + e[i] = 2 * i; + } + f1 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 ? 17 : 0)) + abort (); + f2 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 ? 0 : 24)) + abort (); + f3 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 ? 51 : 12)) + abort (); + f4 (); + for (i = 0; i < N; i++) + if (f[i] != ((i % 3) == 0 ? 
d[i] : e[i])) + abort (); + for (i = 0; i < N; i++) + { + switch (i % 9) + { + case 0: asm (""); d[i] = - i - 1; e[i] = i + 1; break; + case 1: d[i] = 0; e[i] = 0; break; + case 2: d[i] = i + 1; e[i] = - i - 1; break; + case 3: d[i] = i; e[i] = i + 7; break; + case 4: d[i] = i; e[i] = i; break; + case 5: d[i] = i + 16; e[i] = i + 3; break; + case 6: d[i] = - i - 5; e[i] = - i; break; + case 7: d[i] = - i; e[i] = - i; break; + case 8: d[i] = - i; e[i] = - i - 7; break; + } + a[i] = i; + b[i] = i / 2; + } + f5 (); + for (i = 0; i < N; i++) + if (c[i] != ((i % 3) == 0 ? a[i] : b[i])) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 5 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ Jakub