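I've had a patch in my dev tree for quite a while that lowers vector statements whose operands are all uniform vectors to a scalar statement followed by a splat of the result, provided the target supports the scalar operation. This also mitigates PR58497, so I decided to push it out now.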
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-10-22 Richard Biener <rguent...@suse.de> PR tree-optimization/58497 * tree-vect-generic.c (ssa_uniform_vector_p): New helper. (expand_vector_operations_1): Use it. Lower operations on all uniform vectors to scalar operations if the HW supports it. * gcc.dg/tree-ssa/vector-5.c: New testcase. Index: gcc/tree-vect-generic.c =================================================================== *** gcc/tree-vect-generic.c (revision 229167) --- gcc/tree-vect-generic.c (working copy) *************** lower_vec_perm (gimple_stmt_iterator *gs *** 1339,1344 **** --- 1339,1361 ---- update_stmt (gsi_stmt (*gsi)); } + /* If OP is a uniform vector return the element it is a splat from. */ + + static tree + ssa_uniform_vector_p (tree op) + { + if (TREE_CODE (op) == VECTOR_CST + || TREE_CODE (op) == CONSTRUCTOR) + return uniform_vector_p (op); + if (TREE_CODE (op) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (op); + if (gimple_assign_single_p (def_stmt)) + return uniform_vector_p (gimple_assign_rhs1 (def_stmt)); + } + return NULL_TREE; + } + /* Return type in which CODE operation with optab OP can be computed. */ *************** expand_vector_operations_1 (gimple_stmt_ *** 1505,1510 **** --- 1522,1550 ---- if (TREE_CODE (type) != VECTOR_TYPE) return; + /* If the vector operation is operating on all same vector elements + implement it with a scalar operation and a splat if the target + supports the scalar operation. */ + tree srhs1, srhs2 = NULL_TREE; + if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE + && (rhs2 == NULL_TREE + || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE) + /* As we query direct optabs restrict to non-convert operations. 
*/ + && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1))) + { + op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar); + if (optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing) + { + tree slhs = make_ssa_name (TREE_TYPE (srhs1)); + gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2); + gsi_insert_before (gsi, repl, GSI_SAME_STMT); + gimple_assign_set_rhs_from_tree (gsi, + build_vector_from_val (type, slhs)); + update_stmt (stmt); + return; + } + } + /* A scalar operation pretending to be a vector one. */ if (VECTOR_BOOLEAN_TYPE_P (type) && !VECTOR_MODE_P (TYPE_MODE (type)) *************** expand_vector_operations_1 (gimple_stmt_ *** 1554,1568 **** if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) { tree first; - gimple *def_stmt; ! if ((TREE_CODE (rhs2) == VECTOR_CST ! && (first = uniform_vector_p (rhs2)) != NULL_TREE) ! || (TREE_CODE (rhs2) == SSA_NAME ! && (def_stmt = SSA_NAME_DEF_STMT (rhs2)) ! && gimple_assign_single_p (def_stmt) ! && (first = uniform_vector_p ! (gimple_assign_rhs1 (def_stmt))) != NULL_TREE)) { gimple_assign_set_rhs2 (stmt, first); update_stmt (stmt); --- 1594,1601 ---- if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) { tree first; ! if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE) { gimple_assign_set_rhs2 (stmt, first); update_stmt (stmt); Index: gcc/testsuite/gcc.dg/tree-ssa/vector-5.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/vector-5.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/vector-5.c (working copy) *************** *** 0 **** --- 1,15 ---- + /* { dg-do compile } */ + /* { dg-options "-O -fdump-tree-optimized" } */ + + typedef int v4si __attribute__((vector_size(4*sizeof (int)))); + + v4si v; + int foo (int i) + { + v4si v1 = (v4si) { i, i, i, i }; + v4si v2 = (v4si) { 3, 3, 3, 3 }; + v = v1 * v2; + } + + /* The operation should be carried out as scalar op. 
*/ + /* { dg-final { scan-tree-dump-times " \* 3;" 1 "optimized" } } */