On March 16, 2015 5:21:02 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>On the following testcase, gimple LIM creates a vector COND_EXPR
>(scalar
>condition, vector lhs, rhs2 and rhs3), but if we don't have
>corresponding
>vector mode for it, we ICE trying to expand the BLKmode COND_EXPR, as
>it is
>unprepared for that.
>
>This patch lowers those (parallel or piecewise).
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Though maybe LIM should not create these for cost reasons?
I also wonder if we should lower them to control flow?

Thanks,
Richard.

>2015-03-16 Jakub Jelinek <ja...@redhat.com>
>
> PR tree-optimization/65427
> * tree-vect-generic.c (do_cond, expand_vector_scalar_condition): New
> functions.
> (expand_vector_operations_1): Handle BLKmode vector COND_EXPR.
>
> * gcc.c-torture/execute/pr65427.c: New test.
>
>--- gcc/tree-vect-generic.c.jj 2015-01-15 20:25:40.000000000 +0100
>+++ gcc/tree-vect-generic.c 2015-03-16 14:25:37.391932269 +0100
>@@ -1417,6 +1417,57 @@ count_type_subparts (tree type)
> return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
> }
>
>+static tree
>+do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
>+ tree bitpos, tree bitsize, enum tree_code code)
>+{
>+ if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
>+ a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
>+ if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
>+ b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
>+ tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
>+ return gimplify_build3 (gsi, code, inner_type, cond, a, b);
>+}
>+
>+/* Expand a vector COND_EXPR to scalars, piecewise. */
>+static void
>+expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
>+{
>+ gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
>+ tree type = gimple_expr_type (stmt);
>+ tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
>+ machine_mode compute_mode = TYPE_MODE (compute_type);
>+ gcc_assert (compute_mode != BLKmode);
>+ tree lhs = gimple_assign_lhs (stmt);
>+ tree rhs2 = gimple_assign_rhs2 (stmt);
>+ tree rhs3 = gimple_assign_rhs3 (stmt);
>+ tree new_rhs;
>+
>+ /* If the compute mode is not a vector mode (hence we are not
>decomposing
>+ a BLKmode vector to smaller, hardware-supported vectors), we may
>want
>+ to expand the operations in parallel.
*/
>+ if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
>+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
>+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
>+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
>+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
>+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
>+ new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
>+ COND_EXPR);
>+ else
>+ new_rhs = expand_vector_piecewise (gsi, do_cond, type,
>compute_type,
>+ rhs2, rhs3, COND_EXPR);
>+ if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE
>(new_rhs)))
>+ new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE
>(lhs),
>+ new_rhs);
>+
>+ /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
>+ way to do it is change expand_vector_operation and its callees to
>+ return a tree_code, RHS1 and RHS2 instead of a tree. */
>+ gimple_assign_set_rhs_from_tree (gsi, new_rhs);
>+ update_stmt (gsi_stmt (*gsi));
>+}
>+
>/* Process one statement. If we identify a vector operation, expand
>it.
*/
>
> static void
>@@ -1449,6 +1500,14 @@ expand_vector_operations_1 (gimple_stmt_
> return;
> }
>
>+ if (code == COND_EXPR
>+ && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) ==
>VECTOR_TYPE
>+ && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
>+ {
>+ expand_vector_scalar_condition (gsi);
>+ return;
>+ }
>+
> if (code == CONSTRUCTOR
> && TREE_CODE (lhs) == SSA_NAME
> && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
>--- gcc/testsuite/gcc.c-torture/execute/pr65427.c.jj 2015-03-16
>14:36:29.489254701 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65427.c 2015-03-16
>14:40:58.789851433 +0100
>@@ -0,0 +1,34 @@
>+/* PR tree-optimization/65427 */
>+
>+typedef int V __attribute__ ((vector_size (8 * sizeof (int))));
>+V a, b, c, d, e, f;
>+
>+__attribute__((noinline, noclone)) void
>+foo (int x, int y)
>+{
>+ do
>+ {
>+ if (x)
>+ d = a ^ c;
>+ else
>+ d = a ^ b;
>+ }
>+ while (y);
>+}
>+
>+int
>+main ()
>+{
>+ a = (V) { 1, 2, 3, 4, 5, 6, 7, 8 };
>+ b = (V) { 0x40, 0x80, 0x40, 0x80, 0x40, 0x80, 0x40, 0x80 };
>+ e = (V) { 0x41, 0x82, 0x43, 0x84, 0x45, 0x86, 0x47, 0x88 };
>+ foo (0, 0);
>+ if (__builtin_memcmp (&d, &e, sizeof (V)) != 0)
>+ __builtin_abort ();
>+ c = (V) { 0x80, 0x40, 0x80, 0x40, 0x80, 0x40, 0x80, 0x40 };
>+ f = (V) { 0x81, 0x42, 0x83, 0x44, 0x85, 0x46, 0x87, 0x48 };
>+ foo (1, 0);
>+ if (__builtin_memcmp (&d, &f, sizeof (V)) != 0)
>+ __builtin_abort ();
>+ return 0;
>+}
>
> Jakub