Hi,

Add support for early gimple folding of the vec_mergee() and vec_mergeo() intrinsics.
Testcases posted separately.

Tested across assorted power linux platforms.

OK for trunk?

Thanks,
-Will

[gcc]

2018-10-09 Will Schmidt <will_schm...@vnet.ibm.com>

    * config/rs6000/rs6000.c: (fold_mergeeo_helper): New helper function.
    (rs6000_gimple_fold_builtin): Add hooks for vec_mergee and vec_mergeo
    intrinsics.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5c7ab2b..a77049e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15161,10 +15161,57 @@ fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
 
   gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
 }
+/* Helper function to handle the vector merge[eo] built-ins.
+ * The permute vector contains even or odd values that index
+ * across both arg1 and arg2. The even/odd-ness is handled via the
+ * shift argument passed in. */
+static void
+fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int shift)
+{
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+  tree lhs = gimple_call_lhs (stmt);
+  tree lhs_type = TREE_TYPE (lhs);
+  int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
+
+  /* The permute_type will match the lhs for integral types. For double and
+     float types, the permute type needs to map to the V2 or V4 type that
+     matches size. */
+  tree permute_type;
+  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)))
+    permute_type = lhs_type;
+  else
+    {
+      if (types_compatible_p (TREE_TYPE (lhs_type),
+                              TREE_TYPE (V2DF_type_node)))
+        permute_type = V2DI_type_node;
+      else if (types_compatible_p (TREE_TYPE (lhs_type),
+                                   TREE_TYPE (V4SF_type_node)))
+        permute_type = V4SI_type_node;
+      else
+        gcc_unreachable ();
+    }
+  tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
+
+  /* Build the permute vector. */
+  for (int i = 0; i < n_elts / 2; i++)
+    {
+      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+                                     2*i + shift));
+      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+                                     2*i + shift + n_elts));
+    }
+
+  tree permute = elts.build ();
+
+  gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
+  gimple_set_location (g, gimple_location (stmt));
+  gsi_replace (gsi, g, true);
+}
 
 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
    a constant, use rs6000_fold_builtin.) */
 
 bool
@@ -15862,10 +15909,25 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_XXMRGHW_4SF:
     case VSX_BUILTIN_VEC_MERGEH_V2DF:
       fold_mergehl_helper (gsi, stmt, 0);
       return true;
 
+    /* Flavors of vec_mergee. */
+    case P8V_BUILTIN_VMRGEW_V4SI:
+    case P8V_BUILTIN_VMRGEW_V2DI:
+    case P8V_BUILTIN_VMRGEW_V4SF:
+    case P8V_BUILTIN_VMRGEW_V2DF:
+      fold_mergeeo_helper (gsi, stmt, 0);
+      return true;
+    /* Flavors of vec_mergeo. */
+    case P8V_BUILTIN_VMRGOW_V4SI:
+    case P8V_BUILTIN_VMRGOW_V2DI:
+    case P8V_BUILTIN_VMRGOW_V4SF:
+    case P8V_BUILTIN_VMRGOW_V2DF:
+      fold_mergeeo_helper (gsi, stmt, 1);
+      return true;
+
     /* d = vec_pack (a, b) */
     case P8V_BUILTIN_VPKUDUM:
     case ALTIVEC_BUILTIN_VPKUHUM:
     case ALTIVEC_BUILTIN_VPKUWUM:
       {