Hi,
  Add support for early gimple folding of the vec_mergee() and vec_mergeo()
intrinsics.

Testcases posted separately.
Tested across assorted Power Linux platforms.

OK for trunk?

Thanks,
-Will
    
[gcc]

2018-10-09  Will Schmidt  <will_schm...@vnet.ibm.com>

        * config/rs6000/rs6000.c (fold_mergeeo_helper): New helper function.
        (rs6000_gimple_fold_builtin): Add hooks for vec_mergee and vec_mergeo
        intrinsics.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5c7ab2b..a77049e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15161,10 +15161,57 @@ fold_mergehl_helper (gimple_stmt_iterator *gsi, 
gimple *stmt, int use_high)
 
   gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
   gimple_set_location (g, gimple_location (stmt));
   gsi_replace (gsi, g, true);
 }
+/* Helper function to handle the vector merge[eo] built-ins.  The
+   permute vector contains even or odd values that index across both
+   arg0 and arg1.  The even/odd-ness is handled via the shift argument
+   passed in.  */
+static void
+fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int shift)
+{
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+  tree lhs = gimple_call_lhs (stmt);
+  tree lhs_type = TREE_TYPE (lhs);
+  int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
+
+  /* The permute_type will match the lhs for integral types.  For double and
+     float types, the permute type needs to map to the V2 or V4 type that
+     matches size.  */
+  tree permute_type;
+  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)))
+    permute_type = lhs_type;
+  else
+    {
+      if (types_compatible_p (TREE_TYPE (lhs_type),
+			      TREE_TYPE (V2DF_type_node)))
+	permute_type = V2DI_type_node;
+      else if (types_compatible_p (TREE_TYPE (lhs_type),
+				   TREE_TYPE (V4SF_type_node)))
+	permute_type = V4SI_type_node;
+      else
+	gcc_unreachable ();
+    }
+  tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
+
+  /* Build the permute vector: pairs of even (shift == 0) or odd
+     (shift == 1) element indices spanning arg0 then arg1.  */
+  for (int i = 0; i < n_elts / 2; i++)
+    {
+      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+				     2 * i + shift));
+      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
+				     2 * i + shift + n_elts));
+    }
+
+  tree permute = elts.build ();
+
+  gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
+  gimple_set_location (g, gimple_location (stmt));
+  gsi_replace (gsi, g, true);
+}
 
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
 
 bool
@@ -15862,10 +15909,25 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_XXMRGHW_4SF:
     case VSX_BUILTIN_VEC_MERGEH_V2DF:
        fold_mergehl_helper (gsi, stmt, 0);
        return true;
 
+    /* Flavors of vec_mergee.  */
+    case P8V_BUILTIN_VMRGEW_V4SI:
+    case P8V_BUILTIN_VMRGEW_V2DI:
+    case P8V_BUILTIN_VMRGEW_V4SF:
+    case P8V_BUILTIN_VMRGEW_V2DF:
+      fold_mergeeo_helper (gsi, stmt, 0);
+      return true;
+
+    /* Flavors of vec_mergeo.  */
+    case P8V_BUILTIN_VMRGOW_V4SI:
+    case P8V_BUILTIN_VMRGOW_V2DI:
+    case P8V_BUILTIN_VMRGOW_V4SF:
+    case P8V_BUILTIN_VMRGOW_V2DF:
+      fold_mergeeo_helper (gsi, stmt, 1);
+      return true;
     /* d = vec_pack (a, b) */
     case P8V_BUILTIN_VPKUDUM:
     case ALTIVEC_BUILTIN_VPKUHUM:
     case ALTIVEC_BUILTIN_VPKUWUM:
       {


Reply via email to