The following fixes a typo in the VEC_PERM_EXPR to BIT_INSERT_EXPR
pattern and adjusts the forwprop heuristic so that the pattern
triggers.  This improves code generation for the testcase back to a
simple movhpd, as we emitted with GCC 9.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-12-05  Richard Biener  <rguent...@suse.de>

        PR middle-end/92818
        * tree-ssa-forwprop.c (simplify_vector_constructor): Improve
        heuristics on which don't-care element to choose.
        * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Fix typo.

        * gcc.target/i386/pr92818.c: New testcase.

Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c     (revision 278992)
+++ gcc/tree-ssa-forwprop.c     (working copy)
@@ -2265,9 +2265,12 @@ simplify_vector_constructor (gimple_stmt
        sel.quick_push (elts[i].second + elts[i].first * refnelts);
       /* And fill the tail with "something".  It's really don't care,
          and ideally we'd allow VEC_PERM to have a smaller destination
-        vector.  */
+        vector.  As a heuristic, try to preserve a uniform orig[0], which
+        facilitates later pattern-matching of VEC_PERM_EXPR to a
+        BIT_INSERT_EXPR.  */
       for (; i < refnelts; ++i)
-       sel.quick_push (i - elts.length ());
+       sel.quick_push ((elts[0].second == 0 && elts[0].first == 0
+                        ? 0 : refnelts) + i);
       vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts);
       if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices))
        return false;
Index: gcc/match.pd
===================================================================
--- gcc/match.pd        (revision 278992)
+++ gcc/match.pd        (working copy)
@@ -6049,7 +6049,7 @@ (define_operator_list COND_TERNARY
                    break;
                if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
                  {
-                   if (known_lt (at, nelts))
+                   if (known_lt (poly_uint64 (sel[at]), nelts))
                      ins = fold_read_from_vector (cop0, sel[at]);
                    else
                      ins = fold_read_from_vector (cop1, sel[at] - nelts);
Index: gcc/testsuite/gcc.target/i386/pr92818.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr92818.c     (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr92818.c     (working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mavx2 -fdump-tree-forwprop1" } */
+
+typedef double v4df __attribute__((vector_size (32)));
+typedef double v2df __attribute__((vector_size (16)));
+
+v2df
+bar (v4df x, double *p)
+{
+  return (v2df) { x[0], *p };
+}
+
+/* { dg-final { scan-tree-dump "BIT_INSERT_EXPR" "forwprop1" } } */
+/* { dg-final { scan-assembler "movhpd" } } */

Reply via email to