It might be argued that this is a reload bug (since it runs on LRA), but
sometimes it is simpler to put a workaround in the machine-dependent code.
If the maintainers decide that it should be fixed in reload instead of via
this patch, that is fine.

PR 71294 involves vectorization where the compiler is forming a V2DI vector
from two DI elements.  The elements are the same value (a stack address), so
the register allocator copies the address over to the VSX register file and
does an XXPERMDI.  Because of -fstack-protector, frame addresses are modified
and become an ADD operation, and the direct move fails.

I added a splitter for DImode so that an attempt to splat a virtual register
or frame address register into the VSX register file first copies the value
to a pseudo register and then does the direct move on that pseudo.

I have done a bootstrap and regression test with these patches and there were
no regressions.  Are the patches ok to install in the trunk?

[gcc]
2016-05-26  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/71294
        * config/rs6000/predicates.md (virtual_or_frame_reg_operand): New
        predicate to return true if the operand is a virtual or frame
        register.
        * config/rs6000/vsx.md (move splat splitters): Add splitters to
        copy a frame related pointer into a new pseudo register during the
        first split pass, so that we don't confuse the register allocator.

[gcc/testsuite]
2016-05-26  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/71294
        * g++.dg/pr71294.C: New test.


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md     (revision 236800)
+++ gcc/config/rs6000/predicates.md     (working copy)
@@ -1959,3 +1959,20 @@ (define_predicate "fusion_offsettable_me
 
   return offsettable_nonstrict_memref_p (op);
 })
+
+;; Match a REG, or a SUBREG wrapping a REG, whose register number satisfies
+;; REGNO_PTR_FRAME_P, i.e. a virtual or frame register.  The register
+;; allocator gets confused if such a register is used in a splat operation
+;; when -fstack-protector is used.
+(define_predicate "virtual_or_frame_reg_operand"
+  (match_code "reg,subreg")
+{
+  rtx inner = SUBREG_P (op) ? SUBREG_REG (op) : op;
+
+  /* The object inside a SUBREG need not be a register.  */
+  if (!REG_P (inner))
+    return 0;
+
+  HOST_WIDE_INT regno = REGNO (inner);
+  return REGNO_PTR_FRAME_P (regno);
+})
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (revision 236800)
+++ gcc/config/rs6000/vsx.md    (working copy)
@@ -2397,6 +2397,20 @@ (define_insn "vsx_splat_<mode>"
    lxvdsx %x0,%y1"
   [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
 
+;; Virtual/frame registers cause problems because they are replaced by a PLUS
+;; operation which confuses RELOAD if -fstack-protector is used.  Add a
+;; splitter that first copies such a register into a fresh DImode pseudo.
+(define_split
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "")
+       (vec_duplicate:V2DI
+        (match_operand:DI 1 "virtual_or_frame_reg_operand" "")))]
+  "TARGET_VSX && TARGET_POWERPC64 && can_create_pseudo_p ()"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (vec_duplicate:V2DI (match_dup 2)))]
+{
+  operands[2] = gen_reg_rtx (DImode);
+})
+
 ;; V4SI splat (ISA 3.0)
 ;; When SI's are allowed in VSX registers, add XXSPLTW support
 (define_expand "vsx_splat_<mode>"
@@ -2411,6 +2425,17 @@ (define_expand "vsx_splat_<mode>"
     operands[1] = force_reg (<VS_scalar>mode, operands[1]);
 })
 
+(define_split
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "")
+       (vec_duplicate:V4SI
+        (match_operand:SI 1 "virtual_or_frame_reg_operand" "")))]
+  "TARGET_P9_VECTOR && !TARGET_POWERPC64 && can_create_pseudo_p ()"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (vec_duplicate:V4SI (match_dup 2)))]
+{
+  operands[2] = gen_reg_rtx (SImode);  /* Fresh pseudo: copy of operand 1.  */
+})
+
 (define_insn "*vsx_splat_v4si_internal"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
        (vec_duplicate:V4SI
Index: gcc/testsuite/g++.dg/pr71294.C
===================================================================
--- gcc/testsuite/g++.dg/pr71294.C      (revision 0)
+++ gcc/testsuite/g++.dg/pr71294.C      (revision 0)
@@ -0,0 +1,56 @@
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -fstack-protector" } */
+
+class A;  // forward declaration; C's constructor below takes an A by value
+template <typename _Tp, int m, int n> class B {
+public:
+  _Tp val[m * n];  // m*n elements stored inline in the object
+};
+class C {
+public:
+  C(A);  // non-explicit: lets an A be passed where a C is expected
+};
+struct D {
+  D();
+  unsigned long &operator[](int);
+  unsigned long *p;
+};
+class A {
+public:
+  template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool);
+  int rows, cols;
+  unsigned char *data;
+  unsigned char *datastart;
+  unsigned char *dataend;
+  unsigned char *datalimit;
+  D step;
+};
+template <typename _Tp, int m, int n>
+A::A(const B<_Tp, m, n> &p1, bool)
+    : rows(m), cols(n) {
+  step[0] = cols * sizeof(_Tp);
+  datastart = data = (unsigned char *)p1.val;  // same address into two members
+  datalimit = dataend = datastart + rows * step[0];
+}
+class F {
+public:
+  static void compute(C);
+  template <typename _Tp, int m, int n, int nm>
+  static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                      B<_Tp, n, nm> &);
+};
+D::D() {}  // leaves p unset
+unsigned long &D::operator[](int p1) { return p[p1]; }
+template <typename _Tp, int m, int n, int nm>
+void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                B<_Tp, n, nm> &p4) {
+  A a(p4, false);
+  compute(a);  // one argument: selects compute(C), converting a through C(A)
+}
+void fn1() {
+  B<double, 4, 4> b, c, e;
+  B<double, 4, 1> d;
+  F::compute(b, d, c, e);  // deduces _Tp=double, m=n=nm=4; e is passed as p4
+}

Reply via email to