It might be argued that this is a reload bug (since it runs on LRA), but sometimes it is simpler to place a simpler work around in the machine dependent code. If the maintainers decide that it should be fixed in reload instead of via this patch, that is fine.
PR 71294 involves vectorization where the compiler is forming a V2DI vector from 2 DI elements. The elements are the same value (a stack address), so the register allocator copies the address over to the VSX register file, and does an XXPERMDI. Because of the -fstack-protector, frame addresses are modified, and become an ADD operation, and the direct move fails. I added a splitter for DImode so that if a virtual register or frame address register was attempted to be splatted to the VSX register file, it would copy the value to a pseudo register, and do a direct move on that. I have done a bootstrap and regression test with these patches and there were no regressions. Are the patches ok to install in the trunk? [gcc] 2016-05-26 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/71294 * config/rs6000/predicates.md (virtual_or_frame_reg_operand): New predicate to return true if the operand is a virtual or frame register. * config/rs6000/vsx.md (move splat splitters): Add splitters to copy a frame related pointer into a new pseudo register during the first split pass, so that we don't confuse the register allocator. [gcc/testsuite] 2016-05-26 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/71294 * g++.dg/pr71294.C: New test. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (revision 236800) +++ gcc/config/rs6000/predicates.md (working copy) @@ -1959,3 +1959,20 @@ (define_predicate "fusion_offsettable_me return offsettable_nonstrict_memref_p (op); }) + +;; Return true if the operand is a virtual or frame register. The register +;; allocator gets confused if a virtual/frame register is used in a splat +;; operation when -fstack-protector is used. +(define_predicate "virtual_or_frame_reg_operand" + (match_code "reg,subreg") +{ + HOST_WIDE_INT r; + if (SUBREG_P (op)) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + return REGNO_PTR_FRAME_P (r); +}) Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (revision 236800) +++ gcc/config/rs6000/vsx.md (working copy) @@ -2397,6 +2397,20 @@ (define_insn "vsx_splat_<mode>" lxvdsx %x0,%y1" [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")]) +;; Virtual/frame registers cause problems because they are replaced by a PLUS +;; operation which confuses RELOAD if -fstack-protector is used. Add a +;; splitter to copy such registers to a temporary +(define_split + [(set (match_operand:V2DI 0 "vsx_register_operand" "") + (vec_duplicate:V2DI + (match_operand:DI 1 "virtual_or_frame_reg_operand" "")))] + "TARGET_VSX && TARGET_POWERPC64 && can_create_pseudo_p ()" + [(match_dup 2) (match_dup 1) + (match_dup 0) (vec_duplicate:VSX_D (match_dup 2))] +{ + operands[2] = gen_reg_rtx (DImode); +}) + ;; V4SI splat (ISA 3.0) ;; When SI's are allowed in VSX registers, add XXSPLTW support (define_expand "vsx_splat_<mode>" @@ -2411,6 +2425,17 @@ (define_expand "vsx_splat_<mode>" operands[1] = force_reg (<VS_scalar>mode, operands[1]); }) +(define_split + [(set (match_operand:V4SI 0 "vsx_register_operand" "") + (vec_duplicate:V4SI + (match_operand:SI 1 "virtual_or_frame_reg_operand" "")))] + "TARGET_P9_VECTOR && !TARGET_POWERPC64 && can_create_pseudo_p ()" + [(match_dup 2) (match_dup 1) + (match_dup 0) (vec_duplicate:VSX_D (match_dup 2))] +{ + operands[2] = gen_reg_rtx (SImode); +}) + (define_insn "*vsx_splat_v4si_internal" [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") (vec_duplicate:V4SI Index: gcc/testsuite/g++.dg/pr71294.C =================================================================== --- gcc/testsuite/g++.dg/pr71294.C (revision 0) +++ gcc/testsuite/g++.dg/pr71294.C (revision 0) @@ -0,0 +1,56 @@ +/* { dg-do compile { target { powerpc64*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O3 -fstack-protector" } */ + +class A; +template <typename _Tp, int m, int n> class B { +public: + _Tp val[m * n]; +}; +class C { +public: + C(A); +}; +struct D { + D(); + unsigned long &operator[](int); + unsigned long *p; +}; +class A { +public: + template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool); + int rows, cols; + unsigned char *data; + unsigned char *datastart; + unsigned char *dataend; + unsigned char *datalimit; + D step; +}; +template <typename _Tp, int m, int n> +A::A(const B<_Tp, m, n> &p1, bool) + : rows(m), cols(n) { + step[0] = cols * sizeof(_Tp); + datastart = data = (unsigned char *)p1.val; + datalimit = dataend = datastart + rows * step[0]; +} +class F { +public: + static void compute(C); + template <typename _Tp, int m, int n, int nm> + static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &, + B<_Tp, n, nm> &); +}; +D::D() {} +unsigned long &D::operator[](int p1) { return p[p1]; } +template <typename _Tp, int m, int n, int nm> +void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &, + B<_Tp, n, nm> &p4) { + A a(p4, false); + compute(a); +} +void fn1() { + B<double, 4, 4> b, c, e; + B<double, 4, 1> d; + F::compute(b, d, c, e); +}