On Thu, Jul 16, 2015 at 12:17 AM, Uros Bizjak <ubiz...@gmail.com> wrote:

> Attached patch fixes PR 66866. The problem was in ix86_expand_pinsr,
> where we didn't account for non-lowpart source register and just
> blindly took a SUBREG of it.

Attached patch is the version for release branches. As suggested by
Richi in the PR, the patch simply rejects non-lowpart source subregs.

2015-07-17  Uros Bizjak  <ubiz...@gmail.com>

	PR target/66866
	* config/i386/i386.c (ix86_expand_pinsr): Reject non-lowpart
	source subregs.

testsuite/ChangeLog:

2015-07-17  Uros Bizjak  <ubiz...@gmail.com>

	PR target/66866
	* g++.dg/pr66866.C: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}, will
be committed to active release branches.

Uros.

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 225932)
+++ config/i386/i386.c (working copy)
@@ -50335,6 +50335,14 @@ ix86_expand_pinsr (rtx *operands)
   unsigned int size = INTVAL (operands[1]);
   unsigned int pos = INTVAL (operands[2]);
 
+  if (GET_CODE (src) == SUBREG)
+    {
+      /* Reject non-lowpart subregs.  */
+      if (SUBREG_BYTE (src) != 0)
+        return false;
+      src = SUBREG_REG (src);
+    }
+
   if (GET_CODE (dst) == SUBREG)
     {
       pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
@@ -50341,9 +50349,6 @@ ix86_expand_pinsr (rtx *operands)
       dst = SUBREG_REG (dst);
     }
 
-  if (GET_CODE (src) == SUBREG)
-    src = SUBREG_REG (src);
-
   switch (GET_MODE (dst))
     {
     case V16QImode:
Index: testsuite/g++.dg/pr66866.C
===================================================================
--- testsuite/g++.dg/pr66866.C (revision 0)
+++ testsuite/g++.dg/pr66866.C (working copy)
@@ -0,0 +1,29 @@
+// { dg-do run { target i?86-*-* x86_64-*-* } }
+// { dg-require-effective-target sse2_runtime }
+// { dg-options "-O -msse2" }
+
+extern "C" void abort (void);
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef short A __attribute__((__may_alias__));
+
+__m128i __attribute__((noinline))
+shuf(const __m128i v)
+{
+  __m128i r;
+
+  reinterpret_cast<A *>(&r)[5] = reinterpret_cast<const A *>(&v)[4];
+  return r;
+}
+
+int main()
+{
+  __attribute__((aligned(16))) short mem[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  *reinterpret_cast<__m128i *>(mem) = shuf (*reinterpret_cast<__m128i *>(mem));
+
+  if (mem[5] != 4)
+    abort ();
+
+  return 0;
+}