The attached patch fixes PR 66866. The problem was in ix86_expand_pinsr,
where we didn't account for a non-lowpart source subreg and just
blindly took its SUBREG_REG, dropping the subreg's byte offset.

The patch introduces ix86_expand_pextr and passes a non-lowpart source
subreg through the new expander. Instead of bailing out on such an
operand, the patched gcc compiles the testcase to:

       vpextrw $4, %xmm0, %eax
       vpxor   %xmm0, %xmm0, %xmm0
       vpinsrw $5, %eax, %xmm0, %xmm0
       ret

In addition, ix86_expand_pextr is now used in the extzv<mode> expander
for extracts from vector registers.

2015-07-16  Uros Bizjak  <ubiz...@gmail.com>

    PR target/66866
    * config/i386/i386-protos.h (ix86_expand_pextr): New prototype.
    * config/i386/i386.c (ix86_expand_pextr): New function.
    (ix86_expand_pinsr): Handle V1TI and TI modes.  Call ix86_expand_pextr
    for non-lowpart subregs.
    * config/i386/i386.md (extzv<mode>): Expand with ix86_expand_pextr.
    (insv<mode>): Use SWI248 mode iterator.
    (insv<mode>_1): Ditto.

testsuite/ChangeLog:

2015-07-16  Uros Bizjak  <ubiz...@gmail.com>

    PR target/66866
    * g++.dg/pr66866.C: New test.

The patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32}, both with and without --with-fpmath=avx.

The patch has been committed to mainline and will be backported to the
release branches once they are open.

Uros.
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h   (revision 225844)
+++ config/i386/i386-protos.h   (working copy)
@@ -223,6 +223,7 @@ extern void ix86_expand_vector_extract (bool, rtx,
 extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
 extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
+extern bool ix86_expand_pextr (rtx *);
 extern bool ix86_expand_pinsr (rtx *);
 extern void ix86_expand_mul_widen_evenodd (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 225844)
+++ config/i386/i386.c  (working copy)
@@ -50509,6 +50509,105 @@ ix86_expand_sse2_abs (rtx target, rtx input)
     emit_move_insn (target, x);
 }
 
+/* Expand an extract from a vector register through pextr insn.
+   Return true if successful.  */
+
+bool
+ix86_expand_pextr (rtx *operands)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+
+  unsigned int size = INTVAL (operands[2]);
+  unsigned int pos = INTVAL (operands[3]);
+
+  if (GET_CODE (dst) == SUBREG)
+    {
+      /* Reject non-lowpart subregs.  */
+      if (SUBREG_BYTE (dst) > 0)
+       return false;
+      dst = SUBREG_REG (dst);
+    }
+       
+  if (GET_CODE (src) == SUBREG)
+    {
+      pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
+      src = SUBREG_REG (src);
+    }
+
+  switch (GET_MODE (src))
+    {
+    case V16QImode:
+    case V8HImode:
+    case V4SImode:
+    case V2DImode:
+    case V1TImode:
+    case TImode:
+      {
+       machine_mode srcmode, dstmode;
+       rtx d, pat;
+
+       dstmode = mode_for_size (size, MODE_INT, 0);
+
+       switch (dstmode)
+         {
+         case QImode:
+           if (!TARGET_SSE4_1)
+             return false;
+           srcmode = V16QImode;
+           break;
+
+         case HImode:
+           if (!TARGET_SSE2)
+             return false;
+           srcmode = V8HImode;
+           break;
+
+         case SImode:
+           if (!TARGET_SSE4_1)
+             return false;
+           srcmode = V4SImode;
+           break;
+
+         case DImode:
+           gcc_assert (TARGET_64BIT);
+           if (!TARGET_SSE4_1)
+             return false;
+           srcmode = V2DImode;
+           break;
+
+         default:
+           return false;
+         }
+
+       if (GET_MODE (dst) == dstmode)
+         d = dst;
+       else
+         d = gen_reg_rtx (dstmode);
+
+       /* Construct insn pattern.  */
+       pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
+       pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
+
+       /* Let the rtl optimizers know about the zero extension performed.  */
+       if (dstmode == QImode || dstmode == HImode)
+         {
+           pat = gen_rtx_ZERO_EXTEND (SImode, pat);
+           d = gen_lowpart (SImode, d);
+         }
+
+       emit_insn (gen_rtx_SET (d, pat));
+
+       if (d != dst)
+         emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
+       return true;
+      }
+
+    default:
+      return false;
+    }
+}
+
 /* Expand an insert into a vector register through pinsr insn.
    Return true if successful.  */
 
@@ -50527,9 +50626,6 @@ ix86_expand_pinsr (rtx *operands)
       dst = SUBREG_REG (dst);
     }
 
-  if (GET_CODE (src) == SUBREG)
-    src = SUBREG_REG (src);
-
   switch (GET_MODE (dst))
     {
     case V16QImode:
@@ -50536,9 +50632,12 @@ ix86_expand_pinsr (rtx *operands)
     case V8HImode:
     case V4SImode:
     case V2DImode:
+    case V1TImode:
+    case TImode:
       {
        machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);
+       rtx d;
 
        srcmode = mode_for_size (size, MODE_INT, 0);
 
@@ -50577,15 +50676,36 @@ ix86_expand_pinsr (rtx *operands)
            return false;
          }
 
-       rtx d = dst;
-       if (GET_MODE (dst) != dstmode)
+       if (GET_CODE (src) == SUBREG)
+         {
+           unsigned int srcpos = SUBREG_BYTE (src);
+
+           if (srcpos > 0)
+             {
+               rtx extr_ops[4];
+
+               extr_ops[0] = gen_reg_rtx (srcmode);
+               extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
+               extr_ops[2] = GEN_INT (size);
+               extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
+
+               if (!ix86_expand_pextr (extr_ops))
+                 return false;
+
+               src = extr_ops[0];
+             }
+           else
+             src = gen_lowpart (srcmode, SUBREG_REG (src));
+         }
+
+       if (GET_MODE (dst) == dstmode)
+         d = dst;
+       else
          d = gen_reg_rtx (dstmode);
-       src = gen_lowpart (srcmode, src);
 
-       pos /= size;
-
-       emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
-                         GEN_INT (1 << pos)));
+       emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
+                         gen_lowpart (srcmode, src),
+                         GEN_INT (1 << (pos / size))));
        if (d != dst)
          emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
        return true;
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 225844)
+++ config/i386/i386.md (working copy)
@@ -2734,6 +2734,9 @@
                             (match_operand:SI 3 "const_int_operand")))]
   ""
 {
+  if (ix86_expand_pextr (operands))
+    DONE;
+
   /* Handle extractions from %ah et al.  */
   if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
     FAIL;
@@ -2781,10 +2784,10 @@
        (const_string "QI")))])
 
 (define_expand "insv<mode>"
-  [(set (zero_extract:SWI48 (match_operand:SWI48 0 "register_operand")
-                           (match_operand:SI 1 "const_int_operand")
-                           (match_operand:SI 2 "const_int_operand"))
-        (match_operand:SWI48 3 "register_operand"))]
+  [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
+                            (match_operand:SI 1 "const_int_operand")
+                            (match_operand:SI 2 "const_int_operand"))
+        (match_operand:SWI248 3 "register_operand"))]
   ""
 {
   rtx dst;
@@ -2811,10 +2814,10 @@
 })
 
 (define_insn "insv<mode>_1"
-  [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "+Q,Q")
-                           (const_int 8)
-                           (const_int 8))
-       (match_operand:SWI48 1 "general_x64nomem_operand" "Qn,m"))]
+  [(set (zero_extract:SWI248 (match_operand 0 "ext_register_operand" "+Q,Q")
+                            (const_int 8)
+                            (const_int 8))
+       (match_operand:SWI248 1 "general_x64nomem_operand" "Qn,m"))]
   ""
 {
   if (CONST_INT_P (operands[1]))
Index: testsuite/g++.dg/pr66866.C
===================================================================
--- testsuite/g++.dg/pr66866.C  (revision 0)
+++ testsuite/g++.dg/pr66866.C  (working copy)
@@ -0,0 +1,29 @@
+// { dg-do run { target i?86-*-* x86_64-*-* } }
+// { dg-require-effective-target sse2_runtime }
+// { dg-options "-O -msse2" }
+
+extern "C" void abort (void);
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef short A __attribute__((__may_alias__));
+
+__m128i __attribute__((noinline))
+shuf(const __m128i v)
+{
+  __m128i r;
+
+  reinterpret_cast<A *>(&r)[5] = reinterpret_cast<const A *>(&v)[4];
+  return r;
+}
+
+int main()
+{
+  __attribute__((aligned(16))) short mem[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  *reinterpret_cast<__m128i *>(mem) = shuf (*reinterpret_cast<__m128i *>(mem));
+
+  if (mem[5] != 4)
+    abort ();
+
+  return 0;
+}

Reply via email to