To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.

2021-06-17  Uroš Bizjak  <ubiz...@gmail.com>

gcc/
    PR target/97194
    * config/i386/i386-expand.c (ix86_expand_vector_set_var):
    Handle V2SFmode remapping.  Pass TARGET_MMX_WITH_SSE to
    ix86_expand_vector_init_duplicate.
    (ix86_expand_vector_init_duplicate): Emit insv_1 for
    QImode for !TARGET_PARTIAL_REG_STALL.
    * config/i386/predicates.md (vec_setm_mmx_operand): New predicate.
    * config/i386/mmx.md (vec_setv2sf): Use vec_setm_mmx_operand
    as operand 2 predicate.  Call ix86_expand_vector_set_var
    for non-constant index operand.
    (vec_setv2si): Ditto.
    (vec_setv4hi): Ditto.
    (vec_setv8qi): Ditto.

gcc/testsuite/

    PR target/97194
    * gcc.target/i386/sse4_1-vec-set-1.c: New test.
    * gcc.target/i386/sse4_1-vec-set-2.c: Ditto.
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index eb6f9b0684e..8f4e4e4d884 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13811,10 +13811,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
        wsmode = GET_MODE_INNER (wvmode);
 
        val = convert_modes (wsmode, smode, val, true);
-       x = expand_simple_binop (wsmode, ASHIFT, val,
-                                GEN_INT (GET_MODE_BITSIZE (smode)),
-                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
-       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+       if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
+         emit_insn (gen_insv_1 (wsmode, val, val));
+       else
+         {
+           x = expand_simple_binop (wsmode, ASHIFT, val,
+                                    GEN_INT (GET_MODE_BITSIZE (smode)),
+                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
+           val = expand_simple_binop (wsmode, IOR, val, x, x, 1,
+                                      OPTAB_LIB_WIDEN);
+         }
 
        x = gen_reg_rtx (wvmode);
        ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
@@ -14788,6 +14795,9 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
        case E_V8DFmode:
          cmp_mode = V8DImode;
          break;
+       case E_V2SFmode:
+         cmp_mode = V2SImode;
+         break;
        case E_V4SFmode:
          cmp_mode = V4SImode;
          break;
@@ -14809,9 +14819,11 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
   idxv = gen_reg_rtx (cmp_mode);
   idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
 
-  ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+  ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+                                         mode, valv, val);
   gcc_assert (ok);
-  ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+  ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+                                         cmp_mode, idxv, idx_tmp);
   gcc_assert (ok);
   vec[0] = target;
   vec[1] = valv;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 59a16f4cd50..a107ac5ccb4 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1279,11 +1279,14 @@ (define_insn "*mmx_concatv2sf"
 (define_expand "vec_setv2sf"
   [(match_operand:V2SF 0 "register_operand")
    (match_operand:SF 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-                         INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+                           INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -2989,11 +2992,14 @@ (define_insn "*mmx_concatv2si"
 (define_expand "vec_setv2si"
   [(match_operand:V2SI 0 "register_operand")
    (match_operand:SI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-                         INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+                           INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -3145,11 +3151,14 @@ (define_expand "vec_initv2sisi"
 (define_expand "vec_setv4hi"
   [(match_operand:V4HI 0 "register_operand")
    (match_operand:HI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-                         INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+                           INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -3177,11 +3186,14 @@ (define_expand "vec_initv4hihi"
 (define_expand "vec_setv8qi"
   [(match_operand:V8QI 0 "register_operand")
    (match_operand:QI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-                         INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+                           INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3dd134e7f22..e7a896874d6 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1026,6 +1026,12 @@ (define_predicate "vec_setm_operand"
            (match_test "TARGET_AVX2"))
        (match_code "const_int")))
 
+(define_predicate "vec_setm_mmx_operand"
+  (ior (and (match_operand 0 "register_operand")
+           (match_test "TARGET_SSE4_1")
+           (match_test "TARGET_MMX_WITH_SSE"))
+       (match_code "const_int")))
+
 ;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
 (define_predicate "reg_or_pm1_operand"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
new file mode 100644
index 00000000000..7c7fd34bbc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse4.1 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 4 } } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+
+#define FOO(VTYPE, TYPE)                       \
+  VTYPE                                                \
+  __attribute__ ((noipa))                      \
+  foo_##VTYPE (VTYPE a, TYPE b, unsigned int c)        \
+  {                                            \
+    a[c] = b;                                  \
+    return a;                                  \
+  }                                            \
+
+FOO (v8qi, char);
+
+FOO (v4hi, short);
+
+FOO (v2si, int);
+
+FOO (v2sf, float);
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
new file mode 100644
index 00000000000..24f80414761
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+
+#ifndef CHECK
+#define CHECK "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK
+
+#include "sse4_1-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)                         \
+do                                                             \
+  {                                                            \
+    int i,val = idx * idx - idx * 3 + 16;                      \
+    type res[N],exp[N];                                                \
+    vtype resv;                                                        \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       res[i] = i * i - i * 3 + 15;                            \
+       exp[i] = res[i];                                        \
+      }                                                                \
+    exp[idx] = val;                                            \
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);          \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       if (resv[i] != exp[i])                                  \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+TEST (void)
+{
+  CALC_TEST (v8qi, char, 8, 5);
+  CALC_TEST (v4hi, short, 4, 2);
+  CALC_TEST (v2si, int, 2, 1);
+}

Reply via email to