A representation with vec_merge and a 0..255 constant is incorrect.
Both 128-bit lanes are merged with the same pattern, thus the rtl-level
vec_merge operand should replicate the imm8 operand into two bytes.

I created an expander with the old name and old interface so that we
can continue using it to implement the builtin.

Tested with the intel sde.  Committed.


r~
        * config/i386/predicates.md (avx2_pblendw_operand): New.
        * config/i386/sse.md (sse4_1_pblendw): Un-macroize.
        (avx2_pblendw, *avx2_pblendw): New expander and insn.



diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 89cc8a7..9ac3f9d 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1210,3 +1210,12 @@
       return false;
   return true;
 })
+
+;; Return true if OP is a proper third operand to vpblendw256.
+(define_predicate "avx2_pblendw_operand"
+  (match_code "const_int")
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  HOST_WIDE_INT low = val & 0xff;
+  return val == (low << 8) | low;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a7df221..9dc9b46 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9417,11 +9417,11 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<sse4_1_avx2>_pblendw"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
-       (vec_merge:VI2_AVX2
-         (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
-         (match_operand:VI2_AVX2 1 "register_operand" "0,x")
+(define_insn "sse4_1_pblendw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+       (vec_merge:V8HI
+         (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
+         (match_operand:V8HI 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
   "TARGET_SSE4_1"
   "@
@@ -9432,7 +9432,37 @@
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set_attr "mode" "TI")])
+
+;; The builtin uses an 8-bit immediate.  Expand that.
+(define_expand "avx2_pblendw"
+  [(set (match_operand:V16HI 0 "register_operand" "")
+       (vec_merge:V16HI
+         (match_operand:V16HI 2 "nonimmediate_operand" "")
+         (match_operand:V16HI 1 "register_operand" "")
+         (match_operand:SI 3 "const_0_to_255_operand" "")))]
+  "TARGET_AVX2"
+{
+  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
+  operands[3] = GEN_INT (val << 8 | val);
+})
+
+(define_insn "*avx2_pblendw"
+  [(set (match_operand:V16HI 0 "register_operand" "=x")
+       (vec_merge:V16HI
+         (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+         (match_operand:V16HI 1 "register_operand" "x")
+         (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
+  "TARGET_SSE4_1"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
+  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
 
 (define_insn "avx2_pblendd<mode>"
   [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")

Reply via email to