The representation of the 256-bit vpblendw with vec_merge and a 0..255 constant is incorrect: the instruction blends both 128-bit lanes with the same 8-bit pattern, so the rtl-level vec_merge selector must be the imm8 operand replicated into two bytes.
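To make the lane behaviour concrete, here is a minimal scalar model (mine, not part of the patch; the function name and test values are made up) of what vpblendw256 / _mm256_blend_epi16 does and why the flat selector has to be the imm8 replicated into both bytes:

#include <stdio.h>

/* Scalar model of vpblendw256: the same 8-bit mask selects within each
   128-bit lane, so element i of the result comes from B when bit (i % 8)
   of the immediate is set.  Equivalently, the 16-element selector that
   vec_merge needs is (imm8 << 8) | imm8.  */
static void
blendw256_model (short dst[16], const short a[16], const short b[16], int imm8)
{
  int sel = ((imm8 & 0xff) << 8) | (imm8 & 0xff);
  for (int i = 0; i < 16; i++)
    dst[i] = (sel >> i) & 1 ? b[i] : a[i];
}

int
main (void)
{
  short a[16], b[16], dst[16];
  for (int i = 0; i < 16; i++)
    a[i] = i, b[i] = 100 + i;
  /* 0x0f takes the low four words of *each* 128-bit lane from B.  */
  blendw256_model (dst, a, b, 0x0f);
  for (int i = 0; i < 16; i++)
    printf ("%d ", dst[i]);
  printf ("\n");
  return 0;
}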
I created an expander with the old name and old interface so that we can continue using it to implement the builtin.

Tested with the Intel SDE.  Committed.

r~
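For illustration (not part of the patch; the function name and immediate below are arbitrary examples), the builtin keeps its 8-bit interface, so intrinsic code like this should continue to compile unchanged and, built with -mavx2, emit a single vpblendw:

#include <immintrin.h>

/* _mm256_blend_epi16 still takes a 0..255 immediate; the avx2_pblendw
   expander rewrites it to the replicated 16-bit selector internally.  */
__m256i
blend_low_words (__m256i a, __m256i b)
{
  return _mm256_blend_epi16 (a, b, 0x0f);
}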
        * config/i386/predicates.md (avx2_pblendw_operand): New.
        * config/i386/sse.md (sse4_1_pblendw): Un-macroize.
        (avx2_pblendw, *avx2_pblendw): New expander and insn.

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 89cc8a7..9ac3f9d 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1210,3 +1210,12 @@
     return false;
   return true;
 })
+
+;; Return true if OP is a proper third operand to vpblendw256.
+(define_predicate "avx2_pblendw_operand"
+  (match_code "const_int")
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  HOST_WIDE_INT low = val & 0xff;
+  return val == ((low << 8) | low);
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a7df221..9dc9b46 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9417,11 +9417,11 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<sse4_1_avx2>_pblendw"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
-	(vec_merge:VI2_AVX2
-	  (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
-	  (match_operand:VI2_AVX2 1 "register_operand" "0,x")
+(define_insn "sse4_1_pblendw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+	(vec_merge:V8HI
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
+	  (match_operand:V8HI 1 "register_operand" "0,x")
 	  (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
   "TARGET_SSE4_1"
   "@
@@ -9432,7 +9432,37 @@
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set_attr "mode" "TI")])
+
+;; The builtin uses an 8-bit immediate.  Expand that.
+(define_expand "avx2_pblendw"
+  [(set (match_operand:V16HI 0 "register_operand" "")
+	(vec_merge:V16HI
+	  (match_operand:V16HI 2 "nonimmediate_operand" "")
+	  (match_operand:V16HI 1 "register_operand" "")
+	  (match_operand:SI 3 "const_0_to_255_operand" "")))]
+  "TARGET_AVX2"
+{
+  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
+  operands[3] = GEN_INT (val << 8 | val);
+})
+
+(define_insn "*avx2_pblendw"
+  [(set (match_operand:V16HI 0 "register_operand" "=x")
+	(vec_merge:V16HI
+	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+	  (match_operand:V16HI 1 "register_operand" "x")
+	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
+  "TARGET_AVX2"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
+  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
 
 (define_insn "avx2_pblendd<mode>"
   [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")