https://gcc.gnu.org/g:a51f2fc0d80869ab079a93cc3858f24a1fd28237
commit r15-3498-ga51f2fc0d80869ab079a93cc3858f24a1fd28237 Author: liuhongt <hongtao....@intel.com> Date: Wed Sep 4 15:39:17 2024 +0800 Handle const0_operand for *avx2_pcmp<mode>3_1. *<avx512>_eq<mode>3<mask_scalar_merge_name>_1 supports nonimm_or_0_operand for op1 and op2, but pass_combine would fail to lower the avx512 comparison back to the avx2 one when op1/op2 is const0_rtx. This is because the splitter only supports nonimmediate_operand. Failed to match this instruction: (set (reg/i:V16QI 20 xmm0) (vec_merge:V16QI (const_vector:V16QI [ (const_int -1 [0xffffffffffffffff]) repeated x16 ]) (const_vector:V16QI [ (const_int 0 [0]) repeated x16 ]) (unspec:HI [ (reg:V16QI 105 [ a ]) (const_vector:V16QI [ (const_int 0 [0]) repeated x16 ]) (const_int 0 [0]) ] UNSPEC_PCMP))) The patch extends the predicates of the splitter to handle that. gcc/ChangeLog: PR target/115517 * config/i386/sse.md (*avx2_pcmp<mode>3_1): Change predicate of operands[3] and operands[4] from nonimmediate_operand to nonimm_or_0_operand. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115517.c: New test. 
Diff: --- gcc/config/i386/sse.md | 9 ++++++-- gcc/testsuite/gcc.target/i386/pr115517.c | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3bf95f0b0e5..1946d3513be 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17908,8 +17908,8 @@ (match_operand:VI_128_256 1 "vector_all_ones_operand") (match_operand:VI_128_256 2 "const0_operand") (unspec:<avx512fmaskmode> - [(match_operand:VI_128_256 3 "nonimmediate_operand") - (match_operand:VI_128_256 4 "nonimmediate_operand") + [(match_operand:VI_128_256 3 "nonimm_or_0_operand") + (match_operand:VI_128_256 4 "nonimm_or_0_operand") (match_operand:SI 5 "const_0_to_7_operand")] UNSPEC_PCMP)))] "TARGET_AVX512VL && ix86_pre_reload_split () @@ -17928,6 +17928,11 @@ { if (INTVAL (operands[5]) == 1) std::swap (operands[3], operands[4]); + + operands[3] = force_reg (<MODE>mode, operands[3]); + if (operands[4] == CONST0_RTX (<MODE>mode)) + operands[4] = force_reg (<MODE>mode, operands[4]); + enum rtx_code code = INTVAL (operands[5]) ? 
GT : EQ; emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode, operands[3], operands[4])); diff --git a/gcc/testsuite/gcc.target/i386/pr115517.c b/gcc/testsuite/gcc.target/i386/pr115517.c new file mode 100644 index 00000000000..e91d2c23a6b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115517.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-times "vpcmpeq" 4 } } */ +/* { dg-final { scan-assembler-not {(?n)%k[0-9]} } } */ + +typedef char v16qi __attribute__((vector_size(16))); +typedef short v8hi __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); +typedef long long v2di __attribute__((vector_size(16))); + +v16qi +foo (v16qi a) +{ + v16qi b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return a == b; +} + +v8hi +foo2 (v8hi a) +{ + v8hi b = {0, 0, 0, 0, 0, 0, 0, 0}; + return a == b; +} + +v4si +foo3 (v4si a) +{ + v4si b = {0, 0, 0, 0}; + return a == b; +} + +v2di +foo4 (v2di a) +{ + v2di b = {0, 0}; + return a == b; +} +