Hi! The following testcase is miscompiled with -O3 -mavx512bw, the combiner sees there isn't just *xorv32hi3 define_insn with no masking, but that there is one with masking as well (with the same name) and uses it, but 1) such instruction doesn't really exist, for masking we only have VPXORD and VPXORQ, there is no VPXORW or VPXORB 2) the pattern emits just non-masked string. In tmp-mddump.md we can see: ;; ../../gcc/config/i386/sse.md: 11825 (define_insn ("*xorv32hi3") [ (set (match_operand:V32HI 0 ("register_operand") ("=x,x,v")) (xor:V32HI (match_operand:V32HI 1 ("vector_operand") ("%0,x,v")) (match_operand:V32HI 2 ("vector_operand") ("xBm,xm,vm")))) ] ("(TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))) && (TARGET_AVX512F)") ("*{ ... ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; and ;; ../../gcc/config/i386/sse.md: 11825 (define_insn ("*xorv32hi3") [ (set (match_operand:V32HI 0 ("register_operand") ("=x,x,v")) (vec_merge:V32HI (xor:V32HI (match_operand:V32HI 1 ("vector_operand") ("%0,x,v")) (match_operand:V32HI 2 ("vector_operand") ("xBm,xm,vm"))) (match_operand:V32HI 3 ("vector_move_operand") ("0C,0C,0C")) (match_operand:SI 4 ("register_operand") ("Yk,Yk,Yk")))) ] ("(TARGET_AVX512F) && ((TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))) && (TARGET_AVX512F))") ("*{ ... ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; For the any_logic patterns we want masking only on the V*[SD][IF]mode patterns and not on V*[HQ]Imode ones and most of the pattern does it right, except there was <mask_prefix3> in set_attr rather than just orig,vex and even a single subst attribute anywhere forces substitution.
I'd done grep 'define_insn ' tmp-mddump.md | sort | uniq -c | sort -n | grep -v '^ *1 ' | less to look for other occurrences of similar bug and indeed found another pattern where we don't really want masking, in that case not because masking would not be provided for that operation, but because we have explicit masked define_insn for it, as we need two different for different sets of modes - e.g. for *addv32hi3 there is one non-masked and one incorrect masked define_insn created from sse.md: 10085 and explicit *addv32hi3_mask from sse.md: 10114, the incorrect masked one has wrong condition and comes first. There are many other define_insns with duplicated (or more) names in the above command, but most of them seemed due to :P iterator which is probably fine for same named patterns, there will be just one pattern enabled for 32-bit and one for 64-bit, so no need to obfuscate the names. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? This isn't a regression (except perhaps with -O3 -march=native on SKX), but it is a serious wrong-code. 2018-02-23 Jakub Jelinek <ja...@redhat.com> PR target/84524 * config/i386/sse.md (*<code><mode>3): Replace <mask_prefix3> with orig,vex. (*<plusminus_insn><mode>3): Likewise. Remove <mask_operand3> uses. * gcc.c-torture/execute/pr84524.c: New test. * gcc.target/i386/avx512bw-pr84524.c: New test. --- gcc/config/i386/sse.md.jj 2018-02-13 09:31:24.769607162 +0100 +++ gcc/config/i386/sse.md 2018-02-23 11:51:00.271477979 +0100 @@ -10090,11 +10090,11 @@ (define_insn "*<plusminus_insn><mode>3" "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} - vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "<mask_prefix3>") + (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "*<plusminus_insn><mode>3_mask" @@ -11899,7 +11899,7 @@ (define_insn "*<code><mode>3" (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "<mask_prefix3>,evex") + (set_attr "prefix" "orig,vex,evex") (set (attr "mode") (cond [(and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) --- gcc/testsuite/gcc.c-torture/execute/pr84524.c.jj 2018-02-23 11:54:51.913492631 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr84524.c 2018-02-23 11:59:55.467511836 +0100 @@ -0,0 +1,41 @@ +/* PR target/84524 */ + +__attribute__((noipa)) void +foo (unsigned short *x) +{ + unsigned short i, v; + unsigned char j; + for (i = 0; i < 256; i++) + { + v = i << 8; + for (j = 0; j < 8; j++) + if (v & 0x8000) + v = (v << 1) ^ 0x1021; + else + v = v << 1; + x[i] = v; + } +} + +int +main () +{ + unsigned short a[256]; + + foo (a); + for (int i = 0; i < 256; i++) + { + unsigned short v = i << 8; + for (int j = 0; j < 8; j++) + { + asm volatile ("" : "+r" (v)); + if (v & 0x8000) + v = (v << 1) ^ 0x1021; + else + v = v << 1; + } + if (a[i] != v) + __builtin_abort (); + } + return 0; +} --- gcc/testsuite/gcc.target/i386/avx512bw-pr84524.c.jj 2018-02-23 11:58:16.919505601 +0100 +++ gcc/testsuite/gcc.target/i386/avx512bw-pr84524.c 2018-02-23 11:58:57.377508169 +0100 @@ -0,0 +1,14 @@ +/* PR target/84524 */ +/* { dg-do run { target avx512bw } } */ +/* { dg-options "-O3 -mavx512bw" } */ + +#include "avx512bw-check.h" + +#define main() do_main() +#include "../../gcc.c-torture/execute/pr84524.c" + +static void +avx512bw_test (void) +{ + do_main (); +} Jakub