Hi! The following testcase is miscompiled because, for vinserti32x4, the expander creates an incorrect VEC_MERGE selector. The selector matches what the define_insn later decodes, so it often works properly, but when the middle-end sees the RTL it can try to simplify it (in this case fwprop1, but in theory also e.g. the combiner when using simplify-rtx etc.), and the simplification then acts on the wrong selector bits. The bits in the VEC_MERGE selector are ordered so that the lowest bit controls the lowest vector element (i.e. which of the first two VEC_MERGE operands that element is taken from), but the define_expand and define_insn were instead counting them from the highest bit.
Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-04-03 Jakub Jelinek <ja...@redhat.com> PR target/85177 * config/i386/sse.md (<extract_type>_vinsert<shuffletype><extract_suf>_mask): Fix computation of the VEC_MERGE selector from mask. (<extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>): Fix decoding of the VEC_MERGE selector into mask. * gcc.target/i386/avx512f-pr85177.c: New test. --- gcc/config/i386/sse.md.jj 2018-04-01 08:03:37.771565040 +0200 +++ gcc/config/i386/sse.md 2018-04-03 16:41:23.001090852 +0200 @@ -12627,11 +12627,11 @@ (define_expand "<extract_type>_vinsert<s (match_operand:<avx512fmaskmode> 5 "register_operand")] "TARGET_AVX512F" { - int mask,selector; + int mask, selector; mask = INTVAL (operands[3]); - selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? - 0xFFFF ^ (0xF000 >> mask * 4) - : 0xFF ^ (0xC0 >> mask * 2); + selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 + ? 0xFFFF ^ (0x000F << mask * 4) + : 0xFF ^ (0x03 << mask * 2)); emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask (operands[0], operands[1], operands[2], GEN_INT (selector), operands[4], operands[5])); @@ -12650,16 +12650,16 @@ (define_insn "<mask_codefor><extract_typ int mask; int selector = INTVAL (operands[3]); - if (selector == 0xFFF || selector == 0x3F) + if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC)) mask = 0; - else if ( selector == 0xF0FF || selector == 0xCF) + else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3)) mask = 1; - else if ( selector == 0xFF0F || selector == 0xF3) + else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF)) mask = 2; - else if ( selector == 0xFFF0 || selector == 0xFC) + else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 
0x0FFF : 0x3F)) mask = 3; else - gcc_unreachable (); + gcc_unreachable (); operands[3] = GEN_INT (mask); --- gcc/testsuite/gcc.target/i386/avx512f-pr85177.c.jj 2018-04-03 17:05:54.166498062 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-pr85177.c 2018-04-03 17:06:16.695503852 +0200 @@ -0,0 +1,30 @@ +/* PR target/85177 */ +/* { dg-do run { target { avx512f && int128 } } } */ +/* { dg-options "-O -fno-tree-ccp -fno-tree-sra -mavx512f -mno-avx512bw" } */ + +#include "avx512f-check.h" + +typedef short U __attribute__ ((vector_size (64))); +typedef __int128 V __attribute__ ((vector_size (64))); + +static inline __attribute__((always_inline)) U +foo (int i, U u) +{ + u[i & 1] = 1; + return u; +} + +__attribute__((noipa)) int +bar () +{ + V x = (V) foo (0, (U) { }); + for (unsigned i = 0; i < 4; i++) + if (x[i] != (i == 0)) __builtin_abort (); + return 0; +} + +static void +avx512f_test (void) +{ + bar (); +} Jakub