Hi: This patch adds a tune option for integer mask cmov. Some targets have both integer mask registers and SSE mask registers; this tune indicates that the integer ones should be used. Currently it is on by default for m_CORE_AVX512.
Bootstrap is ok, and regression tests on the i386/x86_64 backends are ok. Ok for trunk? Changelog gcc/ * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Return false if the target does not prefer using integer mask cmov for 128/256-bit vectors under avx512f. * config/i386/i386.h (TARGET_PREFER_INTEGER_MASK_CMOV): New macro. * config/i386/x86-tune.def (X86_TUNE_PREFER_INTEGER_MASK_CMOV): New tune. gcc/testsuite * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Adjust test case. * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: Ditto. * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr88547-1.c: Ditto. -- BR, Hongtao
From 716bdede7f23ef035d93fb1d4f6917e19cef5f3e Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Wed, 11 Dec 2019 16:38:04 +0800 Subject: [PATCH] Add tune option for integer mask cmov, and enable this tune for m_CORE_AVX512 Changelog gcc/ * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Return false if the target does not prefer using integer mask cmov for 128/256-bit vectors under avx512f. * config/i386/i386.h (TARGET_PREFER_INTEGER_MASK_CMOV): New macro. * config/i386/x86-tune.def (X86_TUNE_PREFER_INTEGER_MASK_CMOV): New tune. gcc/testsuite * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Adjust test case. * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: Ditto. * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr88547-1.c: Ditto. 
--- gcc/config/i386/i386-expand.c | 4 ++++ gcc/config/i386/i386.h | 2 ++ gcc/config/i386/x86-tune.def | 10 ++++++++++ .../gcc.target/i386/avx512bw-pr92686-movcc-1.c | 2 +- .../gcc.target/i386/avx512bw-pr92686-movcc-2.c | 2 +- .../gcc.target/i386/avx512bw-pr92686-vpcmp-1.c | 2 +- .../gcc.target/i386/avx512bw-pr92686-vpcmp-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c | 6 +++--- .../gcc.target/i386/avx512vl-pr92686-movcc-1.c | 2 +- .../gcc.target/i386/avx512vl-pr92686-movcc-2.c | 2 +- .../gcc.target/i386/avx512vl-pr92686-vpcmp-1.c | 2 +- .../gcc.target/i386/avx512vl-pr92686-vpcmp-2.c | 2 +- 12 files changed, 27 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index cbf4eb7..a627642 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3431,6 +3431,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode) if (TARGET_XOP && !TARGET_AVX512F) return false; + /* For 512-bit vector, only integer mask vcmp/vcmov is valid. */ + if (!TARGET_PREFER_INTEGER_MASK_CMOV && GET_MODE_SIZE (mode) != 64) + return false; + /* AVX512F is needed for mask operation. */ if (!(TARGET_AVX512F && VECTOR_MODE_P (mode))) return false; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2542cb3..23d796e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -596,6 +596,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE] #define TARGET_EMIT_VZEROUPPER \ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] +#define TARGET_PREFER_INTEGER_MASK_CMOV \ + ix86_tune_features[X86_TUNE_PREFER_INTEGER_MASK_CMOV] /* Feature tests against the various architecture variations. 
*/ enum ix86_arch_indices { diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 328535d..e944f39 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -467,6 +467,16 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) /*****************************************************************************/ +/* AVX512 instruction selection tuning. */ +/*****************************************************************************/ + +/* X86_TUNE_PREFER_INTEGER_MASK_CMOV: Use integer mask vcmov/vcmp for + 128/256-bit vector under avx512f, there are also instructions + using sse regs as mask under avx2 or xop. */ +DEF_TUNE (X86_TUNE_PREFER_INTEGER_MASK_CMOV, "prefer_integer_mask_cmov", + m_CORE_AVX512) + +/*****************************************************************************/ /* Historical relics: tuning flags that helps a specific old CPU designs */ /*****************************************************************************/ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c index 2a89077..7afc37e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c @@ -1,6 +1,6 @@ /* PR target/92686 */ /* { dg-do compile } */ -/* { dg-options "-Ofast -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */ +/* { dg-options "-Ofast -march=skylake-avx512 -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */ /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c index 53a7da1..0386ea6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c @@ -1,7 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512bw } */ /* { dg-require-effective-target avx512vl } */ -/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */ +/* { dg-options "-Ofast -march=skylake-avx512 -mprefer-vector-width=256" } */ #ifndef CHECK #define CHECK "avx512f-helper.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c index 4fd3b36..b94c19d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c @@ -1,6 +1,6 @@ /* PR target/92686 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop" } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512dq -mno-avx512vl -mno-xop" } */ /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c index 0ea5b56..6e31c09 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512bw } */ -/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ #ifndef CHECK #define CHECK "avx512f-helper.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c index a3ffeca..432a19e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c @@ -6,7 +6,7 @@ /* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ #include "avx2-pr88547-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c index 1b9644a..722ba9f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c @@ -1,6 +1,6 @@ /* PR target/92686 */ /* { dg-do compile } */ -/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */ +/* { dg-options "-Ofast -march=skylake-avx512 -mno-xop -mprefer-vector-width=256" } */ /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c index 5f5562b..b9c1881 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c @@ -1,7 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512bw } */ /* { dg-require-effective-target avx512vl } */ -/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */ +/* { dg-options "-Ofast -march=skylake-avx512 
-mprefer-vector-width=256" } */ #ifndef CHECK #define CHECK "avx512f-helper.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c index 5b79d4d..5bf008e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c @@ -1,6 +1,6 @@ /* PR target/88547 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512dq -mno-xop" } */ /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c index 6be24ff..f5cbdeb 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c @@ -1,7 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512bw } */ /* { dg-require-effective-target avx512vl } */ -/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ #ifndef CHECK #define CHECK "avx512f-helper.h" -- 1.7.0.7