https://gcc.gnu.org/g:9c8cea8feb6cd54ef73113a0b74f1df7b60d09dc

commit r15-4233-g9c8cea8feb6cd54ef73113a0b74f1df7b60d09dc
Author: liuhongt <hongtao....@intel.com>
Date:   Tue Sep 24 15:53:14 2024 +0800

    Add new microarchitecture tune for SRF/GRR/CWF.
    
    For Crestmont, 4-operand VEX blendv instructions come from MSROM and
    are slower than the 3-instruction sequence (op1 & mask) | (op2 & ~mask).
    Legacy blendv instructions can still be handled by the decoder.
    
    The patch adds a new tune which is enabled for all processors except
    for SRF/CWF. It will use vpand + vpandn + vpor instead of
    vpblendvb (similarly for vblendvps/vblendvpd) for SRF/CWF.
    
    gcc/ChangeLog:
    
            * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Guard
            blendv instruction generation under new tune.
            * config/i386/i386.h (TARGET_SSE_MOVCC_USE_BLENDV): New Macro.
            * config/i386/x86-tune.def (X86_TUNE_SSE_MOVCC_USE_BLENDV):
            New tune.

Diff:
---
 gcc/config/i386/i386-expand.cc                     | 24 +++++++++++-----------
 gcc/config/i386/i386.h                             |  2 ++
 gcc/config/i386/x86-tune.def                       |  8 ++++++++
 .../gcc.target/i386/sse_movcc_use_blendv.c         | 12 +++++++++++
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 32840113cf60..0734399e4955 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4344,23 +4344,23 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
   switch (mode)
     {
     case E_V2SFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_mmx_blendvps;
       break;
     case E_V4SFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_sse4_1_blendvps;
       break;
     case E_V2DFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_sse4_1_blendvpd;
       break;
     case E_SFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_sse4_1_blendvss;
       break;
     case E_DFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_sse4_1_blendvsd;
       break;
     case E_V8QImode:
@@ -4368,7 +4368,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
     case E_V4HFmode:
     case E_V4BFmode:
     case E_V2SImode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        {
          gen = gen_mmx_pblendvb_v8qi;
          blend_mode = V8QImode;
@@ -4378,14 +4378,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
     case E_V2HImode:
     case E_V2HFmode:
     case E_V2BFmode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        {
          gen = gen_mmx_pblendvb_v4qi;
          blend_mode = V4QImode;
        }
       break;
     case E_V2QImode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        gen = gen_mmx_pblendvb_v2qi;
       break;
     case E_V16QImode:
@@ -4395,18 +4395,18 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
     case E_V4SImode:
     case E_V2DImode:
     case E_V1TImode:
-      if (TARGET_SSE4_1)
+      if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
        {
          gen = gen_sse4_1_pblendvb;
          blend_mode = V16QImode;
        }
       break;
     case E_V8SFmode:
-      if (TARGET_AVX)
+      if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
        gen = gen_avx_blendvps256;
       break;
     case E_V4DFmode:
-      if (TARGET_AVX)
+      if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
        gen = gen_avx_blendvpd256;
       break;
     case E_V32QImode:
@@ -4415,7 +4415,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
     case E_V16BFmode:
     case E_V8SImode:
     case E_V4DImode:
-      if (TARGET_AVX2)
+      if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV)
        {
          gen = gen_avx2_pblendvb;
          blend_mode = V32QImode;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 82177b9d3839..d5d54ee66040 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -462,6 +462,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
 #define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
 #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
+#define TARGET_SSE_MOVCC_USE_BLENDV \
+       ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 3d123da95f0c..b815b6dc255b 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -534,6 +534,14 @@ DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, 
"avoid_fma512_chains", m_ZNVER5)
 DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
          "v2df_reduction_prefer_haddpd", m_NONE)
 
+/* X86_TUNE_SSE_MOVCC_USE_BLENDV: Prefer blendv instructions to
+   3-instruction sequence (op1 & mask) | (op2 & ~mask)
+   for vector condition move.
+   For Crestmont, 4-operand vex blendv instructions come from MSROM
+   which is slow.  */
+DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
+         "sse_movcc_use_blendv", ~m_CORE_ATOM)
+
 /*****************************************************************************/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
 /*****************************************************************************/
diff --git a/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c 
b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c
new file mode 100644
index 000000000000..ac9f15249491
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=sierraforest -O2" } */
+/* { dg-final { scan-assembler-not {(?n)vp?blendv(b|ps|pd)} } } */
+
+void
+foo (int* a, int* b, int* __restrict c)
+{
+  for (int i = 0; i != 200; i++)
+    {
+      c[i] += a[i] > b[i] ? 1 : -1;
+    }
+}

Reply via email to