kmovd only uses port 5, which is often the performance bottleneck. Also, from
a latency perspective, a spill to memory and its reload can mostly be handled
by store-to-load forwarding (STLF) or even memory renaming (MRN), which only
takes 1 cycle.

So the patch increases the move cost between gpr and mask registers to be the
same as the gpr <-> sse register move cost, so that the allocator prefers
spilling gprs to memory rather than to mask registers.
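
For illustration, here is a hypothetical kernel (made-up names, not the
committed spill_to_mask-1.c testcase) with more simultaneously live 32-bit
values than available GPRs, so the allocator has to spill; with the old lower
cost it may spill through kmovd into mask registers, with the new cost it
should prefer stack slots:

/* Hypothetical example, not the committed testcase: eight accumulators plus
   temporaries exceed the available GPRs, so something must be spilled.
   Compile with e.g. -O2 -march=skylake-avx512 and check whether the spills
   go through kmovd or through the stack.  */
typedef unsigned int u32;

u32
mix (u32 a, u32 b, u32 c, u32 d, u32 e, u32 f, u32 g, u32 h, const u32 *k)
{
  for (int i = 0; i < 64; i++)
    {
      a += (b ^ c) + k[i & 15];
      b += (c ^ d) + a;
      c += (d ^ e) + b;
      d += (e ^ f) + c;
      e += (f ^ g) + d;
      f += (g ^ h) + e;
      g += (h ^ a) + f;
      h += (a ^ b) + g;
    }
  return a ^ b ^ c ^ d ^ e ^ f ^ g ^ h;
}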

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        * config/i386/x86-tune-costs.h (skylake_cost): Increase gpr
        <-> mask cost from 5 to 6.
        (icelake_cost): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/spill_to_mask-1.c: XFAIL scan for kmovd.
---
 gcc/config/i386/x86-tune-costs.h                | 4 ++--
 gcc/testsuite/gcc.target/i386/spill_to_mask-1.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 017ffa69958..05cbd49ec87 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1866,7 +1866,7 @@ struct processor_costs skylake_cost = {
   {8, 8, 8, 12, 24},                   /* cost of storing SSE registers
                                           in 32,64,128,256 and 512-bit */
   6, 6,                                /* SSE->integer and integer->SSE moves */
-  5, 5,                                /* mask->integer and integer->mask moves */
+  6, 6,                                /* mask->integer and integer->mask moves */
   {8, 8, 8},                           /* cost of loading mask register
                                           in QImode, HImode, SImode.  */
   {6, 6, 6},                           /* cost if storing mask register
@@ -1992,7 +1992,7 @@ struct processor_costs icelake_cost = {
   {8, 8, 8, 12, 24},                   /* cost of storing SSE registers
                                           in 32,64,128,256 and 512-bit */
   6, 6,                                /* SSE->integer and integer->SSE moves */
-  5, 5,                                /* mask->integer and integer->mask moves */
+  6, 6,                                /* mask->integer and integer->mask moves */
   {8, 8, 8},                           /* cost of loading mask register
                                           in QImode, HImode, SImode.  */
   {6, 6, 6},                           /* cost if storing mask register
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
index 94d6764fc56..be19239a685 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
@@ -120,7 +120,7 @@ void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
     out[7] += h;
 }
 
-/* { dg-final { scan-assembler "kmovd" } } */
+/* { dg-final { scan-assembler "kmovd" { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-not "knot" } } */
 /* { dg-final { scan-assembler-not "kxor" } } */
 /* { dg-final { scan-assembler-not "kor" } } */
-- 
2.18.1
