kmovd only uses port 5, which is often the performance bottleneck. Also, from a latency perspective, a spill and reload can mostly be handled by store-to-load forwarding (STLF) or even memory renaming (MRN), which take only 1 cycle.
So this patch increases the move cost between GPR and mask registers to match the GPR <-> SSE register move cost. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: * config/i386/x86-tune-costs.h (skylake_cost): Increase gpr <-> mask cost from 5 to 6. (icelake_cost): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/spill_to_mask-1.c: Mark the kmovd scan as xfail. --- gcc/config/i386/x86-tune-costs.h | 4 ++-- gcc/testsuite/gcc.target/i386/spill_to_mask-1.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 017ffa69958..05cbd49ec87 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1866,7 +1866,7 @@ struct processor_costs skylake_cost = { {8, 8, 8, 12, 24}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ - 5, 5, /* mask->integer and integer->mask moves */ + 6, 6, /* mask->integer and integer->mask moves */ {8, 8, 8}, /* cost of loading mask register in QImode, HImode, SImode. */ {6, 6, 6}, /* cost if storing mask register @@ -1992,7 +1992,7 @@ struct processor_costs icelake_cost = { {8, 8, 8, 12, 24}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ - 5, 5, /* mask->integer and integer->mask moves */ + 6, 6, /* mask->integer and integer->mask moves */ {8, 8, 8}, /* cost of loading mask register in QImode, HImode, SImode. 
*/ {6, 6, 6}, /* cost if storing mask register diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c index 94d6764fc56..be19239a685 100644 --- a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c +++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c @@ -120,7 +120,7 @@ void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16]) out[7] += h; } -/* { dg-final { scan-assembler "kmovd" } } */ +/* { dg-final { scan-assembler "kmovd" { xfail *-*-* } } } */ /* { dg-final { scan-assembler-not "knot" } } */ /* { dg-final { scan-assembler-not "kxor" } } */ /* { dg-final { scan-assembler-not "kor" } } */ -- 2.18.1