Some AMD CPUs fuse "test" followed by a conditional branch into a single uop, but don't fuse "and" followed by a conditional branch. This patch disables the test-to-and peephole2 rules when tuning for BDVER (AMD family 15h), so that the fusible "test" is kept; the SImode rule is still applied when optimizing for size. On its own this is a slight improvement in many cases, but it becomes more significant when combined with the rest of the patch at PR 87104.
I think this could be improved further by still applying the peephole rules when the insn following the peephole is not a conditional branch, but I don't know whether NONJUMP_INSN_P (peep2_next_insn (...)) works or is the right approach. Bootstrapped, but "make check" produces errors which appear unrelated to this patch. 2018-09-18 Pip Cet <pipcet@gmail.com> PR 87104 * config/i386/i386.h (TARGET_FUSE_TEST_AND_BRANCH): Add. * config/i386/i386.md (test to and peephole2s): Don't use for TARGET_FUSE_TEST_AND_BRANCH. * config/i386/x86-tune.def (TARGET_FUSE_TEST_AND_BRANCH): New. Define for AMD family 15h. --- gcc/ChangeLog | 9 +++++++++ gcc/config/i386/i386.h | 2 ++ gcc/config/i386/i386.md | 9 ++++++--- gcc/config/i386/x86-tune.def | 5 +++++ 4 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 92b878f2300..5b6a57cce4a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-09-18 Pip Cet <pipcet@gmail.com> + + PR 87104 + * config/i386/i386.h (TARGET_FUSE_TEST_AND_BRANCH): Add. + * config/i386/i386.md (test to and peephole2s): Don't use for + TARGET_FUSE_TEST_AND_BRANCH. + * config/i386/x86-tune.def (TARGET_FUSE_TEST_AND_BRANCH): New. + Define for AMD family 15h. + 2018-09-18 Segher Boessenkool <segher@kernel.crashing.org> * config/rs6000/rs6000.md: Remove old "Cygnus sibcall" comment. diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 01eba5dd01f..5d580d15d30 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -529,6 +529,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_FUSE_CMP_AND_BRANCH \ (TARGET_64BIT ? TARGET_FUSE_CMP_AND_BRANCH_64 \ : TARGET_FUSE_CMP_AND_BRANCH_32) +#define TARGET_FUSE_TEST_AND_BRANCH \ + ix86_tune_features[X86_TUNE_FUSE_TEST_AND_BRANCH] #define TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS \ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS] #define TARGET_FUSE_ALU_AND_BRANCH \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e08b2b7c14b..77d560d390e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18388,7 +18388,8 @@ [(and:SI (match_operand:SI 2 "register_operand") (match_operand:SI 3 "immediate_operand")) (const_int 0)]))] - "ix86_match_ccmode (insn, CCNOmode) + "(optimize_insn_for_size_p () || ! TARGET_FUSE_TEST_AND_BRANCH) + && ix86_match_ccmode (insn, CCNOmode) && (REGNO (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" @@ -18408,7 +18409,8 @@ [(and:QI (match_operand:QI 2 "register_operand") (match_operand:QI 3 "immediate_operand")) (const_int 0)]))] - "! TARGET_PARTIAL_REG_STALL + "! TARGET_FUSE_TEST_AND_BRANCH + && ! 
TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" @@ -18429,7 +18431,8 @@ (const_int 8)) 0) (match_operand 3 "const_int_operand")) (const_int 0)]))] - "! TARGET_PARTIAL_REG_STALL + "! TARGET_FUSE_TEST_AND_BRANCH + && ! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index a46450ad99d..ef0cc5a5a0f 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -113,6 +113,11 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC) +/* X86_TUNE_FUSE_TEST_AND_BRANCH: Fuse test with a subsequent + conditional jump instruction. */ +DEF_TUNE (X86_TUNE_FUSE_TEST_AND_BRANCH, "fuse_test_and_branch", + m_BDVER) + /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by the conditional jump instruction. */