Some AMD CPUs fuse "test" followed by a conditional branch into a
single uop, but don't fuse "and" followed by a conditional branch.
This patch disables the test-to-and peephole2 rules when tuning for
BDVER (AMD family 15h); the SImode rule is still applied when
optimizing for size. This is a slight improvement in many cases, but
it becomes more significant when combined with the rest of the patch
at PR87104.

I think this could be improved further by still applying the peephole
rule when the insn following the peephole is not a conditional branch
(so there is nothing for the "test" to fuse with), but I don't know
whether checking NONJUMP_INSN_P (peep2_next_insn (...)) works or is
the right approach.

Bootstrapped, but "make check" produces errors which appear unrelated
to this patch.

2018-09-18  Pip Cet  <pip...@gmail.com>

    PR 87104
    * config/i386/i386.h (TARGET_FUSE_TEST_AND_BRANCH): Add.
    * config/i386/i386.md (test to and peephole2s): Don't use for
    TARGET_FUSE_TEST_AND_BRANCH.
    * config/i386/x86-tune.def (X86_TUNE_FUSE_TEST_AND_BRANCH): New.
    Define for AMD family 15h.

---
 gcc/ChangeLog                | 9 +++++++++
 gcc/config/i386/i386.h       | 2 ++
 gcc/config/i386/i386.md      | 9 ++++++---
 gcc/config/i386/x86-tune.def | 5 +++++
 4 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92b878f2300..5b6a57cce4a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-09-18  Pip Cet  <pipcet@gmail.com>
+
+	PR 87104
+	* config/i386/i386.h (TARGET_FUSE_TEST_AND_BRANCH): Add.
+	* config/i386/i386.md (test to and peephole2s): Don't use for
+	TARGET_FUSE_TEST_AND_BRANCH.
+	* config/i386/x86-tune.def (X86_TUNE_FUSE_TEST_AND_BRANCH): New.
+	Define for AMD family 15h.
+
 2018-09-18  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* config/rs6000/rs6000.md: Remove old "Cygnus sibcall" comment.
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 01eba5dd01f..5d580d15d30 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -529,6 +529,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_FUSE_CMP_AND_BRANCH \
 	(TARGET_64BIT ? TARGET_FUSE_CMP_AND_BRANCH_64 \
 	 : TARGET_FUSE_CMP_AND_BRANCH_32)
+#define TARGET_FUSE_TEST_AND_BRANCH \
+        ix86_tune_features[X86_TUNE_FUSE_TEST_AND_BRANCH]
 #define TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS \
 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS]
 #define TARGET_FUSE_ALU_AND_BRANCH \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e08b2b7c14b..77d560d390e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18388,7 +18388,8 @@
 	  [(and:SI (match_operand:SI 2 "register_operand")
 		   (match_operand:SI 3 "immediate_operand"))
 	   (const_int 0)]))]
-  "ix86_match_ccmode (insn, CCNOmode)
+  "(optimize_insn_for_size_p () || ! TARGET_FUSE_TEST_AND_BRANCH)
+   && ix86_match_ccmode (insn, CCNOmode)
    && (REGNO (operands[2]) != AX_REG
        || satisfies_constraint_K (operands[3]))
    && peep2_reg_dead_p (1, operands[2])"
@@ -18408,7 +18409,8 @@
 	  [(and:QI (match_operand:QI 2 "register_operand")
 		   (match_operand:QI 3 "immediate_operand"))
 	   (const_int 0)]))]
-  "! TARGET_PARTIAL_REG_STALL
+  "! TARGET_FUSE_TEST_AND_BRANCH
+   && ! TARGET_PARTIAL_REG_STALL
    && ix86_match_ccmode (insn, CCNOmode)
    && REGNO (operands[2]) != AX_REG
    && peep2_reg_dead_p (1, operands[2])"
@@ -18429,7 +18431,8 @@
 				(const_int 8)) 0)
 	     (match_operand 3 "const_int_operand"))
 	   (const_int 0)]))]
-  "! TARGET_PARTIAL_REG_STALL
+  "! TARGET_FUSE_TEST_AND_BRANCH
+   && ! TARGET_PARTIAL_REG_STALL
    && ix86_match_ccmode (insn, CCNOmode)
    && REGNO (operands[2]) != AX_REG
    && peep2_reg_dead_p (1, operands[2])"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index a46450ad99d..ef0cc5a5a0f 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -113,6 +113,11 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
 DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
 	  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC)
 
+/* X86_TUNE_FUSE_TEST_AND_BRANCH: Fuse test with a subsequent
+   conditional jump instruction. */
+DEF_TUNE (X86_TUNE_FUSE_TEST_AND_BRANCH, "fuse_test_and_branch",
+          m_BDVER)
+
 /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
    jump instruction when the alu instruction produces the CCFLAG consumed by
    the conditional jump instruction. */

Reply via email to