In ThunderX, any 1 cycle arthemantic instruction that produces the flags register, will be fused with a branch. This patch depends on https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01508.html. Note I know bit 1 is going is already going to be used and that is why I proposed this being bit 2.
Build and tested for aarch64-elf with no regressions. ChangeLog: * config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define. (thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops. (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH. --- gcc/config/aarch64/aarch64.c | 15 ++++++++++++++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a258f40..5216ac0 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -304,6 +304,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost = #define AARCH64_FUSE_NOTHING (0) #define AARCH64_FUSE_MOV_MOVK (1 << 0) +#define AARCH64_FUSE_CMP_BRANCH (1 << 2) #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ @@ -349,7 +350,7 @@ static const struct tune_params thunderx_tunings = &generic_vector_cost, NAMED_PARAM (memmov_cost, 6), NAMED_PARAM (issue_rate, 2), - NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH) }; /* A processor implementing AArch64. */ @@ -10036,6 +10037,18 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH) + && any_condjump_p (curr)) + { + /* FIXME: this misses some which is considered simple arthematic + instructions for ThunderX. Simple shifts are missed here. */ + if (get_attr_type (prev) == TYPE_ALUS_SREG + || get_attr_type (prev) == TYPE_ALUS_IMM + || get_attr_type (prev) == TYPE_LOGICS_REG + || get_attr_type (prev) == TYPE_LOGICS_IMM) + return true; + } + return false; } -- 1.7.2.5