This patch overrides the branch cost for Cortex-A5 cores, building on
the previous patch:
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00045.html
(This patch also depends on:
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00044.html
to apply correctly.)
The rationale is as follows: branches are pretty much the only
instructions which can be dual-issued on Cortex-A5. This makes them
relatively cheap: in particular, cheaper than long sequences of
conditionally-executed instructions. Setting the cost to zero was
experimentally determined to work better than one (or several other
values).
Together with the follow-up patch to tweak the value of
max_insns_skipped (for the arm_final_prescan_insn function), we obtain
(on a popular embedded benchmark, geometric mean improvement):
* 2.75% improvement in ARM mode (~0.9% with just this patch).
* 0.91% improvement in Thumb-2 mode.
Caveat: based on only a single test run, although previous benchmarking
(on a 4.5-based branch IIRC) showed similar improvements.
Testing still in progress. OK to apply?
Thanks,
Julian
ChangeLog
gcc/
* config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning.
* config/arm/arm.c (arm_cortex_a5_branch_cost): New.
(arm_cortex_a5_tune): New.
commit c027c802ea85090f54df7432709f12be33226266
Author: Julian Brown <jul...@henry7.codesourcery.com>
Date: Fri May 27 11:05:49 2011 -0700
Branch cost for Cortex-A5.
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index b315df7..4ff2324 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -124,7 +124,7 @@ ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex)
+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, cortex)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c7eb5b0..cd3f104 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -256,6 +256,7 @@ static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
+static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes. */
@@ -912,6 +913,16 @@ const struct tune_params arm_cortex_tune =
arm_default_branch_cost
};
+const struct tune_params arm_cortex_a5_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_cortex_a5_branch_cost
+};
+
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
@@ -8098,6 +8109,12 @@ arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
return (optimize > 0) ? 2 : 0;
}
+static int
+arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
+{
+ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
+}
+
static int fp_consts_inited = 0;
/* Only zero is valid for VFP. Other values are also valid for FPA. */