https://gcc.gnu.org/g:4c5eb66e701bc9f3bf1298269f52559b10d63a09
commit r15-2253-g4c5eb66e701bc9f3bf1298269f52559b10d63a09 Author: Jennifer Schmitz <jschm...@nvidia.com> Date: Mon Jul 22 23:24:45 2024 -0700 aarch64: Fuse CMP+CSEL and CMP+CSET for -mcpu=neoverse-v2 According to the Neoverse V2 Software Optimization Guide (section 4.14), the instruction pairs CMP+CSEL and CMP+CSET can be fused, which had not been implemented so far. This patch implements and tests the two fusion pairs. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. There was also no non-noise impact on SPEC CPU2017 benchmark. OK for mainline? Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> gcc/ * config/aarch64/aarch64.cc (aarch_macro_fusion_pair_p): Implement fusion logic. * config/aarch64/aarch64-fusion-pairs.def (cmp+csel): New entry. (cmp+cset): Likewise. * config/aarch64/tuning_models/neoversev2.h: Enable logic in field fusible_ops. gcc/testsuite/ * gcc.target/aarch64/cmp_csel_fuse.c: New test. * gcc.target/aarch64/cmp_cset_fuse.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-fusion-pairs.def | 2 ++ gcc/config/aarch64/aarch64.cc | 19 +++++++++++++ gcc/config/aarch64/tuning_models/neoversev2.h | 5 +++- gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c | 34 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c | 31 +++++++++++++++++++++ 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index 9a43b0c80657..bf5e85ba8fe1 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -37,5 +37,7 @@ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ) AARCH64_FUSION_PAIR ("addsub_2reg_const1", ADDSUB_2REG_CONST1) +AARCH64_FUSION_PAIR ("cmp+csel", CMP_CSEL) +AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET) #undef AARCH64_FUSION_PAIR diff --git a/gcc/config/aarch64/aarch64.cc 
b/gcc/config/aarch64/aarch64.cc index 9e51236ce9fa..db598ebf2c79 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -27348,6 +27348,25 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) return true; + /* Fuse CMP and CSEL. */ + if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSEL) + && prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == COMPARE + && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE + && REG_P (XEXP (SET_SRC (curr_set), 1)) + && REG_P (XEXP (SET_SRC (curr_set), 2)) + && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) + return true; + + /* Fuse CMP and CSET. */ + if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSET) + && prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == COMPARE + && GET_RTX_CLASS (GET_CODE (SET_SRC (curr_set))) == RTX_COMPARE + && REG_P (SET_DEST (curr_set)) + && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) + return true; + /* Fuse flag-setting ALU instructions and conditional branch. */ if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) && any_condjump_p (curr)) diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h index f76e4ef358f7..ae99fab22d80 100644 --- a/gcc/config/aarch64/tuning_models/neoversev2.h +++ b/gcc/config/aarch64/tuning_models/neoversev2.h @@ -221,7 +221,10 @@ static const struct tune_params neoversev2_tunings = 2 /* store_pred. */ }, /* memmov_cost. */ 5, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + (AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_CMP_BRANCH + | AARCH64_FUSE_CMP_CSEL + | AARCH64_FUSE_CMP_CSET), /* fusible_ops */ "32:16", /* function_align. */ "4", /* jump_align. */ "32:16", /* loop_align. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c b/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c new file mode 100644 index 000000000000..f5e511e46737 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=neoverse-v2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** ... +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, le +** ret +*/ +int f1 (int a, int b, int c) +{ + int cmp = a > b; + int add1 = c + 3; + int add2 = c + 8; + return cmp ? add1 : add2; +} + +/* +** f2: +** ... +** cmp x[0-9]+, x[0-9]+ +** csel x[0-9]+, x[0-9]+, x[0-9]+, le +** ret +*/ +long long f2 (long long a, long long b, long long c) +{ + long long cmp = a > b; + long long add1 = c + 3; + long long add2 = c + 8; + return cmp ? add1 : add2; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c b/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c new file mode 100644 index 000000000000..04f1ce2773ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=neoverse-v2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** cmp w[0-9]+, w[0-9]+ +** cset w[0-9]+, gt +** ... +*/ +int g; +int f1 (int a, int b) +{ + int cmp = a > b; + g = cmp + 1; + return cmp; +} + +/* +** f2: +** cmp x[0-9]+, x[0-9]+ +** cset x[0-9]+, gt +** ... +*/ +long long h; +long long f2 (long long a, long long b) +{ + long long cmp = a > b; + h = cmp + 1; + return cmp; +}