[PATCH 8/8] AArch64: rules for CMPBR instructions
Add rules for lowering `cbranch4` to CBB/CBH/CB when CMPBR extension is enabled. gcc/ChangeLog: * config/aarch64/aarch64.md (cbranch4): emit CMPBR instructions if possible. (cbranch4): new expand rule. (aarch64_cb): likewise. (aarch64_cb): likewise. * config/aarch64/iterators.md (cmpbr_suffix): new mode attr. * config/aarch64/predicates.md (const_0_to_63_operand): new predicate. (aarch64_cb_immediate): likewise. (aarch64_cb_operand): likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cmpbr.c: update tests. --- gcc/config/aarch64/aarch64.md| 87 +++- gcc/config/aarch64/iterators.md | 5 + gcc/config/aarch64/predicates.md | 17 + gcc/testsuite/gcc.target/aarch64/cmpbr.c | 484 --- 4 files changed, 275 insertions(+), 318 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 256df0dcc04..73f3e062e57 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -720,18 +720,41 @@ (define_constants ;; Conditional jumps ;; --- -(define_expand "cbranch4" +(define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_plus_operand")]) (label_ref (match_operand 3)) (pc)))] "" - " - operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], -operands[2]); - operands[2] = const0_rtx; - " + { + if (TARGET_CMPBR && aarch64_cb_operand (operands[2], mode)) +{ + emit_jump_insn (gen_aarch64_cb (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + else +{ + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), +operands[1], operands[2]); + operands[2] = const0_rtx; +} + } +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:SHORT 1 "register_operand") +(match_operand:SHORT 2 "aarch64_cb_short_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_CMPBR" + { + emit_jump_insn (gen_aarch64_cb (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } ) (define_expand "cbranch4" @@ -758,6 +781,58 @@ (define_expand "cbranchcc4" "" ) +;; Emit a `CB (register)` or `CB (immediate)` instruction. +(define_insn "aarch64_cb" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPI 1 "register_operand") +(match_operand:GPI 2 "aarch64_cb_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_CMPBR" + "cb%m0\\t%1, %2, %l3"; + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_string "no") + (const_string "yes")))] +) + +;; Emit a `CBB (register)` or `CBH (register)` instruction. 
+(define_insn "aarch64_cb" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:SHORT 1 "register_operand") +(match_operand:SHORT 2 "aarch64_cb_short_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_CMPBR" + "cb%m0\\t%1, %2, %l3"; + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (
[PATCH 3/8] AArch64: rename branch instruction rules
Give the `define_insn` rules used in lowering `cbranch4` to RTL more descriptive and consistent names: from now on, each rule is named after the AArch64 instruction that it generates. Also add comments to document each rule. gcc/ChangeLog: * config/aarch64/aarch64.md (condjump): rename to ... (aarch64_bcond): ...here. (*compare_condjump): rename to ... (*aarch64_bcond_wide_imm): ...here. (restore_stack_nonlocal): handle rename. (stack_protect_combined_test): likewise. * config/aarch64/aarch64-simd.md (cbranch4): likewise. * config/aarch64/aarch64-sme.md (aarch64_restore_za): likewise. * config/aarch64/aarch64.cc (aarch64_gen_test_and_branch): likewise. --- gcc/config/aarch64/aarch64-simd.md | 2 +- gcc/config/aarch64/aarch64-sme.md | 3 ++- gcc/config/aarch64/aarch64.cc | 2 +- gcc/config/aarch64/aarch64.md | 15 +-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87e513..197a5f65f34 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3946,7 +3946,7 @@ (define_expand "cbranch4" rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx); rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3])); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3])); DONE; }) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index c49affd0dd3..6a7c31acf0a 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -389,7 +389,8 @@ (define_insn_and_split "aarch64_restore_za" auto label = gen_label_rtx (); auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); -auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label)); +auto jump = emit_likely_jump_insn ( + gen_aarch64_cbnedi1 (tpidr2, label)); JUMP_LABEL (jump) = label; aarch64_restore_za (operands[0]); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fff8d9da49d..c0afdeb87ee 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -2879,7 +2879,7 @@ aarch64_gen_test_and_branch (rtx_code code, rtx x, int bitnum, emit_insn (gen_aarch64_and3nr_compare0 (mode, x, mask)); rtx cc_reg = gen_rtx_REG (CC_NZVmode, CC_REGNUM); rtx x = gen_rtx_fmt_ee (code, CC_NZVmode, cc_reg, const0_rtx); - return gen_condjump (x, cc_reg, label); + return gen_aarch64_bcond (x, cc_reg, label); } return gen_aarch64_tb (code, mode, mode, x, gen_int_mode (bitnum, mode), label); diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 45b2283c5c0..23775ec58ca 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -740,7 +740,8 @@ (define_expand "cbranchcc4" "" ) -(define_insn "condjump" +;; Emit `B`, assuming that the condition is already in the CC register. 
+(define_insn "aarch64_bcond" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand 1 "cc_register") (const_int 0)]) @@ -780,7 +781,7 @@ (define_insn "condjump" ;; sub x0, x1, #(CST & 0xfff000) ;; subsx0, x0, #(CST & 0x000fff) ;; b .Label -(define_insn_and_split "*compare_condjump" +(define_insn_and_split "*aarch64_bcond_wide_imm" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (match_operand:GPI 1 "aarch64_imm24" "n")) @@ -801,11 +802,12 @@ (define_insn_and_split "*compare_condjump" rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx); -emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); +emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2])); DONE; } ) +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` (define_insn "aarch64_cb1" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") @@ -832,6 +834,7 @@ (define_insn "aarch64_cb1" (const_int 1)))] ) +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` (define_insn "*cb1" [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") @@ -1325,13 +1328,13 @@ (define_expand "restore_stack_nonlocal" emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now)); rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label)); +
[PATCH 0/8] AArch64: CMPBR support
This patch series adds support for the CMPBR extension. It includes the
new `+cmpbr` option and rules to generate the new instructions when
lowering conditional branches.

Karl Meakin (8):
  AArch64: place branch instruction rules together
  AArch64: reformat branch instruction rules
  AArch64: rename branch instruction rules
  AArch64: add constants for branch displacements
  AArch64: make `far_branch` attribute a boolean
  AArch64: recognize `+cmpbr` option
  AArch64: precommit test for CMPBR instructions
  AArch64: rules for CMPBR instructions

 .../aarch64/aarch64-option-extensions.def |    2 +
 gcc/config/aarch64/aarch64-simd.md        |    2 +-
 gcc/config/aarch64/aarch64-sme.md         |    3 +-
 gcc/config/aarch64/aarch64.cc             |    2 +-
 gcc/config/aarch64/aarch64.h              |    3 +
 gcc/config/aarch64/aarch64.md             |  557 +---
 gcc/config/aarch64/iterators.md           |    5 +
 gcc/config/aarch64/predicates.md          |   17 +
 gcc/doc/invoke.texi                       |    3 +
 gcc/testsuite/gcc.target/aarch64/cmpbr.c  | 1238 +
 10 files changed, 1615 insertions(+), 217 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr.c

-- 
2.45.2
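For illustration, the kind of compare-and-branch the series targets, written
as a minimal C example (the function names are made up for the sketch; the
flags mirror the dg-options used by the new test, i.e.
-O2 -march=armv9.5-a+cmpbr):

  int taken(void);
  int not_taken(void);

  /* With +cmpbr, a register-register comparison feeding a branch can be
     emitted as a single CB-family compare-and-branch instead of CMP + B.cond,
     provided the branch target is within the short CB displacement range.  */
  int cb_reg_example(unsigned int x0, unsigned int x1) {
    return (x0 < x1) ? taken() : not_taken();
  }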
[PATCH 1/8] AArch64: place branch instruction rules together
The rules for conditional branches were spread throughout `aarch64.md`. Group them together so it is easier to understand how `cbranch4` is lowered to RTL. gcc/ChangeLog: * config/aarch64/aarch64.md (condjump): move. (*compare_condjump): likewise. (aarch64_cb1): likewise. (*cb1): likewise. (tbranch_3): likewise. (@aarch64_tb): likewise. --- gcc/config/aarch64/aarch64.md | 387 ++ 1 file changed, 201 insertions(+), 186 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c678f7afb1a..4d556d886bc 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -674,6 +674,10 @@ (define_insn "aarch64_write_sysregti" "msrr\t%x0, %x1, %H1" ) +;; --- +;; Unconditional jumps +;; --- + (define_insn "indirect_jump" [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" @@ -692,6 +696,12 @@ (define_insn "jump" [(set_attr "type" "branch")] ) + + +;; --- +;; Conditional jumps +;; --- + (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") @@ -731,6 +741,197 @@ (define_expand "cbranchcc4" "" "") +(define_insn "condjump" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +/* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. */ +bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; +if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 2, "Lbcond", +use_dot_p ? "b.%M0\\t" : "b%M0\\t"); +else + return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) + (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) + (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (const_int 0) + (const_int 1)))] +) + +;; For a 24-bit immediate CST we can optimize the compare for equality +;; and branch sequence from: +;; mov x0, #imm1 +;; movkx0, #imm2, lsl 16 /* x0 contains CST. 
*/ +;; cmp x1, x0 +;; b .Label +;; into the shorter: +;; sub x0, x1, #(CST & 0xfff000) +;; subsx0, x0, #(CST & 0x000fff) +;; b .Label +(define_insn_and_split "*compare_condjump" + [(set (pc) (if_then_else (EQL + (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "aarch64_imm24" "n")) + (label_ref:P (match_operand 2 "" "")) + (pc)))] + "!aarch64_move_imm (INTVAL (operands[1]), mode) + && !aarch64_plus_operand (operands[1], mode) + && !reload_completed" + "#" + "&& true" + [(const_int 0)] + { +HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; +HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; +rtx tmp = gen_reg_rtx (mode); +emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); +emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); +rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); +rtx cmp_rtx = gen_rtx_fmt_ee (, mode, + cc_reg, const0_rtx); +emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); +DONE; + } +) + +(define_insn "aarch64_cb1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "!aarch64_track_speculation" + { +if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, "); +else + return "\\t%0, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) + (lt (minus (match_dup 1) (pc)) (const_int 1048572))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minu
[PATCH 7/8] AArch64: precommit test for CMPBR instructions
Commit the test file `cmpbr.c` before rules for generating the new instructions are added, so that the changes in codegen are more obvious in the next commit. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cmpbr.c: New test. --- gcc/testsuite/gcc.target/aarch64/cmpbr.c | 1378 ++ 1 file changed, 1378 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr.c diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c new file mode 100644 index 000..728d6ead91c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c @@ -0,0 +1,1378 @@ +/* Test that the instructions added by FEAT_CMPBR are emitted */ +/* { dg-do compile } */ +/* { dg-options "-march=armv9.5-a+cmpbr -O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +typedef uint8_t u8; +typedef int8_t i8; + +typedef uint16_t u16; +typedef int16_t i16; + +typedef uint32_t u32; +typedef int32_t i32; + +typedef uint64_t u64; +typedef int64_t i64; + +int taken(); +int not_taken(); + +#define COMPARE(ty, name, op, rhs) \ + int ty##_x0_##name##_##rhs(ty x0, ty x1) { \ +return (x0 op rhs) ? taken() : not_taken(); \ + } + +#define COMPARE_ALL(unsigned_ty, signed_ty, rhs) \ + COMPARE(unsigned_ty, eq, ==, rhs); \ + COMPARE(unsigned_ty, ne, !=, rhs); \ + \ + COMPARE(unsigned_ty, ult, <, rhs); \ + COMPARE(unsigned_ty, ule, <=, rhs); \ + COMPARE(unsigned_ty, ugt, >, rhs); \ + COMPARE(unsigned_ty, uge, >=, rhs); \ + \ + COMPARE(signed_ty, slt, <, rhs); \ + COMPARE(signed_ty, sle, <=, rhs); \ + COMPARE(signed_ty, sgt, >, rhs); \ + COMPARE(signed_ty, sge, >=, rhs); + +// CBB (register) +COMPARE_ALL(u8, i8, x1); + +// CBH (register) +COMPARE_ALL(u16, i16, x1); + +// CB (register) +COMPARE_ALL(u32, i32, x1); +COMPARE_ALL(u64, i64, x1); + +// CB (immediate) +COMPARE_ALL(u32, i32, 42); +COMPARE_ALL(u64, i64, 42); + +// Special cases +// CBB and CBH cannot have immediate operands. Instead we have to do a MOV+CB +COMPARE_ALL(u8, i8, 42); +COMPARE_ALL(u16, i16, 42); + +// 65 is out of the range for immediate operands (0 to 63). +// * For 8/16-bit types, use a MOV+CB as above. +// * For 32/64-bit types, use a CMP+B instead, because +// B has a longer range than CB. +COMPARE_ALL(u8, i8, 65); +COMPARE_ALL(u16, i16, 65); +COMPARE_ALL(u32, i32, 65); +COMPARE_ALL(u64, i64, 65); + +// Comparisons against zero can use the wzr/xzr register. 
+COMPARE_ALL(u8, i8, 0); +COMPARE_ALL(u16, i16, 0); +COMPARE_ALL(u32, i32, 0); +COMPARE_ALL(u64, i64, 0); + +/* +** u8_x0_eq_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** beq .L4 +** b not_taken +** b taken +*/ + +/* +** u8_x0_ne_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** beq .L6 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ult_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bls .L8 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ule_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bcc .L10 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ugt_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bcs .L12 +** b taken +** b not_taken +*/ + +/* +** u8_x0_uge_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bhi .L14 +** b taken +** b not_taken +*/ + +/* +** i8_x0_slt_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** ble .L16 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sle_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** blt .L18 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sgt_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** bge .L20 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sge_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** bgt .L22 +** b taken +** b not_taken +*/ + +/* +** u16_x0_eq_x1: +** and w1, w1, 65535 +** cmp w1, w0, uxth +** beq .L25 +** b not_taken +** b taken +*/ + +/* +** u16_x0_ne_x1: +** and
[PATCH 2/8] AArch64: reformat branch instruction rules
Make the formatting of the RTL templates in the rules for branch instructions more consistent with each other. gcc/ChangeLog: * config/aarch64/aarch64.md (cbranch4): reformat. (cbranchcc4): likewise. (condjump): likewise. (*compare_condjump): likewise. (aarch64_cb1): likewise. (*cb1): likewise. (tbranch_3): likewise. (@aarch64_tb): likewise. --- gcc/config/aarch64/aarch64.md | 82 ++- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 4d556d886bc..45b2283c5c0 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -706,7 +706,7 @@ (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_plus_operand")]) - (label_ref (match_operand 3 "" "")) + (label_ref (match_operand 3)) (pc)))] "" " @@ -717,34 +717,34 @@ (define_expand "cbranch4" ) (define_expand "cbranch4" - [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" -[(match_operand:GPF_F16 1 "register_operand") - (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF_F16 1 "register_operand") +(match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3)) + (pc)))] "" - " + { operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], operands[2]); operands[2] = const0_rtx; - " + } ) (define_expand "cbranchcc4" - [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register") - (match_operand 2 "const0_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register") +(match_operand 2 "const0_operand")]) + (label_ref (match_operand 3)) + (pc)))] "" - "") + "" +) (define_insn "condjump" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (label_ref (match_operand 2 "" "")) + [(match_operand 1 "cc_register") +(const_int 0)]) + (label_ref (match_operand 2)) (pc)))] "" { @@ -782,9 +782,9 @@ (define_insn "condjump" ;; b .Label (define_insn_and_split "*compare_condjump" [(set (pc) (if_then_else (EQL - (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "aarch64_imm24" "n")) - (label_ref:P (match_operand 2 "" "")) +(match_operand:GPI 0 "register_operand" "r") +(match_operand:GPI 1 "aarch64_imm24" "n")) + (label_ref:P (match_operand 2)) (pc)))] "!aarch64_move_imm (INTVAL (operands[1]), mode) && !aarch64_plus_operand (operands[1], mode) @@ -807,9 +807,10 @@ (define_insn_and_split "*compare_condjump" ) (define_insn "aarch64_cb1" - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") - (const_int 0)) - (label_ref (match_operand 1 "" "")) + [(set (pc) (if_then_else (EQL +(match_operand:GPI 0 "register_operand" "r") +(const_int 0)) + (label_ref (match_operand 1)) (pc)))] "!aarch64_track_speculation" { @@ -832,9 +833,10 @@ (define_insn "aarch64_cb1" ) (define_insn "*cb1" - [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") -(const_int 0)) - (label_ref (match_operand 1 "" "")) + [(set (pc) (if_then_else (LTGE +(match_operand:ALLI 0 "register_operand" "r") +(const_int 0)) + (label_ref (match_operand 1)) (pc))) (clobber (reg:CC CC_REGNUM))] 
"!aarch64_track_speculation" @@ -875,11 +877,11 @@ (define_insn "*c
[PATCH 6/8] AArch64: recognize `+cmpbr` option
Add the `+cmpbr` option to enable the FEAT_CMPBR architectural extension.

gcc/ChangeLog:

	* config/aarch64/aarch64-option-extensions.def (cmpbr): new option.
	* config/aarch64/aarch64.h (TARGET_CMPBR): new macro.
	* doc/invoke.texi (cmpbr): new option.
---
 gcc/config/aarch64/aarch64-option-extensions.def | 2 ++
 gcc/config/aarch64/aarch64.h                     | 3 +++
 gcc/doc/invoke.texi                              | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index dbbb021f05a..1c3e69799f5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -249,6 +249,8 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
 
 AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
 
+AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
+
 AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
 
 AARCH64_OPT_EXTENSION("d128", D128, (LSE128), (), (), "d128")
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e8bd8c73c12..d5c4a42e96d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -410,6 +410,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* CSSC instructions are enabled through +cssc. */
 #define TARGET_CSSC AARCH64_HAVE_ISA (CSSC)
 
+/* CB instructions are enabled through +cmpbr. */
+#define TARGET_CMPBR AARCH64_HAVE_ISA (CMPBR)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs. */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 32bc45725de..3f05e5e0e34 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -22252,6 +22252,9 @@ Enable the FlagM2 flag conversion instructions.
 Enable the Pointer Authentication Extension.
 @item cssc
 Enable the Common Short Sequence Compression instructions.
+@item cmpbr
+Enable the shorter compare and branch instructions, @code{cbb}, @code{cbh} and
+@code{cb}.
 @item sme
 Enable the Scalable Matrix Extension. This is only supported when SVE2 is
 also enabled.
-- 
2.45.2
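A usage sketch of the new option (assumed invocation; armv9.5-a+cmpbr matches
the dg-options used by the testsuite later in the series, and the file and
function names are hypothetical):

  /* cmpbr-option-example.c
     Compile with:  gcc -O2 -march=armv9.5-a+cmpbr -S cmpbr-option-example.c
     With the extension enabled, TARGET_CMPBR is true and the CB patterns
     added later in the series become available to the compiler.  */
  int taken(void);
  int not_taken(void);

  int cb_imm_example(unsigned long x0) {
    /* An immediate in the range 0-63 (here 42) is a candidate for the
       CB (immediate) form; larger immediates fall back to CMP + B.cond.  */
    return (x0 == 42) ? taken() : not_taken();
  }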
[PATCH 4/8] AArch64: add constants for branch displacements
Extract the hardcoded values for the minimum PC-relative displacements into named constants and document them. gcc/ChangeLog: * config/aarch64/aarch64.md (BRANCH_LEN_P_128MiB): New constant. (BRANCH_LEN_N_128MiB): likewise. (BRANCH_LEN_P_1MiB): likewise. (BRANCH_LEN_N_1MiB): likewise. (BRANCH_LEN_P_32KiB): likewise. (BRANCH_LEN_N_32KiB): likewise. (BRANCH_LEN_P_1KiB): likewise. (BRANCH_LEN_N_1KiB): likewise. --- gcc/config/aarch64/aarch64.md | 68 ++- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 23775ec58ca..ca5bd96a754 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -696,7 +696,27 @@ (define_insn "jump" [(set_attr "type" "branch")] ) +;; Maximum PC-relative positive/negative displacements for various branching +;; instructions. +(define_constants + [ +;; +/- 128MiB. Used by B, BL. +(BRANCH_LEN_P_128Mib 134217724) +(BRANCH_LEN_N_128Mib -134217728) + +;; +/- 1MiB. Used by B., CBZ, CBNZ. +(BRANCH_LEN_P_1Mib 1048572) +(BRANCH_LEN_N_1Mib -1048576) +;; +/- 32KiB. Used by TBZ, TBNZ. +(BRANCH_LEN_P_32Kib 32764) +(BRANCH_LEN_N_32Kib -32768) + +;; +/- 1KiB. Used by CBB, CBH, CB. +(BRANCH_LEN_P_1Kib 1020) +(BRANCH_LEN_N_1Kib -1024) + ] +) ;; --- ;; Conditional jumps @@ -760,13 +780,17 @@ (define_insn "aarch64_bcond" } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Mib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Mib))) (const_int 4) (const_int 8))) (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Mib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Mib))) (const_int 0) (const_int 1)))] ) @@ -823,13 +847,17 @@ (define_insn "aarch64_cb1" } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) - (lt (minus (match_dup 1) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1Mib)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1Mib))) (const_int 4) (const_int 8))) (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Mib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Mib))) (const_int 0) (const_int 1)))] ) @@ -864,13 +892,17 @@ (define_insn "*cb1" } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) - (lt (minus (match_dup 1) (pc)) (const_int 32764))) + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_32Kib)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_32Kib))) (const_int 4) (const_int 8))) (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) - (lt (minus (match_dup 1) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1Mib)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1Mib))) (const_int 0) (const_int 1)))] ) @@ -925,13 
+957,17 @@ (define_insn "@aarch64_tb" } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) -
[PATCH 5/8] AArch64: make `far_branch` attribute a boolean
The `far_branch` attribute only ever takes the values 0 or 1, so make it a `no/yes` valued string attribute instead. gcc/ChangeLog: * config/aarch64/aarch64.md (far_branch): replace 0/1 with no/yes. (aarch64_bcond): handle rename. (aarch64_cb1): likewise. (*cb1): likewise. (@aarch64_tb): likewise. --- gcc/config/aarch64/aarch64.md | 22 ++ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index ca5bd96a754..256df0dcc04 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -561,9 +561,7 @@ (define_attr "enabled" "no,yes" ;; Attribute that specifies whether we are dealing with a branch to a ;; label that is far away, i.e. further away than the maximum/minimum ;; representable in a signed 21-bits number. -;; 0 :=: no -;; 1 :=: yes -(define_attr "far_branch" "" (const_int 0)) +(define_attr "far_branch" "no,yes" (const_string "no")) ;; Attribute that specifies whether the alternative uses MOVPRFX. (define_attr "movprfx" "no,yes" (const_string "no")) @@ -791,8 +789,8 @@ (define_insn "aarch64_bcond" (const_int BRANCH_LEN_N_1Mib)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1Mib))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) ;; For a 24-bit immediate CST we can optimize the compare for equality @@ -858,8 +856,8 @@ (define_insn "aarch64_cb1" (const_int BRANCH_LEN_N_1Mib)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1Mib))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) ;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` @@ -874,7 +872,7 @@ (define_insn "*cb1" { if (get_attr_length (insn) == 8) { - if (get_attr_far_branch (insn) == 1) + if (get_attr_far_branch (insn) == FAR_BRANCH_YES) return aarch64_gen_far_branch (operands, 1, "Ltb", "\\t%0, , "); else @@ -903,8 +901,8 @@ (define_insn "*cb1" (const_int BRANCH_LEN_N_1Mib)) (lt (minus (match_dup 1) (pc)) (const_int BRANCH_LEN_P_1Mib))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) ;; --- @@ -968,8 +966,8 @@ (define_insn "@aarch64_tb" (const_int BRANCH_LEN_N_1Mib)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1Mib))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) -- 2.45.2
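To make the attribute concrete, here is a sketch of a branch that cannot reach
its target directly (illustrative only; padding the function with an assembler
.skip directive is just a way to force the out-of-range case, and the function
names are made up):

  int taken(void);
  int not_taken(void);

  int far_branch_example(int x0) {
    if (x0 == 0) {
      /* Pad well over 1MiB of code so the conditional branch around this
         block exceeds the +/- 1MiB B.cond/CBZ range; branch shortening then
         selects the 8-byte far-branch form (an inverted branch over a B),
         which is what far_branch == yes describes.  */
      __asm__ volatile(".skip 1048580");
      return taken();
    }
    return not_taken();
  }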
[PATCH 9/9] AArch64: make rules for CBZ/TBZ higher priority
Move the rules for CBZ/TBZ to be above the rules for CBB/CBH/CB. We want them to have higher priority because they can express larger displacements. gcc/ChangeLog: * config/aarch64/aarch64.md (aarch64_cbz1): Move above rules for CBB/CBH/CB. (*aarch64_tbz1): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cmpbr.c: Update tests. --- gcc/config/aarch64/aarch64.md| 170 --- gcc/testsuite/gcc.target/aarch64/cmpbr.c | 35 ++--- 2 files changed, 110 insertions(+), 95 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 641c3653a40..aa528cd13b4 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -697,27 +697,38 @@ (define_insn "jump" ;; Maximum PC-relative positive/negative displacements for various branching ;; instructions. (define_constants [ ;; +/- 128MiB. Used by B, BL. (BRANCH_LEN_P_128MiB 134217724) (BRANCH_LEN_N_128MiB -134217728) ;; +/- 1MiB. Used by B., CBZ, CBNZ. (BRANCH_LEN_P_1MiB 1048572) (BRANCH_LEN_N_1MiB -1048576) ;; +/- 32KiB. Used by TBZ, TBNZ. (BRANCH_LEN_P_32KiB 32764) (BRANCH_LEN_N_32KiB -32768) ;; +/- 1KiB. Used by CBB, CBH, CB. (BRANCH_LEN_P_1Kib 1020) (BRANCH_LEN_N_1Kib -1024) ] ) ;; --- ;; Conditional jumps +;; The order of the rules below is important. +;; Higher priority rules are preferred because they can express larger +;; displacements. +;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ. +;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ. +;; 3) When the CMPBR extension is enabled: +;; a) Comparisons between two registers are handled by +;; CBB/CBH/CB. +;; b) Comparisons between a GP register and an immediate in the range 0-63 are +;; handled by CB. +;; 4) Otherwise, emit a CMP+B sequence. ;; --- (define_expand "cbranch4" @@ -770,63 +781,140 @@ (define_expand "cbranch4" (define_expand "cbranchcc4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand 1 "cc_register") (match_operand 2 "const0_operand")]) (label_ref (match_operand 3)) (pc)))] "" "" ) +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` +(define_insn "aarch64_cbz1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1)) + (pc)))] + "!aarch64_track_speculation" + { +if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, "); +else + return "\\t%0, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` +(define_insn "*aarch64_tbz1" + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") +(const_int 0)) + (label_ref (match_operand 1)) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!aarch64_track_speculation" + { +if (get_attr_length (insn) == 8) + { + if (get_attr_far_branch (insn) == FAR_BRANCH_YES) + return aarch64_gen_far_branch (operands, 1, "Ltb", +"\\t%0, , "); + else + { + char buf[64]; + uint64_t val = ((uint64_t) 1) + << (GET_MODE_SIZE (mode) * BITS_PER_UNIT - 1); + sprintf (buf, "tst\t%%0, %" PRId64, val); + output_asm_insn (buf, operands); + return 
"\t%l1"; + } + } +else + return "\t%0, , %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_32KiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_32KiB))) + (const_int 4) +
[PATCH 7/9] AArch64: precommit test for CMPBR instructions
Commit the test file `cmpbr.c` before rules for generating the new instructions are added, so that the changes in codegen are more obvious in the next commit. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cmpbr.c: New test. --- gcc/testsuite/gcc.target/aarch64/cmpbr.c | 1378 ++ 1 file changed, 1378 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr.c diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c new file mode 100644 index 000..728d6ead91c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c @@ -0,0 +1,1378 @@ +/* Test that the instructions added by FEAT_CMPBR are emitted */ +/* { dg-do compile } */ +/* { dg-options "-march=armv9.5-a+cmpbr -O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +typedef uint8_t u8; +typedef int8_t i8; + +typedef uint16_t u16; +typedef int16_t i16; + +typedef uint32_t u32; +typedef int32_t i32; + +typedef uint64_t u64; +typedef int64_t i64; + +int taken(); +int not_taken(); + +#define COMPARE(ty, name, op, rhs) \ + int ty##_x0_##name##_##rhs(ty x0, ty x1) { \ +return (x0 op rhs) ? taken() : not_taken(); \ + } + +#define COMPARE_ALL(unsigned_ty, signed_ty, rhs) \ + COMPARE(unsigned_ty, eq, ==, rhs); \ + COMPARE(unsigned_ty, ne, !=, rhs); \ + \ + COMPARE(unsigned_ty, ult, <, rhs); \ + COMPARE(unsigned_ty, ule, <=, rhs); \ + COMPARE(unsigned_ty, ugt, >, rhs); \ + COMPARE(unsigned_ty, uge, >=, rhs); \ + \ + COMPARE(signed_ty, slt, <, rhs); \ + COMPARE(signed_ty, sle, <=, rhs); \ + COMPARE(signed_ty, sgt, >, rhs); \ + COMPARE(signed_ty, sge, >=, rhs); + +// CBB (register) +COMPARE_ALL(u8, i8, x1); + +// CBH (register) +COMPARE_ALL(u16, i16, x1); + +// CB (register) +COMPARE_ALL(u32, i32, x1); +COMPARE_ALL(u64, i64, x1); + +// CB (immediate) +COMPARE_ALL(u32, i32, 42); +COMPARE_ALL(u64, i64, 42); + +// Special cases +// CBB and CBH cannot have immediate operands. Instead we have to do a MOV+CB +COMPARE_ALL(u8, i8, 42); +COMPARE_ALL(u16, i16, 42); + +// 65 is out of the range for immediate operands (0 to 63). +// * For 8/16-bit types, use a MOV+CB as above. +// * For 32/64-bit types, use a CMP+B instead, because +// B has a longer range than CB. +COMPARE_ALL(u8, i8, 65); +COMPARE_ALL(u16, i16, 65); +COMPARE_ALL(u32, i32, 65); +COMPARE_ALL(u64, i64, 65); + +// Comparisons against zero can use the wzr/xzr register. 
+COMPARE_ALL(u8, i8, 0); +COMPARE_ALL(u16, i16, 0); +COMPARE_ALL(u32, i32, 0); +COMPARE_ALL(u64, i64, 0); + +/* +** u8_x0_eq_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** beq .L4 +** b not_taken +** b taken +*/ + +/* +** u8_x0_ne_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** beq .L6 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ult_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bls .L8 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ule_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bcc .L10 +** b taken +** b not_taken +*/ + +/* +** u8_x0_ugt_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bcs .L12 +** b taken +** b not_taken +*/ + +/* +** u8_x0_uge_x1: +** and w1, w1, 255 +** cmp w1, w0, uxtb +** bhi .L14 +** b taken +** b not_taken +*/ + +/* +** i8_x0_slt_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** ble .L16 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sle_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** blt .L18 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sgt_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** bge .L20 +** b taken +** b not_taken +*/ + +/* +** i8_x0_sge_x1: +** sxtbw1, w1 +** cmp w1, w0, sxtb +** bgt .L22 +** b taken +** b not_taken +*/ + +/* +** u16_x0_eq_x1: +** and w1, w1, 65535 +** cmp w1, w0, uxth +** beq .L25 +** b not_taken +** b taken +*/ + +/* +** u16_x0_ne_x1: +** and
[PATCH 8/9] AArch64: rules for CMPBR instructions
Add rules for lowering `cbranch4` to CBB/CBH/CB when CMPBR extension is
enabled.

gcc/ChangeLog:

	* config/aarch64/aarch64.md (cbranch4): Emit CMPBR
	instructions if possible.
	(BRANCH_LEN_P_1Kib): New constant.
	(BRANCH_LEN_N_1Kib): Likewise.
	(cbranch4): New expand rule.
	(aarch64_cb): Likewise.
	(aarch64_cb): Likewise.
	* config/aarch64/iterators.md (cmpbr_suffix): New mode attr.
	* config/aarch64/predicates.md (const_0_to_63_operand): New
	predicate.
	(aarch64_cb_immediate): Likewise.
	(aarch64_cb_operand): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/cmpbr.c: Update tests.
---
 gcc/config/aarch64/aarch64.md            |  87 +++-
 gcc/config/aarch64/iterators.md          |   5 +
 gcc/config/aarch64/predicates.md         |  17 +
 gcc/testsuite/gcc.target/aarch64/cmpbr.c | 484 ---
 4 files changed, 275 insertions(+), 318 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 248b0e8644f..641c3653a40 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -697,37 +697,60 @@ (define_insn "jump"
 ;; Maximum PC-relative positive/negative displacements for various branching
 ;; instructions.
 (define_constants
   [
     ;; +/- 128MiB. Used by B, BL.
     (BRANCH_LEN_P_128MiB 134217724)
     (BRANCH_LEN_N_128MiB -134217728)
 
     ;; +/- 1MiB. Used by B., CBZ, CBNZ.
     (BRANCH_LEN_P_1MiB 1048572)
     (BRANCH_LEN_N_1MiB -1048576)
     ;; +/- 32KiB. Used by TBZ, TBNZ.
     (BRANCH_LEN_P_32KiB 32764)
     (BRANCH_LEN_N_32KiB -32768)
+
+    ;; +/- 1KiB. Used by CBB, CBH, CB.
+    (BRANCH_LEN_P_1Kib 1020)
+    (BRANCH_LEN_N_1Kib -1024)
   ]
 )
 
 ;; ---
 ;; Conditional jumps
 ;; ---
 
-(define_expand "cbranch4"
+(define_expand "cbranch4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
                              [(match_operand:GPI 1 "register_operand")
                               (match_operand:GPI 2 "aarch64_plus_operand")])
                            (label_ref (match_operand 3))
                            (pc)))]
   ""
-  "
-  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
-                                         operands[2]);
-  operands[2] = const0_rtx;
-  "
+  {
+    if (TARGET_CMPBR && aarch64_cb_operand (operands[2], mode))
+      {
+        emit_jump_insn (gen_aarch64_cb (operands[0], operands[1],
+                                        operands[2], operands[3]));
+        DONE;
+      }
+    else
+      {
+        operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
+                                               operands[1], operands[2]);
+        operands[2] = const0_rtx;
+      }
+  }
 )
+
+(define_expand "cbranch4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+                             [(match_operand:SHORT 1 "register_operand")
+                              (match_operand:SHORT 2 "aarch64_cb_short_operand")])
+                           (label_ref (match_operand 3))
+                           (pc)))]
+  "TARGET_CMPBR"
+  ""
 )
 
 (define_expand "cbranch4"
@@ -747,13 +770,65 @@ (define_expand "cbranch4"
 (define_expand "cbranchcc4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
                              [(match_operand 1 "cc_register")
                               (match_operand 2 "const0_operand")])
                            (label_ref (match_operand 3))
                            (pc)))]
   ""
   ""
 )
 
+;; Emit a `CB (register)` or `CB (immediate)` instruction.
+(define_insn "aarch64_cb" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPI 1 "register_operand") +(match_operand:GPI 2 "aarch64_cb_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_CMPBR" + "cb%m0\\t%1, %2, %l3"; + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 3) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_string "no") + (const_string "yes")))] +) + +;; Emit a `CBB (register)` or `CBH (register)` instruction. +(define_insn "aarch64_cb" + [(set (pc) (if
[PATCH 2/9] AArch64: reformat branch instruction rules
Make the formatting of the RTL templates in the rules for branch instructions more consistent with each other. gcc/ChangeLog: * config/aarch64/aarch64.md (cbranch4): Reformat. (cbranchcc4): Likewise. (condjump): Likewise. (*compare_condjump): Likewise. (aarch64_cb1): Likewise. (*cb1): Likewise. (tbranch_3): Likewise. (@aarch64_tb): Likewise. --- gcc/config/aarch64/aarch64.md | 77 +-- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 4d556d886bc..7d0af5bd700 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -705,229 +705,228 @@ (define_insn "jump" (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_plus_operand")]) - (label_ref (match_operand 3 "" "")) + (label_ref (match_operand 3)) (pc)))] "" " operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], operands[2]); operands[2] = const0_rtx; " ) (define_expand "cbranch4" - [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" -[(match_operand:GPF_F16 1 "register_operand") - (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF_F16 1 "register_operand") +(match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3)) + (pc)))] "" - " + { operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], operands[2]); operands[2] = const0_rtx; - " + } ) (define_expand "cbranchcc4" - [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register") - (match_operand 2 "const0_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register") +(match_operand 2 "const0_operand")]) + (label_ref (match_operand 3)) + (pc)))] "" - "") + "" +) (define_insn "condjump" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (label_ref (match_operand 2 "" "")) + [(match_operand 1 "cc_register") +(const_int 0)]) + (label_ref (match_operand 2)) (pc)))] "" { /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., but the "." is required for SVE conditions. */ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 2, "Lbcond", use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) (lt (minus (match_dup 2) (pc)) (const_int 1048572))) (const_int 4) (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) (lt (minus (match_dup 2) (pc)) (const_int 1048572))) (const_int 0) (const_int 1)))] ) ;; For a 24-bit immediate CST we can optimize the compare for equality ;; and branch sequence from: ;; mov x0, #imm1 ;; movkx0, #imm2, lsl 16 /* x0 contains CST. 
*/ ;; cmp x1, x0 ;; b .Label ;; into the shorter: ;; sub x0, x1, #(CST & 0xfff000) ;; subsx0, x0, #(CST & 0x000fff) ;; b .Label (define_insn_and_split "*compare_condjump" - [(set (pc) (if_then_else (EQL - (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "aarch64_imm24" "n")) - (label_ref:P (match_operand 2 "" "")) + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") +
[PATCH 1/9] AArch64: place branch instruction rules together
The rules for conditional branches were spread throughout `aarch64.md`. Group them together so it is easier to understand how `cbranch4` is lowered to RTL. gcc/ChangeLog: * config/aarch64/aarch64.md (condjump): Move. (*compare_condjump): Likewise. (aarch64_cb1): Likewise. (*cb1): Likewise. (tbranch_3): Likewise. (@aarch64_tb): Likewise. --- gcc/config/aarch64/aarch64.md | 387 ++ 1 file changed, 201 insertions(+), 186 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c678f7afb1a..4d556d886bc 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -674,6 +674,10 @@ (define_insn "aarch64_write_sysregti" "msrr\t%x0, %x1, %H1" ) +;; --- +;; Unconditional jumps +;; --- + (define_insn "indirect_jump" [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" @@ -692,6 +696,12 @@ (define_insn "jump" [(set_attr "type" "branch")] ) + + +;; --- +;; Conditional jumps +;; --- + (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") @@ -731,6 +741,197 @@ (define_expand "cbranchcc4" "" "") +(define_insn "condjump" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +/* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. */ +bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; +if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 2, "Lbcond", +use_dot_p ? "b.%M0\\t" : "b%M0\\t"); +else + return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) + (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) + (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (const_int 0) + (const_int 1)))] +) + +;; For a 24-bit immediate CST we can optimize the compare for equality +;; and branch sequence from: +;; mov x0, #imm1 +;; movkx0, #imm2, lsl 16 /* x0 contains CST. 
*/ +;; cmp x1, x0 +;; b .Label +;; into the shorter: +;; sub x0, x1, #(CST & 0xfff000) +;; subsx0, x0, #(CST & 0x000fff) +;; b .Label +(define_insn_and_split "*compare_condjump" + [(set (pc) (if_then_else (EQL + (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "aarch64_imm24" "n")) + (label_ref:P (match_operand 2 "" "")) + (pc)))] + "!aarch64_move_imm (INTVAL (operands[1]), mode) + && !aarch64_plus_operand (operands[1], mode) + && !reload_completed" + "#" + "&& true" + [(const_int 0)] + { +HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; +HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; +rtx tmp = gen_reg_rtx (mode); +emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); +emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); +rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); +rtx cmp_rtx = gen_rtx_fmt_ee (, mode, + cc_reg, const0_rtx); +emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); +DONE; + } +) + +(define_insn "aarch64_cb1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "!aarch64_track_speculation" + { +if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, "); +else + return "\\t%0, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) + (lt (minus (match_dup 1) (pc)) (const_int 1048572))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minu
[PATCH 5/9] AArch64: make `far_branch` attribute a boolean
The `far_branch` attribute only ever takes the values 0 or 1, so make it a `no/yes` valued string attribute instead. gcc/ChangeLog: * config/aarch64/aarch64.md (far_branch): Replace 0/1 with no/yes. (aarch64_bcond): Handle rename. (aarch64_cb1): Likewise. (*cb1): Likewise. (@aarch64_tb): Likewise. --- gcc/config/aarch64/aarch64.md | 22 ++ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index bba3d1c505d..248b0e8644f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -554,16 +554,14 @@ (define_attr "mode_enabled" "false,true" ;; Attribute that controls whether an alternative is enabled or not. (define_attr "enabled" "no,yes" (if_then_else (and (eq_attr "arch_enabled" "yes") (eq_attr "mode_enabled" "true")) (const_string "yes") (const_string "no"))) ;; Attribute that specifies whether we are dealing with a branch to a ;; label that is far away, i.e. further away than the maximum/minimum ;; representable in a signed 21-bits number. -;; 0 :=: no -;; 1 :=: yes -(define_attr "far_branch" "" (const_int 0)) +(define_attr "far_branch" "no,yes" (const_string "no")) ;; Attribute that specifies whether the alternative uses MOVPRFX. (define_attr "movprfx" "no,yes" (const_string "no")) @@ -759,45 +757,45 @@ (define_expand "cbranchcc4" ;; Emit `B`, assuming that the condition is already in the CC register. (define_insn "aarch64_bcond" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand 1 "cc_register") (const_int 0)]) (label_ref (match_operand 2)) (pc)))] "" { /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., but the "." is required for SVE conditions. */ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 2, "Lbcond", use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_N_1MiB)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1MiB))) (const_int 4) (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_N_1MiB)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1MiB))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) ;; For a 24-bit immediate CST we can optimize the compare for equality ;; and branch sequence from: ;; mov x0, #imm1 ;; movkx0, #imm2, lsl 16 /* x0 contains CST. 
*/ ;; cmp x1, x0 ;; b .Label ;; into the shorter: ;; sub x0, x1, #(CST & 0xfff000) ;; subsx0, x0, #(CST & 0x000fff) ;; b .Label @@ -829,77 +827,77 @@ (define_insn_and_split "*aarch64_bcond_wide_imm" ;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` (define_insn "aarch64_cbz1" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (const_int 0)) (label_ref (match_operand 1)) (pc)))] "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, "); else return "\\t%0, %l1"; } [(set_attr "type" "branch") (set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int BRANCH_LEN_N_1MiB)) (lt (minus (match_dup 1) (pc)) (const_int BRANCH_LEN_P_1MiB))) (const_int 4) (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_N_1MiB)) (lt (minus (match_dup 2) (pc)) (const_int BRANCH_LEN_P_1MiB))) - (const_int 0) - (const_int 1)))] + (const_string "no") + (const_string "yes")))] ) ;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` (define_insn "*aarch64_tbz1" [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "regi
[PATCH 3/9] AArch64: rename branch instruction rules
Give the `define_insn` rules used in lowering `cbranch4` to RTL more descriptive and consistent names: from now on, each rule is named after the AArch64 instruction that it generates. Also add comments to document each rule. gcc/ChangeLog: * config/aarch64/aarch64.md (condjump): Rename to ... (aarch64_bcond): ...here. (*compare_condjump): Rename to ... (*aarch64_bcond_wide_imm): ...here. (restore_stack_nonlocal): Handle rename. (stack_protect_combined_test): Likewise. * config/aarch64/aarch64-simd.md (cbranch4): Likewise. * config/aarch64/aarch64-sme.md (aarch64_restore_za): Likewise. * config/aarch64/aarch64.cc (aarch64_gen_test_and_branch): Likewise. --- gcc/config/aarch64/aarch64-simd.md | 2 +- gcc/config/aarch64/aarch64-sme.md | 2 +- gcc/config/aarch64/aarch64.cc | 4 ++-- gcc/config/aarch64/aarch64.md | 21 - 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87e513..197a5f65f34 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3913,41 +3913,41 @@ (define_expand "vcond_mask_" (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_equality_operator" [(match_operand:VDQ_I 1 "register_operand") (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")]) (label_ref (match_operand 3 "")) (pc)))] "TARGET_SIMD" { auto code = GET_CODE (operands[0]); rtx tmp = operands[1]; /* If comparing against a non-zero vector we have to do a comparison first so we can have a != 0 comparison with the result. */ if (operands[2] != CONST0_RTX (mode)) { tmp = gen_reg_rtx (mode); emit_insn (gen_xor3 (tmp, operands[1], operands[2])); } /* For 64-bit vectors we need no reductions. */ if (known_eq (128, GET_MODE_BITSIZE (mode))) { /* Always reduce using a V4SI. */ rtx reduc = gen_lowpart (V4SImode, tmp); rtx res = gen_reg_rtx (V4SImode); emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); emit_move_insn (tmp, gen_lowpart (mode, res)); } rtx val = gen_reg_rtx (DImode); emit_move_insn (val, gen_lowpart (DImode, tmp)); rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx); rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3])); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3])); DONE; }) ;; Patterns comparing two vectors to produce a mask. diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index c49affd0dd3..4e4ac71c5a3 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -366,42 +366,42 @@ (define_insn "aarch64_tpidr2_restore" ;; Check whether a lazy save set up by aarch64_save_za was committed ;; and restore the saved contents if so. ;; ;; Operand 0 is the address of the current function's TPIDR2 block. 
(define_insn_and_split "aarch64_restore_za" [(set (reg:DI ZA_SAVED_REGNUM) (unspec:DI [(match_operand 0 "pmode_register_operand" "r") (reg:DI SME_STATE_REGNUM) (reg:DI TPIDR2_SETUP_REGNUM) (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA)) (clobber (reg:DI R0_REGNUM)) (clobber (reg:DI R14_REGNUM)) (clobber (reg:DI R15_REGNUM)) (clobber (reg:DI R16_REGNUM)) (clobber (reg:DI R17_REGNUM)) (clobber (reg:DI R18_REGNUM)) (clobber (reg:DI R30_REGNUM)) (clobber (reg:CC CC_REGNUM))] "" "#" "&& epilogue_completed" [(const_int 0)] { auto label = gen_label_rtx (); auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); -auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label)); +auto jump = emit_likely_jump_insn (gen_aarch64_cbznedi1 (tpidr2, label)); JUMP_LABEL (jump) = label; aarch64_restore_za (operands[0]); emit_label (label); DONE; } ) ;; This instruction is emitted after asms that alter ZA, in order to model ;; the effect on dataflow. The asm itself can't have ZA as an input or ;; an output, since there is no associated data type. Instead it retains ;; the original "za" clobber, which on its own would indicate that ZA ;; is dead. ;; ;; The operand is a unique identifier. diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fff8d9da49d..b5ac6d3f37e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -2872,44 +2872,44 @@ static rtx aarch64_gen_test_and_branch (rtx_code code, rtx x, int bitnum, rtx_code_label *label) { auto mode = GET_MODE (x); if (aarch64_track_speculation) { auto
[PATCH 4/9] AArch64: add constants for branch displacements
Extract the hardcoded values for the minimum PC-relative displacements into named constants and document them. gcc/ChangeLog: * config/aarch64/aarch64.md (BRANCH_LEN_P_128MiB): New constant. (BRANCH_LEN_N_128MiB): Likewise. (BRANCH_LEN_P_1MiB): Likewise. (BRANCH_LEN_N_1MiB): Likewise. (BRANCH_LEN_P_32KiB): Likewise. (BRANCH_LEN_N_32KiB): Likewise. --- gcc/config/aarch64/aarch64.md | 64 ++- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 1b1e982d466..bba3d1c505d 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -692,12 +692,28 @@ (define_insn "indirect_jump" (define_insn "jump" [(set (pc) (label_ref (match_operand 0 "" "")))] "" "b\\t%l0" [(set_attr "type" "branch")] ) +;; Maximum PC-relative positive/negative displacements for various branching +;; instructions. +(define_constants + [ +;; +/- 128MiB. Used by B, BL. +(BRANCH_LEN_P_128MiB 134217724) +(BRANCH_LEN_N_128MiB -134217728) + +;; +/- 1MiB. Used by B., CBZ, CBNZ. +(BRANCH_LEN_P_1MiB 1048572) +(BRANCH_LEN_N_1MiB -1048576) +;; +/- 32KiB. Used by TBZ, TBNZ. +(BRANCH_LEN_P_32KiB 32764) +(BRANCH_LEN_N_32KiB -32768) + ] +) ;; --- ;; Conditional jumps ;; --- @@ -743,41 +759,45 @@ (define_expand "cbranchcc4" ;; Emit `B`, assuming that the condition is already in the CC register. (define_insn "aarch64_bcond" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand 1 "cc_register") (const_int 0)]) (label_ref (match_operand 2)) (pc)))] "" { /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., but the "." is required for SVE conditions. */ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 2, "Lbcond", use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) (const_int 4) (const_int 8))) (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) (const_int 0) (const_int 1)))] ) ;; For a 24-bit immediate CST we can optimize the compare for equality ;; and branch sequence from: ;; mov x0, #imm1 ;; movkx0, #imm2, lsl 16 /* x0 contains CST. 
;; For a 24-bit immediate CST we can optimize the compare for equality
;; and branch sequence from:
;;	mov	x0, #imm1
;;	movk	x0, #imm2, lsl 16 /* x0 contains CST.  */
;;	cmp	x1, x0
;;	b<ne,eq> .Label
;; into the shorter:
;;	sub	x0, x1, #(CST & 0xfff000)
;;	subs	x0, x0, #(CST & 0x000fff)
;;	b<ne,eq> .Label
@@ -809,69 +829,77 @@ (define_insn_and_split "*aarch64_bcond_wide_imm"
;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
(define_insn "aarch64_cbz<optab><mode>1"
  [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
				(const_int 0))
			    (label_ref (match_operand 1))
			    (pc)))]
  "!aarch64_track_speculation"
  {
    if (get_attr_length (insn) == 8)
      return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%0, ");
    else
      return "<cbz>\\t%0, %l1";
  }
  [(set_attr "type" "branch")
   (set (attr "length")
-	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
-			   (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+	(if_then_else (and (ge (minus (match_dup 1) (pc))
+			       (const_int BRANCH_LEN_N_1MiB))
+			   (lt (minus (match_dup 1) (pc))
+			       (const_int BRANCH_LEN_P_1MiB)))
		      (const_int 4)
		      (const_int 8)))
   (set (attr "far_branch")
-	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
-
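For reference, the 4-byte versus 8-byte "length" values correspond to the near
and far forms of the branch: when the label falls outside the ±1 MiB range,
aarch64_gen_far_branch inverts the condition and branches over a plain B.
Schematically (a sketch with illustrative labels, shown as comments):

/* Near form (label within +/- 1 MiB, 4 bytes):
	cbz	x0, .Ltarget

   Far form (otherwise, 8 bytes; condition inverted, then an unconditional B):
	cbnz	x0, .Lcb1
	b	.Ltarget
   .Lcb1:
   Label names above are illustrative only.  */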
[PATCH 6/9] AArch64: recognize `+cmpbr` option
Add the `+cmpbr` option to enable the FEAT_CMPBR architectural extension.

gcc/ChangeLog:

	* config/aarch64/aarch64-option-extensions.def (cmpbr): New option.
	* config/aarch64/aarch64.h (TARGET_CMPBR): New macro.
	* doc/invoke.texi (cmpbr): New option.
---
 gcc/config/aarch64/aarch64-option-extensions.def | 2 ++
 gcc/config/aarch64/aarch64.h                     | 3 +++
 gcc/doc/invoke.texi                              | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index dbbb021f05a..1c3e69799f5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -249,6 +249,8 @@
AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
+AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
+
AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
AARCH64_OPT_EXTENSION("d128", D128, (LSE128), (), (), "d128")

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e8bd8c73c12..d5c4a42e96d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -202,326 +202,329 @@
constexpr auto AARCH64_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
  = AARCH64_ISA_MODE_SM_OFF;
constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
  = aarch64_feature_flags (AARCH64_DEFAULT_ISA_MODE);
#endif

/* Macros to test ISA flags.

   There is intentionally no macro for AARCH64_FL_CRYPTO, since this flag bit
   is not always set when its constituent features are present.
   Check (TARGET_AES && TARGET_SHA2) instead.  */
#define AARCH64_HAVE_ISA(X) (bool (aarch64_isa_flags & AARCH64_FL_##X))
#define AARCH64_ISA_MODE ((aarch64_isa_flags & AARCH64_FL_ISA_MODES).val[0])

/* The current function is a normal non-streaming function.  */
#define TARGET_NON_STREAMING AARCH64_HAVE_ISA (SM_OFF)

/* The current function has a streaming body.  */
#define TARGET_STREAMING AARCH64_HAVE_ISA (SM_ON)

/* The current function has a streaming-compatible body.  */
#define TARGET_STREAMING_COMPATIBLE \
  ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)

/* PSTATE.ZA is enabled in the current function body.  */
#define TARGET_ZA AARCH64_HAVE_ISA (ZA_ON)

/* AdvSIMD is supported in the default configuration, unless disabled by
   -mgeneral-regs-only or by the +nosimd extension.  The set of available
   instructions is then subdivided into:
   - the "base" set, available both in SME streaming mode and in
     non-streaming mode
   - the full set, available only in non-streaming mode.  */
#define TARGET_BASE_SIMD AARCH64_HAVE_ISA (SIMD)
#define TARGET_SIMD (TARGET_BASE_SIMD && TARGET_NON_STREAMING)
#define TARGET_FLOAT AARCH64_HAVE_ISA (FP)

/* AARCH64_FL options necessary for system register implementation.  */

/* Define AARCH64_FL aliases for architectural features which are protected
   by -march flags in binutils but which receive no special treatment by GCC.
   Such flags are inherited from the Binutils definition of system registers
   and are mapped to the architecture in which the feature is implemented.  */
#define AARCH64_FL_RAS		AARCH64_FL_V8A
#define AARCH64_FL_LOR		AARCH64_FL_V8_1A
#define AARCH64_FL_PAN		AARCH64_FL_V8_1A
#define AARCH64_FL_AMU		AARCH64_FL_V8_4A
#define AARCH64_FL_SCXTNUM	AARCH64_FL_V8_5A
#define AARCH64_FL_ID_PFR2	AARCH64_FL_V8_5A

/* Armv8.9-A extension feature bits defined in Binutils but absent from GCC,
   aliased to their base architecture.  */
#define AARCH64_FL_AIE		AARCH64_FL_V8_9A
#define AARCH64_FL_DEBUGv8p9	AARCH64_FL_V8_9A
#define AARCH64_FL_FGT2		AARCH64_FL_V8_9A
#define AARCH64_FL_ITE		AARCH64_FL_V8_9A
#define AARCH64_FL_PFAR		AARCH64_FL_V8_9A
#define AARCH64_FL_PMUv3_ICNTR	AARCH64_FL_V8_9A
#define AARCH64_FL_PMUv3_SS	AARCH64_FL_V8_9A
#define AARCH64_FL_PMUv3p9	AARCH64_FL_V8_9A
#define AARCH64_FL_RASv2	AARCH64_FL_V8_9A
#define AARCH64_FL_S1PIE	AARCH64_FL_V8_9A
#define AARCH64_FL_S1POE	AARCH64_FL_V8_9A
#define AARCH64_FL_S2PIE	AARCH64_FL_V8_9A
#define AARCH64_FL_S2POE	AARCH64_FL_V8_9A
#define AARCH64_FL_SCTLR2	AARCH64_FL_V8_9A
#define AARCH64_FL_SEBEP	AARCH64_FL_V8_9A
#define AARCH64_FL_SPE_FDS	AARCH64_FL_V8_9A
#define AARCH64_FL_TCR2		AARCH64_FL_V8_9A

#define TARGET_V8R AARCH64_HAVE_ISA (V8R)
#define TARGET_V9A AARCH64_HAVE_ISA (V9A)

/* SHA2 is an optional extension to AdvSIMD.  */
#define TARGET_SHA2 AARCH64_HAVE_ISA (SHA2)

/* SHA3 is an optional extension to AdvSIMD.  */
#define TARGET_SHA3 AARCH64_HAVE_ISA (SHA3)

/* AES is an optional extension to AdvSIMD.  */
#define TARGET_AES AARCH64_HAVE_ISA (AES)

/* SM is an optiona
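The aarch64.h hunk is cut off above before the new macro itself appears.
Judging from the ChangeLog entry and the style of the neighbouring TARGET_*
macros, its likely shape is the one-line feature test below; this is a sketch,
not the patch's actual text:

/* Sketch only: the real TARGET_CMPBR hunk is truncated above.  Following
   the pattern of the other TARGET_* macros, it would test the CMPBR flag
   enabled by the +cmpbr extension.  */
#define TARGET_CMPBR AARCH64_HAVE_ISA (CMPBR)

The extension would then be requested as part of -march, e.g.
`-march=<base-arch>+cmpbr`, per the doc/invoke.texi entry added by this patch.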
[PATCH 0/9] AArch64: CMPBR support
This patch series adds support for the CMPBR extension.  It includes the new
`+cmpbr` option and rules to generate the new instructions when lowering
conditional branches.

Karl Meakin (9):
  AArch64: place branch instruction rules together
  AArch64: reformat branch instruction rules
  AArch64: rename branch instruction rules
  AArch64: add constants for branch displacements
  AArch64: make `far_branch` attribute a boolean
  AArch64: recognize `+cmpbr` option
  AArch64: precommit test for CMPBR instructions
  AArch64: rules for CMPBR instructions
  AArch64: make rules for CBZ/TBZ higher priority

 .../aarch64/aarch64-option-extensions.def |    2 +
 gcc/config/aarch64/aarch64-simd.md        |    2 +-
 gcc/config/aarch64/aarch64-sme.md         |    2 +-
 gcc/config/aarch64/aarch64.cc             |    4 +-
 gcc/config/aarch64/aarch64.h              |    3 +
 gcc/config/aarch64/aarch64.md             |  564 +---
 gcc/config/aarch64/iterators.md           |    5 +
 gcc/config/aarch64/predicates.md          |   17 +
 gcc/doc/invoke.texi                       |    3 +
 gcc/testsuite/gcc.target/aarch64/cmpbr.c  | 1239 +
 10 files changed, 1623 insertions(+), 218 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr.c

-- 
2.45.2
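To illustrate the intended effect (this example is not part of the series):
with the extension enabled, an integer compare-and-branch whose constant fits
the CB immediate range can be emitted as a single compare-and-branch
instruction instead of a CMP followed by a conditional branch.

/* Illustrative only; the function and the assembly in the comment are a
   sketch, assuming an -march value that includes +cmpbr.  */
void do_work (void);

void
maybe_work (unsigned x)
{
  /* Without CMPBR the test needs two instructions (cmp w0, #42; b.ne .L1);
     with +cmpbr it can be a single compare-and-branch
     (sketch: cbne w0, #42, .L1).  */
  if (x == 42)
    do_work ();
}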