Move the rules for CBZ/TBZ above the rules for CBB<cond>/CBH<cond>/CB<cond>. The CBZ/TBZ rules should have higher priority because they can express larger displacements.
gcc/ChangeLog: * config/aarch64/aarch64.md (aarch64_cbz<optab><mode>1): Move above rules for CBB<cond>/CBH<cond>/CB<cond>. (*aarch64_tbz<optab><mode>1): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cmpbr.c: Update tests. --- gcc/config/aarch64/aarch64.md | 163 ++++++++++++----------- gcc/testsuite/gcc.target/aarch64/cmpbr.c | 28 ++-- 2 files changed, 102 insertions(+), 89 deletions(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 32e0f739ae5..fc1cbbeaa4e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -728,6 +728,19 @@ (define_constants ;; Conditional jumps ;; ------------------------------------------------------------------- +;; The order of the rules below is important. +;; Higher priority rules are preferred because they can express larger +;; displacements. +;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ. +;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ. +;; 3) When the CMPBR extension is enabled: +;; a) Comparisons between two registers are handled by +;; CBB<cond>/CBH<cond>/CB<cond>. +;; b) Comparisons between a GP register and an in range immediate are +;; handled by CB<cond> (immediate). +;; 4) Otherwise, emit a CMP+B<cond> sequence. +;; ------------------------------------------------------------------- + (define_expand "cbranch<GPI:mode>4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") @@ -738,7 +751,7 @@ (define_expand "cbranch<GPI:mode>4" { if (TARGET_CMPBR && aarch64_cb_rhs (GET_CODE (operands[0]), operands[2])) { - /* Fall-through to `aarch64_cb<INT_CMP:code><GPI:mode>`. */ + /* The branch is supported natively. 
*/ } else { @@ -784,6 +797,80 @@ (define_expand "cbranchcc4" "" ) +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` +(define_insn "aarch64_cbz<optab><mode>1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1)) + (pc)))] + "!aarch64_track_speculation" + { + if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, "); + else + return "<cbz>\\t%<w>0, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` +(define_insn "*aarch64_tbz<optab><mode>1" + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1)) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!aarch64_track_speculation" + { + if (get_attr_length (insn) == 8) + { + if (get_attr_far_branch (insn) == FAR_BRANCH_YES) + return aarch64_gen_far_branch (operands, 1, "Ltb", + "<inv_tb>\\t%<w>0, <sizem1>, "); + else + { + char buf[64]; + uint64_t val = ((uint64_t) 1) + << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); + sprintf (buf, "tst\t%%<w>0, %" PRId64, val); + output_asm_insn (buf, operands); + return "<bcond>\t%l1"; + } + } + else + return "<tbz>\t%<w>0, <sizem1>, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_32KiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_32KiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + 
(if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + ;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction. ;; The immediate range depends on the comparison code. ;; Comparisons against immediates outside this range fall back to @@ -916,80 +1003,6 @@ (define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>" } ) -;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` -(define_insn "aarch64_cbz<optab><mode>1" - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") - (const_int 0)) - (label_ref (match_operand 1)) - (pc)))] - "!aarch64_track_speculation" - { - if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, "); - else - return "<cbz>\\t%<w>0, %l1"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_N_1MiB)) - (lt (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_P_1MiB))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_N_1MiB)) - (lt (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_P_1MiB))) - (const_string "no") - (const_string "yes")))] -) - -;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` -(define_insn "*aarch64_tbz<optab><mode>1" - [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") - (const_int 0)) - (label_ref (match_operand 1)) - (pc))) - (clobber (reg:CC CC_REGNUM))] - "!aarch64_track_speculation" - { - if (get_attr_length (insn) == 8) - { - if (get_attr_far_branch (insn) == FAR_BRANCH_YES) - return aarch64_gen_far_branch (operands, 1, "Ltb", - "<inv_tb>\\t%<w>0, <sizem1>, "); - else - { - char buf[64]; - uint64_t val = ((uint64_t) 1) - << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); - sprintf (buf, 
"tst\t%%<w>0, %" PRId64, val); - output_asm_insn (buf, operands); - return "<bcond>\t%l1"; - } - } - else - return "<tbz>\t%<w>0, <sizem1>, %l1"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_N_32KiB)) - (lt (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_P_32KiB))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_N_1MiB)) - (lt (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_P_1MiB))) - (const_string "no") - (const_string "yes")))] -) - ;; ------------------------------------------------------------------- ;; Test bit and branch ;; ------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c index 0fe5641fc07..e47be900d5f 100644 --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c @@ -630,7 +630,7 @@ int far_branch(i32 x, i32 y) { /* ** i8_x0_slt_0: -** cbblt w0, wzr, .L140 +** tbnz w0, #7, .L140 ** b not_taken ** .L140: ** b taken @@ -654,7 +654,7 @@ int far_branch(i32 x, i32 y) { /* ** i8_x0_sge_0: -** cbblt w0, wzr, .L147 +** tbnz w0, #7, .L147 ** b taken ** .L147: ** b not_taken @@ -704,7 +704,7 @@ int far_branch(i32 x, i32 y) { /* ** i16_x0_slt_0: -** cbhlt w0, wzr, .L160 +** tbnz w0, #15, .L160 ** b not_taken ** .L160: ** b taken @@ -728,7 +728,7 @@ int far_branch(i32 x, i32 y) { /* ** i16_x0_sge_0: -** cbhlt w0, wzr, .L167 +** tbnz w0, #15, .L167 ** b taken ** .L167: ** b not_taken @@ -736,7 +736,7 @@ int far_branch(i32 x, i32 y) { /* ** u32_x0_eq_0: -** cbne w0, wzr, .L169 +** cbnz w0, .L169 ** b taken ** .L169: ** b not_taken @@ -744,7 +744,7 @@ int far_branch(i32 x, i32 y) { /* ** u32_x0_ne_0: -** cbeq w0, wzr, .L171 +** cbz w0, .L171 ** b taken ** .L171: ** b not_taken @@ -757,7 +757,7 @@ int far_branch(i32 x, i32 y) { /* ** 
u32_x0_ule_0: -** cbne w0, wzr, .L174 +** cbnz w0, .L174 ** b taken ** .L174: ** b not_taken @@ -765,7 +765,7 @@ int far_branch(i32 x, i32 y) { /* ** u32_x0_ugt_0: -** cbeq w0, wzr, .L176 +** cbz w0, .L176 ** b taken ** .L176: ** b not_taken @@ -802,7 +802,7 @@ int far_branch(i32 x, i32 y) { /* ** i32_x0_sge_0: -** cblt w0, wzr, .L187 +** tbnz w0, #31, .L187 ** b taken ** .L187: ** b not_taken @@ -810,7 +810,7 @@ int far_branch(i32 x, i32 y) { /* ** u64_x0_eq_0: -** cbne x0, xzr, .L189 +** cbnz x0, .L189 ** b taken ** .L189: ** b not_taken @@ -818,7 +818,7 @@ int far_branch(i32 x, i32 y) { /* ** u64_x0_ne_0: -** cbeq x0, xzr, .L191 +** cbz x0, .L191 ** b taken ** .L191: ** b not_taken @@ -831,7 +831,7 @@ int far_branch(i32 x, i32 y) { /* ** u64_x0_ule_0: -** cbne x0, xzr, .L194 +** cbnz x0, .L194 ** b taken ** .L194: ** b not_taken @@ -839,7 +839,7 @@ int far_branch(i32 x, i32 y) { /* ** u64_x0_ugt_0: -** cbeq x0, xzr, .L196 +** cbz x0, .L196 ** b taken ** .L196: ** b not_taken @@ -876,7 +876,7 @@ int far_branch(i32 x, i32 y) { /* ** i64_x0_sge_0: -** cblt x0, xzr, .L207 +** tbnz x0, #63, .L207 ** b taken ** .L207: ** b not_taken -- 2.45.2