Karl Meakin <karl.mea...@arm.com> writes: > Move the rules for CBZ/TBZ to be above the rules for > CBB<cond>/CBH<cond>/CB<cond>. We want them to have higher priority > because they can express larger displacements. > > gcc/ChangeLog: > > * config/aarch64/aarch64.md (aarch64_cbz<optab><mode>1): Move > above rules for CBB<cond>/CBH<cond>/CB<cond>. > (*aarch64_tbz<optab><mode>1): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/cmpbr.c: Update tests. > --- > gcc/config/aarch64/aarch64.md | 159 ++++++++++++----------- > gcc/testsuite/gcc.target/aarch64/cmpbr.c | 32 ++--- > 2 files changed, 101 insertions(+), 90 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 23bce55f620..dd58e88fa2f 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -726,6 +726,17 @@ (define_constants > > ;; ------------------------------------------------------------------- > ;; Conditional jumps
Very, very minor, but: if we're following the aarch64-sve.md convention, there'd be another: ;; ------------------------------------------------------------------- here, to separate the heading from the description. > +;; The order of the rules below is important. > +;; Higher priority rules are preferred because they can express larger > +;; displacements. > +;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ. > +;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ. > +;; 3) When the CMPBR extension is enabled: > +;; a) Comparisons between two registers are handled by > +;; CBB<cond>/CBH<cond>/CB<cond>. > +;; b) Comparisons between a GP register and an immediate in the range 0-63 > are Maybe just "in-range immediate", given the multiple ranges in play. OK with those changes, thanks. However, I suppose this patch means that: /* Fall through to `aarch64_cb<INT_CMP:code><GPI:mode>`. */ from patch 8 is not really accurate, since sometimes we might snag a higher-priority comparison. So maybe just: /* The branch is supported natively. */ Thanks, Richard > +;; handled by CB<cond> (immediate). > +;; 4) Otherwise, emit a CMP+B<cond> sequence. 
> ;; ------------------------------------------------------------------- > > (define_expand "cbranch<GPI:mode>4" > @@ -783,6 +794,80 @@ (define_expand "cbranchcc4" > "" > ) > > +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` > +(define_insn "aarch64_cbz<optab><mode>1" > + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") > + (const_int 0)) > + (label_ref (match_operand 1)) > + (pc)))] > + "!aarch64_track_speculation" > + { > + if (get_attr_length (insn) == 8) > + return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, > "); > + else > + return "<cbz>\\t%<w>0, %l1"; > + } > + [(set_attr "type" "branch") > + (set (attr "length") > + (if_then_else (and (ge (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_N_1MiB)) > + (lt (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_P_1MiB))) > + (const_int 4) > + (const_int 8))) > + (set (attr "far_branch") > + (if_then_else (and (ge (minus (match_dup 2) (pc)) > + (const_int BRANCH_LEN_N_1MiB)) > + (lt (minus (match_dup 2) (pc)) > + (const_int BRANCH_LEN_P_1MiB))) > + (const_string "no") > + (const_string "yes")))] > +) > + > +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` > +(define_insn "*aarch64_tbz<optab><mode>1" > + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" > "r") > + (const_int 0)) > + (label_ref (match_operand 1)) > + (pc))) > + (clobber (reg:CC CC_REGNUM))] > + "!aarch64_track_speculation" > + { > + if (get_attr_length (insn) == 8) > + { > + if (get_attr_far_branch (insn) == FAR_BRANCH_YES) > + return aarch64_gen_far_branch (operands, 1, "Ltb", > + "<inv_tb>\\t%<w>0, <sizem1>, "); > + else > + { > + char buf[64]; > + uint64_t val = ((uint64_t) 1) > + << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); > + sprintf (buf, "tst\t%%<w>0, %" PRId64, val); > + output_asm_insn (buf, operands); > + return "<bcond>\t%l1"; > + } > + } > + else > + return "<tbz>\t%<w>0, <sizem1>, %l1"; > + } > + [(set_attr "type" "branch") > + (set 
(attr "length") > + (if_then_else (and (ge (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_N_32KiB)) > + (lt (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_P_32KiB))) > + (const_int 4) > + (const_int 8))) > + (set (attr "far_branch") > + (if_then_else (and (ge (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_N_1MiB)) > + (lt (minus (match_dup 1) (pc)) > + (const_int BRANCH_LEN_P_1MiB))) > + (const_string "no") > + (const_string "yes")))] > +) > + > ;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction. > ;; Only immediates in the range 0-63 are supported. > ;; Comparisons against immediates outside this range fall back to > @@ -909,80 +994,6 @@ (define_insn_and_split > "*aarch64_bcond_wide_imm<GPI:mode>" > } > ) > > -;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` > -(define_insn "aarch64_cbz<optab><mode>1" > - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") > - (const_int 0)) > - (label_ref (match_operand 1)) > - (pc)))] > - "!aarch64_track_speculation" > - { > - if (get_attr_length (insn) == 8) > - return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, > "); > - else > - return "<cbz>\\t%<w>0, %l1"; > - } > - [(set_attr "type" "branch") > - (set (attr "length") > - (if_then_else (and (ge (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_N_1MiB)) > - (lt (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_P_1MiB))) > - (const_int 4) > - (const_int 8))) > - (set (attr "far_branch") > - (if_then_else (and (ge (minus (match_dup 2) (pc)) > - (const_int BRANCH_LEN_N_1MiB)) > - (lt (minus (match_dup 2) (pc)) > - (const_int BRANCH_LEN_P_1MiB))) > - (const_string "no") > - (const_string "yes")))] > -) > - > -;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` > -(define_insn "*aarch64_tbz<optab><mode>1" > - [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" > "r") > - (const_int 0)) > - (label_ref (match_operand 1)) > - (pc))) > - (clobber (reg:CC 
CC_REGNUM))] > - "!aarch64_track_speculation" > - { > - if (get_attr_length (insn) == 8) > - { > - if (get_attr_far_branch (insn) == FAR_BRANCH_YES) > - return aarch64_gen_far_branch (operands, 1, "Ltb", > - "<inv_tb>\\t%<w>0, <sizem1>, "); > - else > - { > - char buf[64]; > - uint64_t val = ((uint64_t) 1) > - << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); > - sprintf (buf, "tst\t%%<w>0, %" PRId64, val); > - output_asm_insn (buf, operands); > - return "<bcond>\t%l1"; > - } > - } > - else > - return "<tbz>\t%<w>0, <sizem1>, %l1"; > - } > - [(set_attr "type" "branch") > - (set (attr "length") > - (if_then_else (and (ge (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_N_32KiB)) > - (lt (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_P_32KiB))) > - (const_int 4) > - (const_int 8))) > - (set (attr "far_branch") > - (if_then_else (and (ge (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_N_1MiB)) > - (lt (minus (match_dup 1) (pc)) > - (const_int BRANCH_LEN_P_1MiB))) > - (const_string "no") > - (const_string "yes")))] > -) > - > ;; ------------------------------------------------------------------- > ;; Test bit and branch > ;; ------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c > b/gcc/testsuite/gcc.target/aarch64/cmpbr.c > index 74e546ec0b7..dc212e236db 100644 > --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c > +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c > @@ -608,7 +608,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i8_x0_slt_0: > -** cbblt w0, wzr, .L140 > +** tbnz w0, #7, .L140 > ** b not_taken > ** .L140: > ** b taken > @@ -632,7 +632,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i8_x0_sge_0: > -** cbblt w0, wzr, .L147 > +** tbnz w0, #7, .L147 > ** b taken > ** .L147: > ** b not_taken > @@ -682,7 +682,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i16_x0_slt_0: > -** cbhlt w0, wzr, .L160 > +** tbnz w0, #15, .L160 > ** b not_taken > ** .L160: > ** b taken > @@ -706,7 +706,7 @@ 
COMPARE_ALL(u64, i64, 4098); > > /* > ** i16_x0_sge_0: > -** cbhlt w0, wzr, .L167 > +** tbnz w0, #15, .L167 > ** b taken > ** .L167: > ** b not_taken > @@ -714,7 +714,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u32_x0_eq_0: > -** cbne w0, wzr, .L169 > +** cbnz w0, .L169 > ** b taken > ** .L169: > ** b not_taken > @@ -722,7 +722,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u32_x0_ne_0: > -** cbeq w0, wzr, .L171 > +** cbz w0, .L171 > ** b taken > ** .L171: > ** b not_taken > @@ -735,7 +735,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u32_x0_ule_0: > -** cbne w0, wzr, .L174 > +** cbnz w0, .L174 > ** b taken > ** .L174: > ** b not_taken > @@ -743,7 +743,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u32_x0_ugt_0: > -** cbeq w0, wzr, .L176 > +** cbz w0, .L176 > ** b taken > ** .L176: > ** b not_taken > @@ -756,7 +756,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i32_x0_slt_0: > -** cblt w0, wzr, .L180 > +** tbnz w0, #31, .L180 > ** b not_taken > ** .L180: > ** b taken > @@ -780,7 +780,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i32_x0_sge_0: > -** cblt w0, wzr, .L187 > +** tbnz w0, #31, .L187 > ** b taken > ** .L187: > ** b not_taken > @@ -788,7 +788,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u64_x0_eq_0: > -** cbne x0, xzr, .L189 > +** cbnz x0, .L189 > ** b taken > ** .L189: > ** b not_taken > @@ -796,7 +796,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u64_x0_ne_0: > -** cbeq x0, xzr, .L191 > +** cbz x0, .L191 > ** b taken > ** .L191: > ** b not_taken > @@ -809,7 +809,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u64_x0_ule_0: > -** cbne x0, xzr, .L194 > +** cbnz x0, .L194 > ** b taken > ** .L194: > ** b not_taken > @@ -817,7 +817,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** u64_x0_ugt_0: > -** cbeq x0, xzr, .L196 > +** cbz x0, .L196 > ** b taken > ** .L196: > ** b not_taken > @@ -830,7 +830,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i64_x0_slt_0: > -** cblt x0, xzr, .L200 > +** tbnz x0, #63, .L200 > ** b not_taken > ** .L200: > ** b taken > @@ -854,7 
+854,7 @@ COMPARE_ALL(u64, i64, 4098); > > /* > ** i64_x0_sge_0: > -** cblt x0, xzr, .L207 > +** tbnz x0, #63, .L207 > ** b taken > ** .L207: > ** b not_taken