Karl Meakin <[email protected]> writes:
> Move the rules for CBZ/TBZ to be above the rules for
> CBB<cond>/CBH<cond>/CB<cond>. We want them to have higher priority
> because they can express larger displacements.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.md (aarch64_cbz<optab><mode>1): Move
> above rules for CBB<cond>/CBH<cond>/CB<cond>.
> (*aarch64_tbz<optab><mode>1): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/cmpbr.c: Update tests.
> ---
> gcc/config/aarch64/aarch64.md | 159 ++++++++++++-----------
> gcc/testsuite/gcc.target/aarch64/cmpbr.c | 32 ++---
> 2 files changed, 101 insertions(+), 90 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 23bce55f620..dd58e88fa2f 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -726,6 +726,17 @@ (define_constants
>
> ;; -------------------------------------------------------------------
> ;; Conditional jumps
Very, very minor, but: if we're following the aarch64-sve.md convention,
there'd be another:
;; -------------------------------------------------------------------
here, to separate the heading from the description.
> +;; The order of the rules below is important.
> +;; Higher priority rules are preferred because they can express larger
> +;; displacements.
> +;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ.
> +;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ.
> +;; 3) When the CMPBR extension is enabled:
> +;; a) Comparisons between two registers are handled by
> +;; CBB<cond>/CBH<cond>/CB<cond>.
> +;; b) Comparisons between a GP register and an immediate in the range 0-63 are
Maybe just "in-range immediate", given the multiple ranges in play.
OK with those changes, thanks.
However, I suppose this patch means that:
/* Fall through to `aarch64_cb<INT_CMP:code><GPI:mode>`. */
from patch 8 is not really accurate, since sometimes we might snag
a higher-priority comparison. So maybe just:
/* The branch is supported natively. */
Thanks,
Richard
> +;; handled by CB<cond> (immediate).
> +;; 4) Otherwise, emit a CMP+B<cond> sequence.
> ;; -------------------------------------------------------------------
>
> (define_expand "cbranch<GPI:mode>4"
> @@ -783,6 +794,80 @@ (define_expand "cbranchcc4"
> ""
> )
>
> +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
> +(define_insn "aarch64_cbz<optab><mode>1"
> + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
> + (const_int 0))
> + (label_ref (match_operand 1))
> + (pc)))]
> + "!aarch64_track_speculation"
> + {
> + if (get_attr_length (insn) == 8)
> + return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
> + else
> + return "<cbz>\\t%<w>0, %l1";
> + }
> + [(set_attr "type" "branch")
> + (set (attr "length")
> + (if_then_else (and (ge (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_N_1MiB))
> + (lt (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_P_1MiB)))
> + (const_int 4)
> + (const_int 8)))
> + (set (attr "far_branch")
> + (if_then_else (and (ge (minus (match_dup 2) (pc))
> + (const_int BRANCH_LEN_N_1MiB))
> + (lt (minus (match_dup 2) (pc))
> + (const_int BRANCH_LEN_P_1MiB)))
> + (const_string "no")
> + (const_string "yes")))]
> +)
> +
> +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ`
> +(define_insn "*aarch64_tbz<optab><mode>1"
> + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
> + (const_int 0))
> + (label_ref (match_operand 1))
> + (pc)))
> + (clobber (reg:CC CC_REGNUM))]
> + "!aarch64_track_speculation"
> + {
> + if (get_attr_length (insn) == 8)
> + {
> + if (get_attr_far_branch (insn) == FAR_BRANCH_YES)
> + return aarch64_gen_far_branch (operands, 1, "Ltb",
> + "<inv_tb>\\t%<w>0, <sizem1>, ");
> + else
> + {
> + char buf[64];
> + uint64_t val = ((uint64_t) 1)
> + << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
> + sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
> + output_asm_insn (buf, operands);
> + return "<bcond>\t%l1";
> + }
> + }
> + else
> + return "<tbz>\t%<w>0, <sizem1>, %l1";
> + }
> + [(set_attr "type" "branch")
> + (set (attr "length")
> + (if_then_else (and (ge (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_N_32KiB))
> + (lt (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_P_32KiB)))
> + (const_int 4)
> + (const_int 8)))
> + (set (attr "far_branch")
> + (if_then_else (and (ge (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_N_1MiB))
> + (lt (minus (match_dup 1) (pc))
> + (const_int BRANCH_LEN_P_1MiB)))
> + (const_string "no")
> + (const_string "yes")))]
> +)
> +
> ;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
> ;; Only immediates in the range 0-63 are supported.
> ;; Comparisons against immediates outside this range fall back to
> @@ -909,80 +994,6 @@ (define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
> }
> )
>
> -;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
> -(define_insn "aarch64_cbz<optab><mode>1"
> - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
> - (const_int 0))
> - (label_ref (match_operand 1))
> - (pc)))]
> - "!aarch64_track_speculation"
> - {
> - if (get_attr_length (insn) == 8)
> - return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
> - else
> - return "<cbz>\\t%<w>0, %l1";
> - }
> - [(set_attr "type" "branch")
> - (set (attr "length")
> - (if_then_else (and (ge (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_N_1MiB))
> - (lt (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_P_1MiB)))
> - (const_int 4)
> - (const_int 8)))
> - (set (attr "far_branch")
> - (if_then_else (and (ge (minus (match_dup 2) (pc))
> - (const_int BRANCH_LEN_N_1MiB))
> - (lt (minus (match_dup 2) (pc))
> - (const_int BRANCH_LEN_P_1MiB)))
> - (const_string "no")
> - (const_string "yes")))]
> -)
> -
> -;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ`
> -(define_insn "*aarch64_tbz<optab><mode>1"
> - [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
> - (const_int 0))
> - (label_ref (match_operand 1))
> - (pc)))
> - (clobber (reg:CC CC_REGNUM))]
> - "!aarch64_track_speculation"
> - {
> - if (get_attr_length (insn) == 8)
> - {
> - if (get_attr_far_branch (insn) == FAR_BRANCH_YES)
> - return aarch64_gen_far_branch (operands, 1, "Ltb",
> - "<inv_tb>\\t%<w>0, <sizem1>, ");
> - else
> - {
> - char buf[64];
> - uint64_t val = ((uint64_t) 1)
> - << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
> - sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
> - output_asm_insn (buf, operands);
> - return "<bcond>\t%l1";
> - }
> - }
> - else
> - return "<tbz>\t%<w>0, <sizem1>, %l1";
> - }
> - [(set_attr "type" "branch")
> - (set (attr "length")
> - (if_then_else (and (ge (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_N_32KiB))
> - (lt (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_P_32KiB)))
> - (const_int 4)
> - (const_int 8)))
> - (set (attr "far_branch")
> - (if_then_else (and (ge (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_N_1MiB))
> - (lt (minus (match_dup 1) (pc))
> - (const_int BRANCH_LEN_P_1MiB)))
> - (const_string "no")
> - (const_string "yes")))]
> -)
> -
> ;; -------------------------------------------------------------------
> ;; Test bit and branch
> ;; -------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> index 74e546ec0b7..dc212e236db 100644
> --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> @@ -608,7 +608,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i8_x0_slt_0:
> -** cbblt w0, wzr, .L140
> +** tbnz w0, #7, .L140
> ** b not_taken
> ** .L140:
> ** b taken
> @@ -632,7 +632,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i8_x0_sge_0:
> -** cbblt w0, wzr, .L147
> +** tbnz w0, #7, .L147
> ** b taken
> ** .L147:
> ** b not_taken
> @@ -682,7 +682,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i16_x0_slt_0:
> -** cbhlt w0, wzr, .L160
> +** tbnz w0, #15, .L160
> ** b not_taken
> ** .L160:
> ** b taken
> @@ -706,7 +706,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i16_x0_sge_0:
> -** cbhlt w0, wzr, .L167
> +** tbnz w0, #15, .L167
> ** b taken
> ** .L167:
> ** b not_taken
> @@ -714,7 +714,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u32_x0_eq_0:
> -** cbne w0, wzr, .L169
> +** cbnz w0, .L169
> ** b taken
> ** .L169:
> ** b not_taken
> @@ -722,7 +722,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u32_x0_ne_0:
> -** cbeq w0, wzr, .L171
> +** cbz w0, .L171
> ** b taken
> ** .L171:
> ** b not_taken
> @@ -735,7 +735,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u32_x0_ule_0:
> -** cbne w0, wzr, .L174
> +** cbnz w0, .L174
> ** b taken
> ** .L174:
> ** b not_taken
> @@ -743,7 +743,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u32_x0_ugt_0:
> -** cbeq w0, wzr, .L176
> +** cbz w0, .L176
> ** b taken
> ** .L176:
> ** b not_taken
> @@ -756,7 +756,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i32_x0_slt_0:
> -** cblt w0, wzr, .L180
> +** tbnz w0, #31, .L180
> ** b not_taken
> ** .L180:
> ** b taken
> @@ -780,7 +780,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i32_x0_sge_0:
> -** cblt w0, wzr, .L187
> +** tbnz w0, #31, .L187
> ** b taken
> ** .L187:
> ** b not_taken
> @@ -788,7 +788,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u64_x0_eq_0:
> -** cbne x0, xzr, .L189
> +** cbnz x0, .L189
> ** b taken
> ** .L189:
> ** b not_taken
> @@ -796,7 +796,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u64_x0_ne_0:
> -** cbeq x0, xzr, .L191
> +** cbz x0, .L191
> ** b taken
> ** .L191:
> ** b not_taken
> @@ -809,7 +809,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u64_x0_ule_0:
> -** cbne x0, xzr, .L194
> +** cbnz x0, .L194
> ** b taken
> ** .L194:
> ** b not_taken
> @@ -817,7 +817,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** u64_x0_ugt_0:
> -** cbeq x0, xzr, .L196
> +** cbz x0, .L196
> ** b taken
> ** .L196:
> ** b not_taken
> @@ -830,7 +830,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i64_x0_slt_0:
> -** cblt x0, xzr, .L200
> +** tbnz x0, #63, .L200
> ** b not_taken
> ** .L200:
> ** b taken
> @@ -854,7 +854,7 @@ COMPARE_ALL(u64, i64, 4098);
>
> /*
> ** i64_x0_sge_0:
> -** cblt x0, xzr, .L207
> +** tbnz x0, #63, .L207
> ** b taken
> ** .L207:
> ** b not_taken