Add rules for lowering `cbranch<mode>4` to CBB<cond>/CBH<cond>/CB<cond> when
the CMPBR extension is enabled.

gcc/ChangeLog:

        * config/aarch64/aarch64-protos.h (aarch64_cb_rhs): New function.
        * config/aarch64/aarch64.cc (aarch64_cb_rhs): Likewise.
        * config/aarch64/aarch64.md (cbranch<mode>4): Rename to ...
        (cbranch<GPI:mode>4): ...here, and emit CMPBR if possible.
        (cbranch<SHORT:mode>4): New expand rule.
        (aarch64_cb<INT_CMP:code><GPI:mode>): New insn rule.
        (aarch64_cb<INT_CMP:code><SHORT:mode>): Likewise.
        * config/aarch64/constraints.md (Uc0): New constraint.
        (Uc1): Likewise.
        (Uc2): Likewise.
        * config/aarch64/iterators.md (cmpbr_suffix): New mode attr.
        (INT_CMP): New code iterator.
        (cmpbr_imm_constraint): New code attr.

gcc/testsuite/ChangeLog:

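        * gcc.target/aarch64/cmpbr.c: Update expected assembly.
        * gcc.target/aarch64/cmpbr-far.c: New test.
        * gcc.target/aarch64/cmpbr.h: New file.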
---
 gcc/config/aarch64/aarch64-protos.h           |   2 +
 gcc/config/aarch64/aarch64.cc                 |  33 +
 gcc/config/aarch64/aarch64.md                 |  95 ++-
 gcc/config/aarch64/constraints.md             |  18 +
 gcc/config/aarch64/iterators.md               |  30 +
 gcc/testsuite/gcc.target/aarch64/cmpbr-far.c  |  52 ++
 gcc/testsuite/gcc.target/aarch64/cmpbr.c      | 749 +++++++-----------
 gcc/testsuite/gcc.target/aarch64/cmpbr.h      |  16 +
 .../gcc.target/aarch64/sve/mask_store.c       |  28 +
 gcc/testsuite/gcc.target/aarch64/sve/sqlite.c | 205 +++++
 10 files changed, 766 insertions(+), 462 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr-far.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr.h
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mask_store.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/sqlite.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 31f2f5b8bd2..e946e8da11d 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1135,6 +1135,8 @@ bool aarch64_general_check_builtin_call (location_t, vec<location_t>,
                                         unsigned int, tree, unsigned int,
                                         tree *);
 
+bool aarch64_cb_rhs (rtx_code op_code, rtx rhs);
+
 namespace aarch64 {
   void report_non_ice (location_t, tree, unsigned int);
   void report_out_of_range (location_t, tree, unsigned int, HOST_WIDE_INT,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2cd03b941bd..f3ce3a15b09 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -959,6 +959,39 @@ svpattern_token (enum aarch64_svpattern pattern)
   gcc_unreachable ();
 }
 
+/* Return true if RHS can be the second operand of a CB<cc> instruction for
+   comparison OP_CODE: any REG, or a CONST_INT in the range checked below.  */
+bool
+aarch64_cb_rhs (rtx_code op_code, rtx rhs)
+{
+  if (!CONST_INT_P (rhs))
+    return REG_P (rhs);
+
+  HOST_WIDE_INT rhs_val = INTVAL (rhs);
+
+  switch (op_code)
+    {
+    case EQ:
+    case NE:
+    case GT:
+    case GTU:
+    case LT:
+    case LTU:
+      return IN_RANGE (rhs_val, 0, 63);
+
+    case GE:  /* CBGE:   signed greater than or equal */
+    case GEU: /* CBHS: unsigned greater than or equal */
+      return IN_RANGE (rhs_val, 1, 64);  /* 0..63 shifted up by one.  */
+
+    case LE:  /* CBLE:   signed less than or equal */
+    case LEU: /* CBLS: unsigned less than or equal */
+      return IN_RANGE (rhs_val, -1, 62);  /* 0..63 shifted down by one.  */
+
+    default:  /* Any other comparison code is rejected.  */
+      return false;
+    }
+}
+
 /* Return the location of a piece that is known to be passed or returned
    in registers.  FIRST_ZR is the first unused vector argument register
    and FIRST_PR is the first unused predicate argument register.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0169ec5cf24..c50c41753a7 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -713,6 +713,10 @@ (define_constants
     ;; +/- 32KiB.  Used by TBZ, TBNZ.
     (BRANCH_LEN_P_32KiB  32764)
     (BRANCH_LEN_N_32KiB -32768)
+
+    ;; +/- 1KiB.  Used by CBB<cond>, CBH<cond>, CB<cond>.
+    (BRANCH_LEN_P_1Kib  1020)
+    (BRANCH_LEN_N_1Kib -1024)
   ]
 )
 
@@ -720,7 +724,7 @@ (define_constants
 ;; Conditional jumps
 ;; -------------------------------------------------------------------
 
-(define_expand "cbranch<mode>4"
+(define_expand "cbranch<GPI:mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
                            [(match_operand:GPI 1 "register_operand")
                             (match_operand:GPI 2 "aarch64_plus_operand")])
@@ -728,12 +732,29 @@ (define_expand "cbranch<mode>4"
                           (pc)))]
   ""
   {
-    operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
-                                          operands[2]);
-    operands[2] = const0_rtx;
+    if (TARGET_CMPBR && aarch64_cb_rhs (GET_CODE (operands[0]), operands[2]))
+      {
+       /* The branch is supported natively.  */
+      }
+    else
+      {
+        operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
+                                              operands[1], operands[2]);
+        operands[2] = const0_rtx;
+      }
   }
 )
 
+(define_expand "cbranch<SHORT:mode>4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+                           [(match_operand:SHORT 1 "register_operand")
+                            (match_operand:SHORT 2 "aarch64_reg_or_zero")])
+                          (label_ref (match_operand 3))
+                          (pc)))]
+  "TARGET_CMPBR"
+  ""
+)
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else
                (match_operator 0 "aarch64_comparison_operator"
@@ -759,6 +780,72 @@ (define_expand "cbranchcc4"
   ""
 )
 
+;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
+;; The immediate range depends on the comparison code (see aarch64_cb_rhs);
+;; immediates outside it fall back to CMP + B<cond>.  Labels outside +/-1KiB
+;; take the far-branch form, which uses the inverted condition.
+(define_insn "aarch64_cb<INT_CMP:code><GPI:mode>"
+  [(set (pc) (if_then_else (INT_CMP
+                            (match_operand:GPI 0 "register_operand" "r")
+                            (match_operand:GPI 1 "nonmemory_operand"
+                              "r<INT_CMP:cmpbr_imm_constraint>"))
+                          (label_ref (match_operand 2))
+                          (pc)))]
+  "TARGET_CMPBR && aarch64_cb_rhs (<INT_CMP:CODE>, operands[1])"
+  {
+    return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
+      ? "cb<INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
+      : aarch64_gen_far_branch (operands, 2, "L",
+          "cb<INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+       (if_then_else (and (ge (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_N_1Kib))
+                          (lt (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_P_1Kib)))
+                     (const_int 4)
+                     (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_N_1Kib))
+                          (lt (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_P_1Kib)))
+                     (const_string "no")
+                     (const_string "yes")))]
+)
+
+;; Emit a `CBB<cond> (register)` or `CBH<cond> (register)` instruction.
+(define_insn "aarch64_cb<INT_CMP:code><SHORT:mode>"
+  [(set (pc) (if_then_else (INT_CMP
+                            (match_operand:SHORT 0 "register_operand" "r")
+                            (match_operand:SHORT 1 "aarch64_reg_or_zero" "rZ"))
+                          (label_ref (match_operand 2))
+                          (pc)))]
+  "TARGET_CMPBR"
+  {
+    return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
+      ? "cb<SHORT:cmpbr_suffix><INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
+      : aarch64_gen_far_branch (operands, 2, "L",
+          "cb<SHORT:cmpbr_suffix><INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+       (if_then_else (and (ge (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_N_1Kib))
+                          (lt (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_P_1Kib)))
+                     (const_int 4)
+                     (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_N_1Kib))
+                          (lt (minus (match_dup 2) (pc))
+                              (const_int BRANCH_LEN_P_1Kib)))
+                     (const_string "no")
+                     (const_string "yes")))]
+)
+
 ;; Emit `B<cond>`, assuming that the condition is already in the CC register.
 (define_insn "aarch64_bcond"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index e9f69f823a6..dc1925dfb6c 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -304,6 +304,24 @@ (define_constraint "Ui7"
   (and (match_code "const_int")
        (match_test "(unsigned HOST_WIDE_INT) ival <= 7")))
 
+(define_constraint "Uc0"
+  "@internal
+  A constraint that matches the integers 0...63."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 0, 63)")))
+
+(define_constraint "Uc1"
+  "@internal
+  A constraint that matches the integers 1...64."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 1, 64)")))
+
+(define_constraint "Uc2"
+  "@internal
+  A constraint that matches the integers -1...62."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, -1, 62)")))
+
 (define_constraint "Up3"
   "@internal
   A constraint that matches the integers 2^(0...4)."
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a8957681357..c59fcd679d7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2961,6 +2961,36 @@ (define_code_attr cmp_op [(lt "lt")
                          (geu "hs")
                          (gtu "hi")])
 
+(define_code_attr inv_cmp_op [(lt "ge")
+                         (le "gt")
+                         (eq "ne")
+                         (ne "eq")
+                         (ge "lt")
+                         (gt "le")
+                         (ltu "hs")
+                         (leu "hi")
+                         (geu "lo")
+                         (gtu "ls")])
+
+(define_mode_attr cmpbr_suffix [(QI "b") (HI "h")])
+
+(define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
+
+(define_code_attr cmpbr_imm_constraint [
+    (eq "Uc0")
+    (ne "Uc0")
+    (gt "Uc0")
+    (gtu "Uc0")
+    (lt "Uc0")
+    (ltu "Uc0")
+
+    (ge "Uc1")
+    (geu "Uc1")
+
+    (le "Uc2")
+    (leu "Uc2")
+])
+
 (define_code_attr fix_trunc_optab [(fix "fix_trunc")
                                   (unsigned_fix "fixuns_trunc")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr-far.c b/gcc/testsuite/gcc.target/aarch64/cmpbr-far.c
new file mode 100644
index 00000000000..d63b16ebf97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cmpbr-far.c
@@ -0,0 +1,52 @@
+// Test that the instructions added by FEAT_CMPBR are emitted
+// { dg-do compile }
+// { dg-do-if assemble { target aarch64_asm_cmpbr_ok } }
+// { dg-options "-march=armv9.5-a+cmpbr -O2" }
+// { dg-final { check-function-bodies "**" "*/" "" { target *-*-* } {\.L[0-9]+} } }
+
+#include "cmpbr.h"
+
+// clang-format off
+#define STORE_2()   z = 0; z = 0;
+#define STORE_4()   STORE_2();   STORE_2();
+#define STORE_8()   STORE_4();   STORE_4();
+#define STORE_16()  STORE_8();   STORE_8();
+#define STORE_32()  STORE_16();  STORE_16();
+#define STORE_64()  STORE_32();  STORE_32();
+#define STORE_128() STORE_64();  STORE_64();
+#define STORE_256() STORE_128(); STORE_128();
+// clang-format on
+
+#define COMPARE(ty, name, op, rhs)                                             \
+  int ty##_x0_##name##_##rhs(ty x0, ty x1) {                                   \
+    volatile int z = 0;                                                        \
+    if (__builtin_expect(x0 op rhs, 1)) {                                      \
+      STORE_256();                                                             \
+    }                                                                          \
+    return taken();                                                            \
+  }
+
+COMPARE(u32, eq, ==, 42);
+
+int far_branch(i32 x, i32 y) {
+  volatile int z = 0;
+  if (__builtin_expect(x == y, 1)) {
+    STORE_256();
+  }
+  return taken();
+}
+
+/*
+** far_branch:
+**     sub     sp, sp, #16
+**     str     wzr, \[sp, 12\]
+**     cbeq    w0, w1, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
+**     str     wzr, \[sp, 12\]
+**     ...
+**     str     wzr, \[sp, 12\]
+** .L\2:
+**     add     sp, sp, 16
+**     b       taken
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
index 4b2408fdc84..8846e822f17 100644
--- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c
+++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
@@ -121,208 +121,167 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_eq_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     beq     .L([0-9]+)
+**     cbbeq   w1, w0, .L([0-9]+)
 **     b       not_taken
-** .L\2:
+** .L\1:
 **     b       taken
 */
 
 /*
 ** u8_x0_ne_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     beq     .L([0-9]+)
+**     cbbeq   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u8_x0_ult_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     bls     .L([0-9]+)
+**     cbbls   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u8_x0_ule_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     bcc     .L([0-9]+)
+**     cbblo   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u8_x0_ugt_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     bcs     .L([0-9]+)
+**     cbbhs   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u8_x0_uge_x1:
-**     and     (w[0-9]+), w1, 255
-**     cmp     \1, w0, uxtb
-**     bhi     .L([0-9]+)
+**     cbbhi   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i8_x0_slt_x1:
-**     sxtb    (w[0-9]+), w1
-**     cmp     \1, w0, sxtb
-**     ble     .L([0-9]+)
+**     cbble   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i8_x0_sle_x1:
-**     sxtb    (w[0-9]+), w1
-**     cmp     \1, w0, sxtb
-**     blt     .L([0-9]+)
+**     cbblt   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i8_x0_sgt_x1:
-**     sxtb    (w[0-9]+), w1
-**     cmp     \1, w0, sxtb
-**     bge     .L([0-9]+)
+**     cbbge   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i8_x0_sge_x1:
-**     sxtb    (w[0-9]+), w1
-**     cmp     \1, w0, sxtb
-**     bgt     .L([0-9]+)
+**     cbbgt   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u16_x0_eq_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     beq     .L([0-9]+)
+**     cbheq   w1, w0, .L([0-9]+)
 **     b       not_taken
-** .L\2:
+** .L\1:
 **     b       taken
 */
 
 /*
 ** u16_x0_ne_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     beq     .L([0-9]+)
+**     cbheq   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u16_x0_ult_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     bls     .L([0-9]+)
+**     cbhls   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u16_x0_ule_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     bcc     .L([0-9]+)
+**     cbhlo   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u16_x0_ugt_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     bcs     .L([0-9]+)
+**     cbhhs   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u16_x0_uge_x1:
-**     and     (w[0-9]+), w1, 65535
-**     cmp     \1, w0, uxth
-**     bhi     .L([0-9]+)
+**     cbhhi   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i16_x0_slt_x1:
-**     sxth    (w[0-9]+), w1
-**     cmp     \1, w0, sxth
-**     ble     .L([0-9]+)
+**     cbhle   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i16_x0_sle_x1:
-**     sxth    (w[0-9]+), w1
-**     cmp     \1, w0, sxth
-**     blt     .L([0-9]+)
+**     cbhlt   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i16_x0_sgt_x1:
-**     sxth    (w[0-9]+), w1
-**     cmp     \1, w0, sxth
-**     bge     .L([0-9]+)
+**     cbhge   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i16_x0_sge_x1:
-**     sxth    (w[0-9]+), w1
-**     cmp     \1, w0, sxth
-**     bgt     .L([0-9]+)
+**     cbhgt   w1, w0, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** u32_x0_eq_x1:
-**     cmp     w0, w1
-**     beq     .L([0-9]+)
+**     cbeq    w0, w1, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -330,8 +289,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ne_x1:
-**     cmp     w0, w1
-**     beq     .L([0-9]+)
+**     cbeq    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -339,8 +297,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ult_x1:
-**     cmp     w0, w1
-**     bcs     .L([0-9]+)
+**     cbhs    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -348,8 +305,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ule_x1:
-**     cmp     w0, w1
-**     bhi     .L([0-9]+)
+**     cbhi    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -357,8 +313,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ugt_x1:
-**     cmp     w0, w1
-**     bls     .L([0-9]+)
+**     cbls    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -366,8 +321,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_uge_x1:
-**     cmp     w0, w1
-**     bcc     .L([0-9]+)
+**     cblo    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -375,8 +329,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_slt_x1:
-**     cmp     w0, w1
-**     bge     .L([0-9]+)
+**     cbge    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -384,8 +337,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sle_x1:
-**     cmp     w0, w1
-**     bgt     .L([0-9]+)
+**     cbgt    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -393,8 +345,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sgt_x1:
-**     cmp     w0, w1
-**     ble     .L([0-9]+)
+**     cble    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -402,8 +353,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sge_x1:
-**     cmp     w0, w1
-**     blt     .L([0-9]+)
+**     cblt    w0, w1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -411,8 +361,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_eq_x1:
-**     cmp     x0, x1
-**     beq     .L([0-9]+)
+**     cbeq    x0, x1, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -420,8 +369,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ne_x1:
-**     cmp     x0, x1
-**     beq     .L([0-9]+)
+**     cbeq    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -429,8 +377,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ult_x1:
-**     cmp     x0, x1
-**     bcs     .L([0-9]+)
+**     cbhs    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -438,8 +385,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ule_x1:
-**     cmp     x0, x1
-**     bhi     .L([0-9]+)
+**     cbhi    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -447,8 +393,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ugt_x1:
-**     cmp     x0, x1
-**     bls     .L([0-9]+)
+**     cbls    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -456,8 +401,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_uge_x1:
-**     cmp     x0, x1
-**     bcc     .L([0-9]+)
+**     cblo    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -465,8 +409,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_slt_x1:
-**     cmp     x0, x1
-**     bge     .L([0-9]+)
+**     cbge    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -474,8 +417,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sle_x1:
-**     cmp     x0, x1
-**     bgt     .L([0-9]+)
+**     cbgt    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -483,8 +425,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sgt_x1:
-**     cmp     x0, x1
-**     ble     .L([0-9]+)
+**     cble    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -492,8 +433,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sge_x1:
-**     cmp     x0, x1
-**     blt     .L([0-9]+)
+**     cblt    x0, x1, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -501,8 +441,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_eq_42:
-**     cmp     w0, 42
-**     beq     .L([0-9]+)
+**     cbeq    w0, 42, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -510,8 +449,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ne_42:
-**     cmp     w0, 42
-**     beq     .L([0-9]+)
+**     cbeq    w0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -519,8 +457,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ult_42:
-**     cmp     w0, 41
-**     bhi     .L([0-9]+)
+**     cbhi    w0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -528,8 +465,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ule_42:
-**     cmp     w0, 42
-**     bhi     .L([0-9]+)
+**     cbhi    w0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -537,8 +473,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ugt_42:
-**     cmp     w0, 42
-**     bls     .L([0-9]+)
+**     cbls    w0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -546,8 +481,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_uge_42:
-**     cmp     w0, 41
-**     bls     .L([0-9]+)
+**     cbls    w0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -555,8 +489,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_slt_42:
-**     cmp     w0, 41
-**     bgt     .L([0-9]+)
+**     cbgt    w0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -564,8 +497,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sle_42:
-**     cmp     w0, 42
-**     bgt     .L([0-9]+)
+**     cbgt    w0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -573,8 +505,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sgt_42:
-**     cmp     w0, 42
-**     ble     .L([0-9]+)
+**     cble    w0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -582,8 +513,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sge_42:
-**     cmp     w0, 41
-**     ble     .L([0-9]+)
+**     cble    w0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -591,8 +521,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_eq_42:
-**     cmp     x0, 42
-**     beq     .L([0-9]+)
+**     cbeq    x0, 42, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -600,8 +529,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ne_42:
-**     cmp     x0, 42
-**     beq     .L([0-9]+)
+**     cbeq    x0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -609,8 +537,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ult_42:
-**     cmp     x0, 41
-**     bhi     .L([0-9]+)
+**     cbhi    x0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -618,8 +545,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ule_42:
-**     cmp     x0, 42
-**     bhi     .L([0-9]+)
+**     cbhi    x0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -627,8 +553,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ugt_42:
-**     cmp     x0, 42
-**     bls     .L([0-9]+)
+**     cbls    x0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -636,8 +561,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_uge_42:
-**     cmp     x0, 41
-**     bls     .L([0-9]+)
+**     cbls    x0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -645,8 +569,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_slt_42:
-**     cmp     x0, 41
-**     bgt     .L([0-9]+)
+**     cbgt    x0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -654,8 +577,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sle_42:
-**     cmp     x0, 42
-**     bgt     .L([0-9]+)
+**     cbgt    x0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -663,8 +585,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sgt_42:
-**     cmp     x0, 42
-**     ble     .L([0-9]+)
+**     cble    x0, 42, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -672,8 +593,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sge_42:
-**     cmp     x0, 41
-**     ble     .L([0-9]+)
+**     cble    x0, 41, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -681,8 +601,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_eq_0:
-**     tst     w0, 255
-**     bne     .L([0-9]+)
+**     cbbne   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -690,8 +609,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ne_0:
-**     tst     w0, 255
-**     beq     .L([0-9]+)
+**     cbbeq   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -704,8 +622,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ule_0:
-**     tst     w0, 255
-**     bne     .L([0-9]+)
+**     cbbne   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -713,8 +630,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ugt_0:
-**     tst     w0, 255
-**     beq     .L([0-9]+)
+**     cbbeq   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -727,7 +643,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_slt_0:
-**     tbnz    w0, 7, .L([0-9]+)
+**     cbblt   w0, wzr, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -735,27 +651,23 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sle_0:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 0
-**     ble     .L([0-9]+)
+**     cbble   w0, wzr, .L([0-9]+)
 **     b       not_taken
-** .L\2:
+** .L\1:
 **     b       taken
 */
 
 /*
 ** i8_x0_sgt_0:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 0
-**     ble     .L([0-9]+)
+**     cbble   w0, wzr, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i8_x0_sge_0:
-**     tbnz    w0, 7, .L([0-9]+)
+**     cbblt   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -763,8 +675,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_eq_0:
-**     tst     w0, 65535
-**     bne     .L([0-9]+)
+**     cbhne   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -772,8 +683,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ne_0:
-**     tst     w0, 65535
-**     beq     .L([0-9]+)
+**     cbheq   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -786,8 +696,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ule_0:
-**     tst     w0, 65535
-**     bne     .L([0-9]+)
+**     cbhne   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -795,8 +704,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ugt_0:
-**     tst     w0, 65535
-**     beq     .L([0-9]+)
+**     cbheq   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -809,7 +717,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_slt_0:
-**     tbnz    w0, 15, .L([0-9]+)
+**     cbhlt   w0, wzr, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -817,27 +725,23 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sle_0:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 0
-**     ble     .L([0-9]+)
+**     cbhle   w0, wzr, .L([0-9]+)
 **     b       not_taken
-** .L\2:
+** .L\1:
 **     b       taken
 */
 
 /*
 ** i16_x0_sgt_0:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 0
-**     ble     .L([0-9]+)
+**     cbhle   w0, wzr, .L([0-9]+)
 **     b       taken
-** .L\2:
+** .L\1:
 **     b       not_taken
 */
 
 /*
 ** i16_x0_sge_0:
-**     tbnz    w0, 15, .L([0-9]+)
+**     cbhlt   w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -845,7 +749,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_eq_0:
-**     cbnz    w0, .L([0-9]+)
+**     cbne    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -853,7 +757,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ne_0:
-**     cbz     w0, .L([0-9]+)
+**     cbeq    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -866,7 +770,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ule_0:
-**     cbnz    w0, .L([0-9]+)
+**     cbne    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -874,7 +778,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ugt_0:
-**     cbz     w0, .L([0-9]+)
+**     cbeq    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -895,8 +799,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sle_0:
-**     cmp     w0, 0
-**     ble     .L([0-9]+)
+**     cble    w0, wzr, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -904,8 +807,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sgt_0:
-**     cmp     w0, 0
-**     ble     .L([0-9]+)
+**     cble    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -913,7 +815,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sge_0:
-**     tbnz    w0, #31, .L([0-9]+)
+**     cblt    w0, wzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -921,7 +823,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_eq_0:
-**     cbnz    x0, .L([0-9]+)
+**     cbne    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -929,7 +831,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ne_0:
-**     cbz     x0, .L([0-9]+)
+**     cbeq    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -942,7 +844,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ule_0:
-**     cbnz    x0, .L([0-9]+)
+**     cbne    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -950,7 +852,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ugt_0:
-**     cbz     x0, .L([0-9]+)
+**     cbeq    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -971,8 +873,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sle_0:
-**     cmp     x0, 0
-**     ble     .L([0-9]+)
+**     cble    x0, xzr, .L([0-9]+)
 **     b       not_taken
 ** .L\1:
 **     b       taken
@@ -980,8 +881,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sgt_0:
-**     cmp     x0, 0
-**     ble     .L([0-9]+)
+**     cble    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -989,7 +889,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sge_0:
-**     tbnz    x0, #63, .L([0-9]+)
+**     cblt    x0, xzr, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -997,9 +897,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_eq_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 42
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbbeq   w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1007,9 +906,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ne_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 42
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbbeq   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1017,9 +915,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ult_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 41
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbbhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1027,9 +924,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ule_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 42
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbbhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1037,9 +933,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ugt_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 42
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbbls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1047,9 +942,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_uge_42:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 41
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbbls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1057,9 +951,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_slt_42:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 41
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbbgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1067,9 +960,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sle_42:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 42
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbbgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1077,9 +969,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sgt_42:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 42
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbble   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1087,9 +978,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sge_42:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 41
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbble   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1097,9 +987,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_eq_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 42
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1107,9 +996,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ne_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 42
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1117,9 +1005,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ult_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 41
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1127,9 +1014,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ule_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 42
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1137,9 +1023,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ugt_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 42
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1147,9 +1032,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_uge_42:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 41
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1157,9 +1041,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_slt_42:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 41
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1167,9 +1050,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sle_42:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 42
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1177,9 +1059,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sgt_42:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 42
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 42
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1187,9 +1068,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sge_42:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 41
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 41
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1197,9 +1077,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_eq_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 64
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbbeq   w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1207,9 +1086,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ne_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 64
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbbeq   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1217,9 +1095,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ult_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 63
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbbhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1227,9 +1104,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ule_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 64
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbbhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1237,9 +1113,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_ugt_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 64
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbbls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1247,9 +1122,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u8_x0_uge_64:
-**     and     (w[0-9]+), w0, 255
-**     cmp     \1, 63
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbbls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1257,9 +1131,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_slt_64:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 63
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbbgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1267,9 +1140,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sle_64:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 64
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbbgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1277,9 +1149,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sgt_64:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 64
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbble   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1287,9 +1158,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i8_x0_sge_64:
-**     sxtb    (w[0-9]+), w0
-**     cmp     \1, 63
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbble   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1297,9 +1167,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_eq_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 64
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1307,9 +1176,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ne_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 64
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1317,9 +1185,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ult_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 63
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1327,9 +1194,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ule_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 64
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1337,9 +1203,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ugt_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 64
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1347,9 +1212,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_uge_64:
-**     and     (w[0-9]+), w0, 65535
-**     cmp     \1, 63
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1357,9 +1221,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_slt_64:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 63
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1367,9 +1230,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sle_64:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 64
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1377,9 +1239,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sgt_64:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 64
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 64
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1387,9 +1248,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sge_64:
-**     sxth    (w[0-9]+), w0
-**     cmp     \1, 63
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 63
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1415,8 +1275,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ult_64:
-**     cmp     w0, 63
-**     bhi     .L([0-9]+)
+**     cbhi    w0, 63, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -1451,8 +1310,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_slt_64:
-**     cmp     w0, 63
-**     bgt     .L([0-9]+)
+**     cbgt    w0, 63, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -1505,8 +1363,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ult_64:
-**     cmp     x0, 63
-**     bhi     .L([0-9]+)
+**     cbhi    x0, 63, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -1541,8 +1398,7 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_slt_64:
-**     cmp     x0, 63
-**     bgt     .L([0-9]+)
+**     cbgt    x0, 63, .L([0-9]+)
 **     b       taken
 ** .L\1:
 **     b       not_taken
@@ -1577,9 +1433,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_eq_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, uxth
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1587,9 +1442,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ne_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, uxth
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbheq   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1597,9 +1451,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ult_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     \1, w0, uxth
-**     bcc     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1607,9 +1460,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ule_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, uxth
-**     bcc     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbhhi   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1617,9 +1469,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_ugt_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, uxth
-**     bcs     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1627,9 +1478,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u16_x0_uge_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     \1, w0, uxth
-**     bcs     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbhls   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1637,9 +1487,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_slt_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     \1, w0, sxth
-**     blt     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1647,9 +1496,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sle_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, sxth
-**     blt     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbhgt   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1657,9 +1505,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sgt_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     \1, w0, sxth
-**     bge     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1667,9 +1514,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i16_x0_sge_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     \1, w0, sxth
-**     bge     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbhle   w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1677,9 +1523,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_eq_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbeq    w0, w\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1687,9 +1532,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ne_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     beq     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbeq    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1697,9 +1541,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ult_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     w0, \1
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbhi    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1707,9 +1550,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ule_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     bhi     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbhi    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1717,9 +1559,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_ugt_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbls    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1727,9 +1568,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u32_x0_uge_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     w0, \1
-**     bls     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbls    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1737,9 +1577,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_slt_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     w0, \1
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cbgt    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1747,9 +1586,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sle_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     bgt     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cbgt    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1757,9 +1595,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sgt_4098:
-**     mov     (w[0-9]+), 4098
-**     cmp     w0, \1
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 4098
+**     cble    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1767,9 +1604,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i32_x0_sge_4098:
-**     mov     (w[0-9]+), 4097
-**     cmp     w0, \1
-**     ble     .L([0-9]+)
+**     mov     w([0-9]+), 4097
+**     cble    w0, w\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1777,9 +1613,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_eq_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     beq     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cbeq    x0, x\1, .L([0-9]+)
 **     b       not_taken
 ** .L\2:
 **     b       taken
@@ -1787,9 +1622,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ne_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     beq     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cbeq    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1797,9 +1631,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ult_4098:
-**     mov     (x[0-9]+), 4097
-**     cmp     x0, \1
-**     bhi     .L([0-9]+)
+**     mov     x([0-9]+), 4097
+**     cbhi    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1807,9 +1640,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ule_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     bhi     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cbhi    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1817,9 +1649,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_ugt_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     bls     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cbls    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1827,9 +1658,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** u64_x0_uge_4098:
-**     mov     (x[0-9]+), 4097
-**     cmp     x0, \1
-**     bls     .L([0-9]+)
+**     mov     x([0-9]+), 4097
+**     cbls    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1837,9 +1667,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_slt_4098:
-**     mov     (x[0-9]+), 4097
-**     cmp     x0, \1
-**     bgt     .L([0-9]+)
+**     mov     x([0-9]+), 4097
+**     cbgt    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1847,9 +1676,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sle_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     bgt     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cbgt    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1857,9 +1685,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sgt_4098:
-**     mov     (x[0-9]+), 4098
-**     cmp     x0, \1
-**     ble     .L([0-9]+)
+**     mov     x([0-9]+), 4098
+**     cble    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1867,9 +1694,8 @@ FAR_BRANCH(u64, 42);
 
 /*
 ** i64_x0_sge_4098:
-**     mov     (x[0-9]+), 4097
-**     cmp     x0, \1
-**     ble     .L([0-9]+)
+**     mov     x([0-9]+), 4097
+**     cble    x0, x\1, .L([0-9]+)
 **     b       taken
 ** .L\2:
 **     b       not_taken
@@ -1878,10 +1704,10 @@ FAR_BRANCH(u64, 42);
 /*
 ** far_branch_u8_x0_eq_x1:
 **     sub     sp, sp, #16
-**     and     w([0-9]+), w1, 255
 **     str     wzr, \[sp, 12\]
-**     cmp     w\1, w0, uxtb
-**     bne     .L([0-9]+)
+**     cbbeq   w0|w1, w1|w0, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
@@ -1889,13 +1715,14 @@ FAR_BRANCH(u64, 42);
 **     add     sp, sp, 16
 **     b       taken
 */
+
 /*
 ** far_branch_u16_x0_eq_x1:
 **     sub     sp, sp, #16
-**     and     w([0-9]+), w1, 65535
 **     str     wzr, \[sp, 12\]
-**     cmp     w\1, w0, uxth
-**     bne     .L([0-9]+)
+**     cbheq   w0|w1, w1|w0, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
@@ -1908,12 +1735,13 @@ FAR_BRANCH(u64, 42);
 ** far_branch_u32_x0_eq_x1:
 **     sub     sp, sp, #16
 **     str     wzr, \[sp, 12\]
-**     cmp     w0|w1, w1|w0
-**     bne     .L([0-9]+)
+**     cbeq    w0, w1, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\1:
+** .L\2:
 **     add     sp, sp, 16
 **     b       taken
 */
@@ -1922,12 +1750,13 @@ FAR_BRANCH(u64, 42);
 ** far_branch_u64_x0_eq_x1:
 **     sub     sp, sp, #16
 **     str     wzr, \[sp, 12\]
-**     cmp     x0|x1, x1|x0
-**     bne     .L([0-9]+)
+**     cbeq    x0, x1, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\1:
+** .L\2:
 **     add     sp, sp, 16
 **     b       taken
 */
@@ -1935,14 +1764,15 @@ FAR_BRANCH(u64, 42);
 /*
 ** far_branch_u8_x0_eq_42:
 **     sub     sp, sp, #16
-**     and     w([0-9]+), w0, 255
+**     mov     w([0-9]+), 42
 **     str     wzr, \[sp, 12\]
-**     cmp     w\1, 42
-**     bne     .L([0-9]+)
+**     cbbeq   w0, w\1, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\2:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\2:
+** .L\3:
 **     add     sp, sp, 16
 **     b       taken
 */
@@ -1950,14 +1780,15 @@ FAR_BRANCH(u64, 42);
 /*
 ** far_branch_u16_x0_eq_42:
 **     sub     sp, sp, #16
-**     and     w([0-9]+), w0, 65535
+**     mov     w([0-9]+), 42
 **     str     wzr, \[sp, 12\]
-**     cmp     w\1, 42
-**     bne     .L([0-9]+)
+**     cbheq   w0, w\1, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\2:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\2:
+** .L\3:
 **     add     sp, sp, 16
 **     b       taken
 */
@@ -1966,12 +1797,13 @@ FAR_BRANCH(u64, 42);
 ** far_branch_u32_x0_eq_42:
 **     sub     sp, sp, #16
 **     str     wzr, \[sp, 12\]
-**     cmp     w0, 42
-**     bne     .L([0-9]+)
+**     cbeq    w0, 42, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\1:
+** .L\2:
 **     add     sp, sp, 16
 **     b       taken
 */
@@ -1980,12 +1812,13 @@ FAR_BRANCH(u64, 42);
 ** far_branch_u64_x0_eq_42:
 **     sub     sp, sp, #16
 **     str     wzr, \[sp, 12\]
-**     cmp     x0, 42
-**     bne     .L([0-9]+)
+**     cbeq    x0, 42, .L([0-9]+)
+**     b       .L([0-9]+)
+** .L\1:
 **     str     wzr, \[sp, 12\]
 **     ...
 **     str     wzr, \[sp, 12\]
-** .L\1:
+** .L\2:
 **     add     sp, sp, 16
 **     b       taken
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.h b/gcc/testsuite/gcc.target/aarch64/cmpbr.h
new file mode 100644
index 00000000000..b173b7944e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.h
@@ -0,0 +1,16 @@
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef int8_t i8;
+
+typedef uint16_t u16;
+typedef int16_t i16;
+
+typedef uint32_t u32;
+typedef int32_t i32;
+
+typedef uint64_t u64;
+typedef int64_t i64;
+
+int taken();
+int not_taken();
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_store.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_store.c
new file mode 100644
index 00000000000..aed1f1748b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_store.c
@@ -0,0 +1,28 @@
+// { dg-do compile }
+// { dg-options "-march=armv8-a+sve -msve-vector-bits=512 -O3" }
+// { dg-final { check-function-bodies "**" "" "" } }
+
+typedef struct Array {
+    int elems[3];
+} Array;
+
+int loop(Array **pp, int len, int idx, int reset) {
+    int nRet = 0;
+
+    #pragma GCC unroll 0
+    for (int i = 0; i < len; i++) {
+        Array *p = pp[i];
+        if (p) {
+            nRet += p->elems[idx];
+
+            if (reset) {
+                p->elems[idx] = 0;
+            }
+        }
+    }
+
+    return nRet;
+}
+
+// { dg-final { scan-assembler-times {ld1w\tz[0-9]+\.d, p[0-7]/z} 1 } }
+// { dg-final { scan-assembler-times {add\tz[0-9]+\.s, p[0-7]/m}  1 } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sqlite.c b/gcc/testsuite/gcc.target/aarch64/sve/sqlite.c
new file mode 100644
index 00000000000..25e11f6e4eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sqlite.c
@@ -0,0 +1,205 @@
+// { dg-do compile }
+// { dg-options "-march=armv8-a+sve -msve-vector-bits=512 -O3" }
+// { dg-final { check-function-bodies "**" "" "" { target *-*-* } {\.L[0-9]+} } }
+
+#define SQLITE_DBSTATUS_CACHE_HIT 7
+
+typedef short i16;
+typedef long i64;
+typedef i64 sqlite3_int64;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+
+typedef struct Bitvec Bitvec;
+typedef struct BtCursor BtCursor;
+typedef struct BtLock BtLock;
+typedef struct Btree Btree;
+typedef struct BtShared BtShared;
+typedef struct Db Db;
+typedef struct DbPage DbPage;
+typedef struct MemPage MemPage;
+typedef struct Pager Pager;
+typedef struct PagerSavepoint PagerSavepoint;
+typedef struct PCache PCache;
+typedef struct PgHdr PgHdr;
+typedef struct Schema Schema;
+typedef struct sqlite3 sqlite3;
+typedef struct sqlite3_backup sqlite3_backup;
+typedef struct sqlite3_file sqlite3_file;
+typedef struct sqlite3_mutex sqlite3_mutex;
+typedef struct sqlite3_vfs sqlite3_vfs;
+typedef struct Wal Wal;
+
+struct BtLock {
+  Btree *pBtree;
+  u32 iTable;
+  u8 eLock;
+  BtLock *pNext;
+};
+
+struct Btree {
+  sqlite3 *db;
+  BtShared *pBtShared;
+  u8 inTrans;
+  u8 sharable;
+  u8 locked;
+  u8 hasIncrblobCur;
+  int wantToLock;
+  int nBackup;
+  u32 iDataVersion;
+  Btree *pNext;
+  Btree *pPrev;
+  BtLock lock;
+};
+
+struct BtShared {
+  Pager *pPager;
+  sqlite3 *db;
+  BtCursor *pCursor;
+  MemPage *pPage1;
+  u8 openFlags;
+  u8 autoVacuum;
+  u8 incrVacuum;
+  u8 bDoTruncate;
+  u8 inTransaction;
+  u8 max1bytePayload;
+  u8 optimalReserve;
+  u16 btsFlags;
+  u16 maxLocal;
+  u16 minLocal;
+  u16 maxLeaf;
+  u16 minLeaf;
+  u32 pageSize;
+  u32 usableSize;
+  int nTransaction;
+  u32 nPage;
+  void *pSchema;
+  void (*xFreeSchema)(void *);
+  sqlite3_mutex *mutex;
+  Bitvec *pHasContent;
+  int nRef;
+  BtShared *pNext;
+  BtLock *pLock;
+  Btree *pWriter;
+  u8 *pTmpSpace;
+};
+
+struct Db {
+  char *zDbSName;
+  Btree *pBt;
+  u8 safety_level;
+  u8 bSyncSet;
+  Schema *pSchema;
+};
+
+struct Pager {
+  sqlite3_vfs *pVfs;
+  u8 exclusiveMode;
+  u8 journalMode;
+  u8 useJournal;
+  u8 noSync;
+  u8 fullSync;
+  u8 extraSync;
+  u8 syncFlags;
+  u8 walSyncFlags;
+  u8 tempFile;
+  u8 noLock;
+  u8 readOnly;
+  u8 memDb;
+  u8 eState;
+  u8 eLock;
+  u8 changeCountDone;
+  u8 setMaster;
+  u8 doNotSpill;
+  u8 subjInMemory;
+  u8 bUseFetch;
+  u8 hasHeldSharedLock;
+  u32 dbSize;
+  u32 dbOrigSize;
+  u32 dbFileSize;
+  u32 dbHintSize;
+  int errCode;
+  int nRec;
+  u32 cksumInit;
+  u32 nSubRec;
+  Bitvec *pInJournal;
+  sqlite3_file *fd;
+  sqlite3_file *jfd;
+  sqlite3_file *sjfd;
+  i64 journalOff;
+  i64 journalHdr;
+  sqlite3_backup *pBackup;
+  PagerSavepoint *aSavepoint;
+  int nSavepoint;
+  u32 iDataVersion;
+  char dbFileVers[16];
+  int nMmapOut;
+  sqlite3_int64 szMmap;
+  PgHdr *pMmapFreelist;
+  u16 nExtra;
+  i16 nReserve;
+  u32 vfsFlags;
+  u32 sectorSize;
+  int pageSize;
+  u32 mxPgno;
+  i64 journalSizeLimit;
+  char *zFilename;
+  char *zJournal;
+  int (*xBusyHandler)(void *);
+  void *pBusyHandlerArg;
+  int aStat[3];
+  void *pCodec;
+  char *pTmpSpace;
+  PCache *pPCache;
+  Wal *pWal;
+  char *zWal;
+};
+
+struct sqlite3 {
+  Db *dbs;
+  int len;
+};
+
+
+#if 0
+int loop1(sqlite3 sqlite, int eStat, int resetFlag) {
+  int nRet = 0;
+
+  #pragma GCC unroll 0
+  for (int i = 0; i < sqlite.len; i++) {
+    Db *db = &sqlite.dbs[i];
+    if (db->pBt) {
+      if (resetFlag) {
+        db->pBt->pBtShared->pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0;
+      }
+    }
+  }
+
+  return nRet;
+}
+#endif
+
+int loop2(sqlite3 *sqlite, int eStat, int resetFlag) {
+  int nRet = 0;
+
+  int len_before = sqlite->len;
+
+  #pragma GCC unroll 0
+  for (int i = 0; i < sqlite->len; i++) {
+    Db *db = &sqlite->dbs[i];
+    if (db->pBt) {
+      if (resetFlag ) {
+        db->pBt->pBtShared->pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0;
+      }
+    }
+  }
+
+
+  int len_after = sqlite->len;
+
+  return len_before == len_after;
+}
+
+// { dg-final { scan-assembler-times {ld1w\tz[0-9]+\.d, p[0-7]/z} 1 } }
+// { dg-final { scan-assembler-times {add\tz[0-9]+\.s, p[0-7]/m}  1 } }
-- 
2.48.1

Reply via email to