As Jakub notes in the PR, the representations of add_compare and sub_compare were wrong, and several of the add_carryin patterns were duplicates.

This adds a CC_Cmode for which only the Carry bit is valid.

The patch appears to generate moderately decent code. For gcc7 we should look into why we prefer to mark an output REG_UNUSED rather than match the pattern with that output removed. As a result, we continue to use adds (although the adc is simplified) even after we've proved that there will be no carry into the high part of an adds+adc pair.

Ok?


r~
        * config/aarch64/aarch64-modes.def (CC_Cmode): New.
        * config/aarch64/aarch64.c (aarch64_select_cc_mode): Add check for
        use of CC_Cmode.
        (aarch64_get_condition_code_1): Handle CC_Cmode.
        * config/aarch64/aarch64.md (addti3): Use adddi3_compare1.
        (*add<mode>3_compare1_cconly): New.
        (add<mode>3_compare1): New.
        (add<mode>3_carryin, *addsi3_carryin_uxtw): Sort compare operand
        to be first.  Use aarch64_carry_operation.
        (*add<mode>3_carryin_alt1, *addsi3_carryin_alt1_uxtw): Remove.
        (*add<mode>3_carryin_alt2, *addsi3_carryin_alt2_uxtw): Remove.
        (*add<mode>3_carryin_alt3, *addsi3_carryin_alt3_uxtw): Remove.
        (subti3): Use subdi3_compare1.
        (*sub<mode>3_compare0): Rename from sub<mode>3_compare0.
        (sub<mode>3_compare1): New.
        (*sub<mode>3_carryin0, *subsi3_carryin_uxtw): New.
        (*sub<mode>3_carryin): Use aarch64_borrow_operation.
        (*subsi3_carryin_uxtw): Likewise.
        (*ngc<mode>, *ngcsi_uxtw): Likewise.
        (*sub<mode>3_carryin_alt, *subsi3_carryin_alt_uxtw): New.
        * config/aarch64/iterators.md (DWI): New.
        * config/aarch64/predicates.md (aarch64_carry_operation): New.
        (aarch64_borrow_operation): New.

testsuite/
        * testsuite/gcc.target/aarch64/ccmp_1.c: Accept wzr for zero.
        * testsuite/gcc.target/aarch64/tst_3.c: Accept ands for tst.



diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 3fab205..7de0b3f 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -25,6 +25,7 @@ CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
+CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 38c7443..0c18ab2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4185,6 +4185,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
     return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);
 
+  /* A test for unsigned overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == ZERO_EXTEND)
+    return CC_Cmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -4284,6 +4291,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
        }
       break;
 
+    case CC_Cmode:
+      switch (comp_code)
+       {
+       case NE: return AARCH64_CS;
+       case EQ: return AARCH64_CC;
+       default: return -1;
+       }
+      break;
+
     default:
       return -1;
       break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 71fc514..363785e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1710,7 +1710,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_adddi3_compare1 (low, gen_lowpart (DImode, operands[1]),
                                  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -1755,6 +1755,44 @@
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
+(define_insn "*add<mode>3_compare1_cconly"
+  [(set (reg:CC_C CC_REGNUM)
+       (ne:CC_C
+         (plus:<DWI>
+           (zero_extend:<DWI>
+             (match_operand:GPI 0 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
+           (zero_extend:<DWI>
+             (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))
+         (zero_extend:<DWI>
+           (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "@
+  cmn\\t%<w>0, %<w>1
+  cmn\\t%<w>0, %<w>1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
+(define_insn "add<mode>3_compare1"
+  [(set (reg:CC_C CC_REGNUM)
+       (ne:CC_C
+         (plus:<DWI>
+           (zero_extend:<DWI>
+             (match_operand:GPI 1 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
+           (zero_extend:<DWI>
+             (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")))
+         (zero_extend:<DWI>
+           (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r,r,r")
+       (plus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "@
+  adds\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %<w>2
+  subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
 (define_insn "*adds_shift_imm_<mode>"
   [(set (reg:CC_NZ CC_REGNUM)
        (compare:CC_NZ
@@ -2074,105 +2112,42 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "add<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-             (plus:GPI
-               (match_operand:GPI 1 "register_operand" "r")
-               (match_operand:GPI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-             (plus:SI
-              (match_operand:SI 1 "register_operand" "r")
-              (match_operand:SI 2 "register_operand" "r")))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt1"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-               (match_operand:GPI 1 "register_operand" "r")
-               (match_operand:GPI 2 "register_operand" "r"))
-              (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+(define_expand "add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+       (plus:GPI
+         (plus:GPI
+           (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+           (match_operand:GPI 1 "aarch64_reg_or_zero"))
+         (match_operand:GPI 2 "aarch64_reg_or_zero")))]
    ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt1_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-              (match_operand:SI 1 "register_operand" "r")
-              (match_operand:SI 2 "register_operand" "r"))
-              (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
    ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt2"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-               (match_operand:GPI 1 "register_operand" "r"))
-             (match_operand:GPI 2 "register_operand" "r")))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
 )
 
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt2_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-              (match_operand:SI 1 "register_operand" "r"))
-             (match_operand:SI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
+;; Note that a single add with carry is matched by cinc,
+;; and the adc_reg and csel types are matched into the same
+;; pipelines by existing cores.
 
-(define_insn "*add<mode>3_carryin_alt3"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-               (match_operand:GPI 2 "register_operand" "r"))
-             (match_operand:GPI 1 "register_operand" "r")))]
+(define_insn "*add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (plus:GPI
+         (plus:GPI
+           (match_operand:GPI 3 "aarch64_carry_operation" "")
+           (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
+         (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))]
    ""
    "adc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
 )
 
 ;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt3_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-              (match_operand:SI 2 "register_operand" "r"))
-             (match_operand:SI 1 "register_operand" "r"))))]
+(define_insn "*addsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (plus:SI
+           (plus:SI
+             (match_operand:SI 3 "aarch64_carry_operation" "")
+             (match_operand:SI 1 "aarch64_reg_or_zero" "rZ"))
+           (match_operand:SI 2 "aarch64_reg_or_zero" "rZ"))))]
    ""
    "adc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2281,7 +2256,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]),
                                  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -2293,7 +2268,7 @@
   DONE;
 })
 
-(define_insn "sub<mode>3_compare0"
+(define_insn "*sub<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
        (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
                                  (match_operand:GPI 2 "register_operand" "r"))
@@ -2318,6 +2293,18 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "sub<mode>3_compare1"
+  [(set (reg:CC CC_REGNUM)
+       (compare:CC
+         (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+         (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*sub_<shift>_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
        (minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -2440,13 +2427,53 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "sub<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (minus:GPI (minus:GPI
-               (match_operand:GPI 1 "register_operand" "r")
-               (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-              (match_operand:GPI 2 "register_operand" "r")))]
+;; The hardware description is op1 + ~op2 + C.
+;;                           = op1 + (-op2 - 1) + (1 - !C)
+;;                           = op1 - op2 - 1 + 1 - !C
+;;                           = op1 - op2 - !C.
+;; We describe the latter.
+
+(define_insn "*sub<mode>3_carryin0"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI
+         (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+         (match_operand:GPI 2 "aarch64_borrow_operation" "")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (minus:SI
+           (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+           (match_operand:SI 2 "aarch64_borrow_operation" ""))))]
+   ""
+   "sbc\\t%w0, %w1, wzr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_expand "sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+       (minus:GPI
+         (minus:GPI
+           (match_operand:GPI 1 "aarch64_reg_or_zero")
+           (match_operand:GPI 2 "register_operand"))
+         (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+   ""
+   ""
+)
+
+(define_insn "*sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI
+         (minus:GPI
+           (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+           (match_operand:GPI 2 "register_operand" "r"))
+         (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+
    ""
    "sbc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
@@ -2454,13 +2481,40 @@
 
 ;; zero_extend version of the above
 (define_insn "*subsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (minus:SI (minus:SI
-               (match_operand:SI 1 "register_operand" "r")
-               (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-              (match_operand:SI 2 "register_operand" "r"))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (minus:SI
+           (minus:SI
+             (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+             (match_operand:SI 2 "register_operand" "r"))
+           (match_operand:SI 3 "aarch64_borrow_operation" ""))))]
+
+   ""
+   "sbc\\t%w0, %w1, %w2"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryin_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI
+         (minus:GPI
+           (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+           (match_operand:GPI 3 "aarch64_borrow_operation" ""))
+         (match_operand:GPI 2 "register_operand" "r")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_alt_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (minus:SI
+           (minus:SI
+             (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+             (match_operand:SI 3 "aarch64_borrow_operation" ""))
+           (match_operand:SI 2 "register_operand" "r"))))]
    ""
    "sbc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2564,8 +2618,9 @@
 
 (define_insn "*ngc<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-       (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-                  (match_operand:GPI 1 "register_operand" "r")))]
+       (minus:GPI
+         (neg:GPI (match_operand:GPI 2 "aarch64_borrow_operation" ""))
+         (match_operand:GPI 1 "register_operand" "r")))]
   ""
   "ngc\\t%<w>0, %<w>1"
   [(set_attr "type" "adc_reg")]
@@ -2574,8 +2629,9 @@
 (define_insn "*ngcsi_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
-        (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-                  (match_operand:SI 1 "register_operand" "r"))))]
+         (minus:SI
+           (neg:SI (match_operand:SI 2 "aarch64_borrow_operation" ""))
+           (match_operand:SI 1 "register_operand" "r"))))]
   ""
   "ngc\\t%w0, %w1"
   [(set_attr "type" "adc_reg")]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 49598a2..d9bd391 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -350,6 +350,9 @@
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
+;; For doubling width of an integer mode
+(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
                    (SF "s") (DF "d")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3eb33fa..fa6f96d 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -242,6 +242,25 @@
   return aarch64_get_condition_code (op) >= 0;
 })
 
+(define_special_predicate "aarch64_carry_operation"
+  (match_code "ne,geu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
+
+(define_special_predicate "aarch64_borrow_operation"
+  (match_code "eq,ltu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
 
 ;; True if the operand is memory reference suitable for a load/store exclusive.
 (define_predicate "aarch64_sync_memory_operand"
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index 7c39b61..2b1e87b 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -85,7 +85,7 @@ f13 (int a, int b)
 /* { dg-final { scan-assembler "cmp\t(.)+34" } } */
 /* { dg-final { scan-assembler "cmp\t(.)+35" } } */
 
-/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, (0|wzr)" 4 } } */
 /* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
 /* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_3.c b/gcc/testsuite/gcc.target/aarch64/tst_3.c
index 2204b33..3fea633 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_3.c
@@ -9,4 +9,4 @@ f1 (int x)
   return x;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*1" } } */
+/* { dg-final { scan-assembler "(tst|ands)\t(x|w)\[0-9\]*.*1" } } */

Reply via email to