https://gcc.gnu.org/g:71033b1dfbd12200fc9fb29d4ec975f0c9588be0

commit r16-8401-g71033b1dfbd12200fc9fb29d4ec975f0c9588be0
Author: Roger Sayle <[email protected]>
Date:   Wed Apr 1 23:54:50 2026 +0100

    PR target/123238: VCOND_MASK regression on aarch64.
    
    This patch fixes the regression PR target/123238 on aarch64, using the
    changes to aarch64's aarch64_rtx_costs proposed by Tamar Christina.
    
    To explain and motivate things for aarch64:
    
    void foo(char c[])
    {
        for (int i = 0; i < 8; i++)
            c[i] = c[i] != 'a' ? 'c' : 'e';
    }
    
    currently generates with -O2 the following:
    
    foo:    movi    v30.8b, 0x61
            ldr     d0, [x0]
            movi    v29.8b, 0x63
            movi    v31.8b, 0x65
            cmeq    v30.8b, v0.8b, v30.8b
            not     v30.8b, v30.8b
            bit     v31.8b, v29.8b, v30.8b
            str     d31, [x0]
            ret
    
    where a cmeq followed by a not is used to implement NE_EXPR.
    cf. the comment "Handle NE as !EQ" in aarch64-simd.md's expander
    of vec_cmp<mode><mode>.  With the patch for PR 123238, including this
    change to aarch64_rtx_costs to indicate that NE is more expensive
    than EQ, the middle-end swaps the VCOND_EXPR, reducing the number of
    instructions in the example above [to what it was in GCC 14].
    
    2026-04-01  Tamar Christina  <[email protected]>
                Roger Sayle  <[email protected]>
    
    gcc/ChangeLog
            PR target/123238
            * config/aarch64/aarch64.cc (aarch64_rtx_costs) <case NE/EQ>:
            Provide improved costs for scalar and vector comparisons.
    
    gcc/testsuite/ChangeLog
            PR target/123238
            * gcc.target/aarch64/pr123238.c: New test case.

Diff:
---
 gcc/config/aarch64/aarch64.cc               | 20 ++++++++++++++++++--
 gcc/testsuite/gcc.target/aarch64/pr123238.c | 16 ++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 197d4f272695..4405074cdad5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16234,8 +16234,24 @@ cost_plus:
     case GEU:
     case LE:
     case LEU:
-
-      return false; /* All arguments must be in registers.  */
+      {
+       op0 = XEXP (x, 0);
+       op1 = XEXP (x, 1);
+       machine_mode inner_mode = GET_MODE (op0);
+       *cost += rtx_cost (op0, inner_mode, code, 0, speed);
+       if (op1 != CONST0_RTX (inner_mode))
+         {
+           unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+           bool unsigned_p = code == LTU || code == LEU || code == GTU
+                             || code == GEU;
+           if ((vec_flags & VEC_SVE_DATA) == 0
+               || !aarch64_sve_cmp_immediate_p (op1, !unsigned_p))
+             *cost += rtx_cost (op1, inner_mode, code, 1, speed);
+           if (code == NE && (vec_flags & VEC_ADVSIMD))
+             *cost += COSTS_N_INSNS (1);
+         }
+       return true;
+      }
 
     case FMA:
       op0 = XEXP (x, 0);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123238.c b/gcc/testsuite/gcc.target/aarch64/pr123238.c
new file mode 100644
index 000000000000..3228dde2e764
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123238.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void foo(char c[])
+{
+    for (int i = 0; i < 8; i++)
+        c[i] = c[i] != 'a' ? 'c' : 'e';
+}
+
+void bar(char c[])
+{
+    for (int i = 0; i < 8; i++)
+        c[i] = c[i] == 'a' ? 'c' : 'e';
+}
+
+/* { dg-final { scan-assembler-not "not" } } */

Reply via email to