https://gcc.gnu.org/g:71033b1dfbd12200fc9fb29d4ec975f0c9588be0
commit r16-8401-g71033b1dfbd12200fc9fb29d4ec975f0c9588be0 Author: Roger Sayle <[email protected]> Date: Wed Apr 1 23:54:50 2026 +0100 PR target/123238: VCOND_MASK regression on aarch64. This patch fixes the regression PR target/123238 on aarch64, using the changes to aarch64's aarch64_rtx_costs proposed by Tamar Christina. To explain and motivate things for aarch64: void foo(char c[]) { for (int i = 0; i < 8; i++) c[i] = c[i] != 'a' ? 'c' : 'e'; } currently generates with -O2 the following: foo: movi v30.8b, 0x61 ldr d0, [x0] movi v29.8b, 0x63 movi v31.8b, 0x65 cmeq v30.8b, v0.8b, v30.8b not v30.8b, v30.8b bit v31.8b, v29.8b, v30.8b str d31, [x0] ret where a cmeq followed by a not is used to implement NE_EXPR. c.f. the comment "Handle NE as !EQ" in aarch64-simd.md's expander of vec_cmp<mode><mode>. With the patch for PR 123238, including this change to aarch64_rtx_costs to indicate that NE is more expensive than EQ, the middle-end swaps the VCOND_EXPR, reducing the number of instructions in the example above [to what it was in GCC 14]. 2026-04-01 Tamar Christina <[email protected]> Roger Sayle <[email protected]> gcc/ChangeLog PR target/123238 * config/aarch64/aarch64.cc (aarch64_rtx_costs) <case NE/EQ>: Provide improved costs for scalar and vector comparisons. gcc/testsuite/ChangeLog PR target/123238 * gcc.target/aarch64/pr123238.c: New test case. Diff: --- gcc/config/aarch64/aarch64.cc | 20 ++++++++++++++++++-- gcc/testsuite/gcc.target/aarch64/pr123238.c | 16 ++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 197d4f272695..4405074cdad5 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -16234,8 +16234,24 @@ cost_plus: case GEU: case LE: case LEU: - - return false; /* All arguments must be in registers. */ + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + machine_mode inner_mode = GET_MODE (op0); + *cost += rtx_cost (op0, inner_mode, code, 0, speed); + if (op1 != CONST0_RTX (inner_mode)) + { + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + bool unsigned_p = code == LTU || code == LEU || code == GTU + || code == GEU; + if ((vec_flags & VEC_SVE_DATA) == 0 + || !aarch64_sve_cmp_immediate_p (op1, !unsigned_p)) + *cost += rtx_cost (op1, inner_mode, code, 1, speed); + if (code == NE && (vec_flags & VEC_ADVSIMD)) + *cost += COSTS_N_INSNS (1); + } + return true; + } case FMA: op0 = XEXP (x, 0); diff --git a/gcc/testsuite/gcc.target/aarch64/pr123238.c b/gcc/testsuite/gcc.target/aarch64/pr123238.c new file mode 100644 index 000000000000..3228dde2e764 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr123238.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void foo(char c[]) +{ + for (int i = 0; i < 8; i++) + c[i] = c[i] != 'a' ? 'c' : 'e'; +} + +void bar(char c[]) +{ + for (int i = 0; i < 8; i++) + c[i] = c[i] == 'a' ? 'c' : 'e'; +} + +/* { dg-final { scan-assembler-not "not" } } */
