https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119974

            Bug ID: 119974
           Summary: Missing combination of SVE RDFFRS
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: aarch64-sve, missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

A testcase:
#include <arm_sve.h>

/*
 * Reproducer for the missed RDFFR + PTEST -> RDFFRS combination described
 * in this report.  Keep the statement shape as-is: the report is about the
 * code the compiler generates for exactly this sequence.
 *
 * Walks the n doubles starting at x one SVE vector at a time and compares
 * each vector against val with svcmpeq_f64.
 *
 * Return value, as written:
 *   - match found: index (active-lane counts accumulated over the earlier
 *     iterations) plus the number of set lanes in svbrkb_b_z(pg, pcmp)
 *     (presumably the position of the first matching element -- confirm
 *     against the ACLE definition of svbrkb);
 *   - loop ends without a match: index;
 *   - the svptest_last check on the FFR fails and the retry compare under
 *     ffr finds nothing: -1.
 *
 * NOTE(review): index is a uint64_t while the function returns int, so the
 * value is converted on return.
 */
int foo(double *x, double val, int n)
{
    double *endp = x + n;
    uint64_t index = 0;
    svbool_t pg;
    svbool_t ffr;
    svbool_t pcmp;
    /* NOTE(review): ptrue_b is not referenced again in this function. */
    svbool_t ptrue_b = svptrue_b8();
    svbool_t ptrue_d = svptrue_b64();
    svfloat64_t x_vec, val_vec = svdup_f64(val);
    /*
     * pg is rebuilt from the current x and endp on every iteration; the
     * loop keeps going while svptest_first reports the first lane of pg
     * as set.  x advances by svcntd() elements per iteration.
     */
    for (pg = svwhilelt_b64_u64((intptr_t)x, (intptr_t)endp);
         svptest_first(ptrue_d, pg);
         x += svcntd(), pg = svwhilelt_b64_u64((intptr_t)x, (intptr_t)endp)) {
        /* Set the FFR, do the ldff1 load under pg, then read the FFR back. */
        svsetffr();
        x_vec = svldff1_f64(pg, x);
        /*
         * This svrdffr followed by the svptest_last below is the pair that
         * appears as "rdffr" + "ptest" + "b.last" in the generated code and
         * that the report expects to be combined into a single RDFFRS.
         */
        ffr = svrdffr();
        if (!svptest_last(ptrue_d, ffr)) {
            goto ff_fail;
        }
        pcmp = svcmpeq_f64(pg, x_vec, val_vec);
        if (svptest_any(ptrue_d, pcmp)) {
            goto found_val;
        }
        /* No match in this vector: account for the lanes that were active. */
        index += svcntp_b64(ptrue_d, pg);
    }
    return index;

ff_fail:
    /*
     * Reached when the svptest_last check on the FFR fails.  The compare is
     * redone with ffr, not pg, as the governing predicate.
     */
    pcmp = svcmpeq_f64(ffr, x_vec, val_vec);
    if (svptest_any(ptrue_d, pcmp)) {
        goto found_val;
    }
    return -1;

found_val:
    /*
     * Replace pcmp with svbrkb_b_z(pg, pcmp) and add its set-lane count to
     * index (presumably the lanes before the first match -- confirm).
     */
    pcmp = svbrkb_b_z(pg, pcmp);
    index += svcntp_b64(ptrue_d, pcmp);
    return index;
}

Compiling this with, say, -O3 -march=armv9-a misses combining an RDFFR and a
PTEST into an RDFFRS:
...
.L6:
        setffr
        ldff1d  z31.d, p7/z, [x0]
        fcmeq   p14.d, p7/z, z31.d, z0.d
        rdffr   p6.b
        ptest   p15, p6.b
        b.last  .L3
...

The cc_fusion pass does attempt it:
trying to parallelize insn 32 and insn 40
failed to match this instruction:
(parallel [
        (set (reg:CC_NZC 66 cc)
            (unspec:CC_NZC [
                    (reg:VNx16BI 128)
                    (subreg:VNx2BI (reg:VNx16BI 128) 0)
                    (const_int 1 [0x1])
                    (reg:VNx2BI 86 ffrt)
                ] UNSPEC_PTEST))
        (set (reg/v:VNx16BI 114 [ ffr ])
            (reg:VNx16BI 86 ffrt))
    ])

But it looks like the subreg of reg 128 foils the matching of the
*aarch64_rdffr_cc pattern in aarch64-sve.md.

Reply via email to