https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106265

            Bug ID: 106265
           Summary: RISC-V SPEC2017 507.cactu code bloat due to address
                    generation
           Product: gcc
           Version: 12.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vineet.gupta at linux dot dev
  Target Milestone: ---

SPEC2017 FP benchmark 507.cactu: ML_BSSN_RHS.cc:ML_BSSN_RHS_Body() has terrible
codegen. A recurring pattern which shows up in hotspot analysis is

  380a1a:       lui     a6,0x1     # LI 4096 before linker relaxation
  380a1c:       addi    t2,sp,32
  380a20:       addi    a6,a6,-1192 # b58
  380a24:       add     a6,a6,t2
  380a26:       sd      t4,0(a6)

The first 4 instructions help calculate the destination of the last SD
instruction. There were 27 such distinct instances in the hottest top block.

Part of this is due to the ISA not having a single instruction to set a register
to a 32-bit constant, and/or to its limited addressing modes. However, the
compiler is not helping either. All 27 of those instances begin with an
instruction that sets a register to 4096, sometimes even though that register is
still live and already holds exactly that value.

Using creduce I was able to create a smallish (though not ideally small) test
case which shows 14 instances of li <reg>,4096

Built as riscv64-unknown-linux-gnu-g++ -O3 -static -ffast-math -funroll-loops
-march=rv64gc_zba_zbb_zbc_zbs -c  -std=c++03 

This is with trunk as of June 14 (commit 6abe341558ab)

--->8-----
// File-scope declarations used by the reduced test case below.  Names are
// whatever creduce left behind; they carry no meaning of their own.
// `a` is only declared here (no definition in this snippet).
void *a();
// b: switch selector; c: compared against 1 when reading s[az];
// d, q: bounds of the `j` loop; l: outer loop counter; o: bound of the `i` loop.
int b, c, d, l, o, q;
// Global double arrays indexed by `az` inside y(); r is the one written to.
double *e, *f, *p, *r, *s = 0;
// g scales sizeof(double) to form `av`; h and k are used as index offsets.
long g, h, k;
// m and aa are read in the final expression; n is written inside the switch
// and read afterwards.
double m, n, aa;
// Reduced (creduce) reproducer for the repeated `li <reg>,4096` address
// generation described in this report.  The body is not meant to be
// meaningful: it reads locals that are never assigned (e.g. ag, ai, aj, ar,
// at, bl, bp, be, ca, cd, ch, dm) and indexes off null / uninitialised
// pointers.  NOTE(review): presumably this should be left token-for-token as
// is, since changing it may change the codegen the report is about.
void y() {
  // Only aw has an initialiser (floating-point 1.0 / 0); the rest are left
  // uninitialised.
  double ag, ai, aj, ap, ar, at, aw(01.0 / 0);
  // Address of the function `a` itself (no call), cast to double *.
  double *am((double *)a);
  // Never assigned before being indexed below.
  double *an;
  // Result of calling a(), cast to double *.
  double *ao((double *)a());
  long av = sizeof(double) * g;
  // Outer loop runs while the global l is non-zero and increments it directly;
  // j runs over [d, q) and i over [1, o).
  for (; l; ++l)
    for (int j = d; j < q; ++j)
      for (int i = 1; i < o; i++) {
        // Index used for every array access in this iteration.
        long az = i * j + l;
        // Only bv has an initialiser; the others are assigned (if at all)
        // inside the switch below.
        double ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl, bm, bn, bo, bp,
            bq, br, bs, bv(-ar);
        // The switch body is the single `case 4` block, so everything up to
        // its closing brace is skipped when b != 4.
        switch (b)
        case 4: {
          // `[]` binds tighter than the cast: a char read at offset 2 from a
          // null char * is converted to double * and stored through.
          *(double *)((char *)0)[2] = ((char *)0)[2];
          ba = ((char *)0)[k + av] + ((char *)0)[2] + ((char *)0)[k] +
               *(double *)((char *)0)[av] +
               ((char *)0)[av * 2] * ((char *)0)[k * av];
          bb = (&am[az])[h] + ((char *)0)[k * h] +
               ((char *)&am[az])[1] *
                   (((char *)&am[az])[h] + *(&am)[h] + (&am[az])[k]) +
               (&am[az])[2] - ((char *)0)[h] + (&am[az])[k * av];
          bc = 4 * 8 + *(double *)&((char *)0)[2];
          bd = 4 * ((char *)0)[k + 1] +
               (&an[az])[k * h] * (((char *)0)[-1] + (&an[az])[k + h]) +
               8 * ((&an[az])[k * 2] + (&an[az])[av + h * -2] + (&an[az])[av] +
                    (&an[az])[av * 2 + h]) -
               8 * ((&an[az])[av * 2] + (&an[az])[k] + (&an[az])[av * 21] +
                    (&an[az])[av + h]) +
               *(&an)[h] - (&an[az])[av * 22] * (&an[az])[h] +
               (&an[az])[av * 2 * 2];
          bf = (&az)[0] * (((char *)&ao[az])[0] + ao[av] + (&ao[az])[av * 2] +
                           *(double *)((char *)ao)[k]);
          bg = (&ao[az])[0] * *(&ao)[av] + *(&ao)[2];
          bh = *((char **)0)[av] * (&ao[az])[av - h] +
               ((char *)0)[1] * *(double *)((char *)0)[1] *
                   (*(double *)((char *)&ao)[k] + *(double *)((char *)0)[12] +
                    ((char *)&ao[az])[av]) +
               *(&ao)[av * 2] - *(&ao)[k] - (&ao[az])[2] +
               (&ao[az])[k * av * 2];
          bi = *(&ao)[av * h] *
                   (ao[h] + ((char *)0)[k * 2] + (&ao[az])[k * av] +
                    ((char *)0)[21]) *
                   (((char *)0)[h] + *(double *)((char *)0)[h * 2] +
                    *(double *)((char *)&ao[az])[k] + *(&ao)[h]) +
               *(&ao)[av * 22] - (&ao[az])[2 * h * 2] + ((char *)0)[22];
          bj = 4 * ((&ao[az])[av] + (&ao[az])[k * av * h] * (&ao[az])[av * -1]
+
                    (&ao[az])[av * h]) +
               ((&ao[az])[av * 12] + (&ao[az])[k] + ((char *)&ao)[h] +
                (&ao[az])[av * 2 + h * 0]) *
                   (((char *)0)[2] + *(double *)((char *)&ao[az])[av * 2] +
                    (&ao[az])[h] + (&ao[az])[av + h]) +
               (&ao[az])[av * 22] - ((char *)&ao[az])[h] - (&ao[az])[av * -2] +
               (&ao[az])[av + h * 2];
          bk = 8 * (&ap)[1];
          bm = ((char *)0)[1] * (&e[az])[2];
          bn = -(&az)[0];
          bo = (&e[az])[h] - (&e[az])[h * 2] * aw;
          bq = (&at)[h];
          br = 8 * 8 * ((&az)[1] + (&f[az])[-2] - (&f[az])[2]);
          bs = (&f[az])[h * -2] - (&f[az])[h * 2];
          // Writes the global n, which is read again below when forming dg.
          n = ((char *)&s)[h * 2];
        }
          // From here on the statements are outside the switch and run on
          // every iteration, whatever the value of b.
          double bt(e[az] * f[az] - at * at);
        double bu(p[az] * at * az);
        double bw(f[az] - p[az]);
        double bx = 01.0 / 0;
        double by = 01.0 / 0 * 0;
        double ca;
        double cb = -bl;
        double cd;
        double cf = 0.5 * bn;
        double ch;
        double cj = bp - 0.5 * bo;
        double cm = ch * bv;
        double cn = bk * cd * ch;
        double co = bk + cd * by;
        double cq = bm + bx;
        double ct = ca + bx;
        double cu = cb * bt + cf * bu + cj * bv;
        double cz = bt + bq * bv;
        double db = br * bq * by;
        double dc = cm * cu * bw + cz * by;
        double dd = cn * bt + cq * ct + bs * by;
        // s[az] is only read when the global c equals 1.
        double df(c == 1 ? s[az] : 0);
        double dg = df * n;
        double dj = bu + ai * bv;
        double dk = ag * ai * bu + aj * bv;
        double dl = bw + aj * bx;
        double dm;
        double dn = bu * bv;
        double t = bu + dj;
        double u = dk * by;
        double v = ag * bv + dl * by;
        double w = bx + dm;
        // Combines the intermediates above into the single value stored to
        // r[az].
        double x = bf + 0.333333333333333333333333333333 * bc + bi + ba + bj +
                   by * (bb + bd +
                         bg * bh *
                             (6 * w * dg + dn * co * t * bv + u * aa * cj * v +
                              w * db)) -
                   m * dc * dd + (be + 0.666666666666666666666666666667) * co;
        r[az] = x;
      }
}
--->8-----

Reply via email to