https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106265
Bug ID: 106265 Summary: RISC-V SPEC2017 507.cactu code bloat due to address generation Product: gcc Version: 12.1.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: vineet.gupta at linux dot dev Target Milestone: ---
SPEC2017 FP benchmark 507.cactu: ML_BSSN_RHS.cc:ML_BSSN_RHS_Body() has terrible codegen. A recurring pattern which shows up in hotspot analysis is:
  380a1a: lui a6,0x1 # LI 4096 before linker relaxation
  380a1c: addi t2,sp,32
  380a20: addi a6,a6,-1192 # b58
  380a24: add a6,a6,t2
  380a26: sd t4,0(a6)
The first 4 instructions only compute the destination address of the final SD instruction. There were 27 such distinct instances in the hottest top block. Part of this is due to the ISA not having a single instruction to set a register to a 32-bit constant, and/or to its limited addressing modes. However, the compiler is not helping either: all 27 instances begin with an instruction that sets a register to 4096, sometimes while a register holding that exact same value is still live.
Using creduce I was able to create a small'ish (not ideal small) test case which shows 14 instances of li <reg>,4096 Built as riscv64-unknown-linux-gnu-g++ -O3 -static -ffast-math -funroll-loops -march=rv64gc_zba_zbb_zbc_zbs -c -std=c++03 This is with trunk as of June 14 (commit 6abe341558ab) --->8----- void *a(); int b, c, d, l, o, q; double *e, *f, *p, *r, *s = 0; long g, h, k; double m, n, aa; void y() { double ag, ai, aj, ap, ar, at, aw(01.0 / 0); double *am((double *)a); double *an; double *ao((double *)a()); long av = sizeof(double) * g; for (; l; ++l) for (int j = d; j < q; ++j) for (int i = 1; i < o; i++) { long az = i * j + l; double ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl, bm, bn, bo, bp, bq, br, bs, bv(-ar); switch (b) case 4: { *(double *)((char *)0)[2] = ((char *)0)[2]; ba = ((char *)0)[k + av] + ((char *)0)[2] + ((char *)0)[k] + *(double *)((char *)0)[av] + ((char *)0)[av * 2] * ((char *)0)[k * av]; bb = (&am[az])[h] + ((char *)0)[k * h] + ((char *)&am[az])[1] * (((char *)&am[az])[h] + *(&am)[h] + (&am[az])[k]) + (&am[az])[2] - ((char *)0)[h] + (&am[az])[k * av]; bc = 4 * 8 + *(double *)&((char *)0)[2]; bd = 4 * ((char *)0)[k + 1] + (&an[az])[k * h] * (((char *)0)[-1] + (&an[az])[k + h]) + 8 * ((&an[az])[k * 2] + (&an[az])[av + h * -2] + (&an[az])[av] + (&an[az])[av * 2 + h]) - 8 * ((&an[az])[av * 2] + (&an[az])[k] + (&an[az])[av * 21] + (&an[az])[av + h]) + *(&an)[h] - (&an[az])[av * 22] * (&an[az])[h] + (&an[az])[av * 2 * 2]; bf = (&az)[0] * (((char *)&ao[az])[0] + ao[av] + (&ao[az])[av * 2] + *(double *)((char *)ao)[k]); bg = (&ao[az])[0] * *(&ao)[av] + *(&ao)[2]; bh = *((char **)0)[av] * (&ao[az])[av - h] + ((char *)0)[1] * *(double *)((char *)0)[1] * (*(double *)((char *)&ao)[k] + *(double *)((char *)0)[12] + ((char *)&ao[az])[av]) + *(&ao)[av * 2] - *(&ao)[k] - (&ao[az])[2] + (&ao[az])[k * av * 2]; bi = *(&ao)[av * h] * (ao[h] + ((char *)0)[k * 2] + (&ao[az])[k * av] + ((char *)0)[21]) * (((char *)0)[h] + *(double *)((char *)0)[h * 2] + 
*(double *)((char *)&ao[az])[k] + *(&ao)[h]) + *(&ao)[av * 22] - (&ao[az])[2 * h * 2] + ((char *)0)[22]; bj = 4 * ((&ao[az])[av] + (&ao[az])[k * av * h] * (&ao[az])[av * -1] + (&ao[az])[av * h]) + ((&ao[az])[av * 12] + (&ao[az])[k] + ((char *)&ao)[h] + (&ao[az])[av * 2 + h * 0]) * (((char *)0)[2] + *(double *)((char *)&ao[az])[av * 2] + (&ao[az])[h] + (&ao[az])[av + h]) + (&ao[az])[av * 22] - ((char *)&ao[az])[h] - (&ao[az])[av * -2] + (&ao[az])[av + h * 2]; bk = 8 * (&ap)[1]; bm = ((char *)0)[1] * (&e[az])[2]; bn = -(&az)[0]; bo = (&e[az])[h] - (&e[az])[h * 2] * aw; bq = (&at)[h]; br = 8 * 8 * ((&az)[1] + (&f[az])[-2] - (&f[az])[2]); bs = (&f[az])[h * -2] - (&f[az])[h * 2]; n = ((char *)&s)[h * 2]; } double bt(e[az] * f[az] - at * at); double bu(p[az] * at * az); double bw(f[az] - p[az]); double bx = 01.0 / 0; double by = 01.0 / 0 * 0; double ca; double cb = -bl; double cd; double cf = 0.5 * bn; double ch; double cj = bp - 0.5 * bo; double cm = ch * bv; double cn = bk * cd * ch; double co = bk + cd * by; double cq = bm + bx; double ct = ca + bx; double cu = cb * bt + cf * bu + cj * bv; double cz = bt + bq * bv; double db = br * bq * by; double dc = cm * cu * bw + cz * by; double dd = cn * bt + cq * ct + bs * by; double df(c == 1 ? s[az] : 0); double dg = df * n; double dj = bu + ai * bv; double dk = ag * ai * bu + aj * bv; double dl = bw + aj * bx; double dm; double dn = bu * bv; double t = bu + dj; double u = dk * by; double v = ag * bv + dl * by; double w = bx + dm; double x = bf + 0.333333333333333333333333333333 * bc + bi + ba + bj + by * (bb + bd + bg * bh * (6 * w * dg + dn * co * t * bv + u * aa * cj * v + w * db)) - m * dc * dd + (be + 0.666666666666666666666666666667) * co; r[az] = x; } } --->8-----