https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98544
--- Comment #14 from Richard Biener <rguenth at gcc dot gnu.org> ---
So that makes it sth like the following then - still not miscomparing. What's
the "problem at length N" hinting at? Does N somehow make ido/l1 different?
typedef unsigned long size_t;
template<typename T> inline void PM(T &a, T &b, T c, T d)
{ a=c+d; b=c-d; }
template<typename T1, typename T2, typename T3> inline void MULPM
(T1 &a, T1 &b, T2 c, T2 d, T3 e, T3 f)
{ a=c*e+d*f; b=c*f-d*e; }
typedef double T;
void __attribute__((noipa))
radb2(size_t ido, size_t l1,
const T * __restrict cc, T * __restrict ch,
const T * __restrict wa)
{
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+2*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
for (size_t k=0; k<l1;++k)
for (size_t i=2; i<ido; i+=2)
{
size_t ic=ido-i;
T ti2, tr2;
PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k));
PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k));
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2);
}
}
int main()
{
double *cc, *ch, *wa;
size_t ido = 10, l1 = 10;
cc = new double[ido*l1*2];
ch = new double[ido*l1*2];
wa = new double[ido];
for (size_t i = 0; i < 10*10*10; ++i)
cc[i] = i;
for (size_t i = 0; i < 10*10*10; ++i)
wa[i] = i - 0.5;
radb2 (ido, l1, cc, ch, wa);
for (size_t i = 0; i < ido*l1*2; ++i)
__builtin_printf ("%f\n", ch[i]);
return 0;
}