https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70001
--- Comment #2 from Markus Trippelsdorf <trippels at gcc dot gnu.org> ---
markus@x4 tmp % cat fft-old.cpp
#include <array>
#include <complex>
// Pulls all std names into the global namespace; the unqualified `array`
// used below is std::array.
using namespace std;
// Element type shared by every array in this test case.
typedef std::complex<double> cd;
// N = 2^LOG = 131072 elements per array.
const int LOG = 17;
const int N = (1 << LOG);
// Two namespace-scope arrays of N complex values each; main() passes both to FFT().
array<cd, N> A;
array<cd, N> B;
// Reduced test-case body: takes an array by reference (never used) and only
// declares a local F holding LOG + 1 rows of N complex<double> values
// (18 x 131072 elements); F is not read or written afterwards.
// NOTE(review): this declaration appears to be the construct the compile-time
// measurements in this report exercise — keep it unchanged when reproducing.
void FFT(array<cd, N> &arr) { array<array<cd, N>, LOG + 1> F; }
// Calls FFT() once for each global array. The commands in this report only
// compile the file (-c); the resulting program is not executed there.
int main() {
FFT(A);
FFT(B);
}
markus@x4 tmp % perf stat clang++ -c -std=c++14 fft-old.cpp
Performance counter stats for 'clang++ -c -std=c++14 fft-old.cpp':
1244.864638 task-clock (msec) # 0.992 CPUs utilized
15 context-switches # 0.012 K/sec
3 cpu-migrations # 0.002 K/sec
14,555 page-faults # 0.012 M/sec
3,665,601,629 cycles # 2.945 GHz (72.48%)
1,076,285,871 stalled-cycles-frontend # 29.36% frontend cycles idle (77.86%)
461,560,586 stalled-cycles-backend # 12.59% backend cycles idle (70.45%)
4,099,304,808 instructions # 1.12 insns per cycle
# 0.26 stalled cycles per insn (84.63%)
994,875,396 branches # 799.184 M/sec (74.43%)
17,383,559 branch-misses # 1.75% of all branches (57.77%)
1.254379190 seconds time elapsed
markus@x4 tmp % perf stat g++ -c -std=c++14 fft-old.cpp
Performance counter stats for 'g++ -c -std=c++14 fft-old.cpp':
34408.032001 task-clock (msec) # 0.999 CPUs utilized
104 context-switches # 0.003 K/sec
5 cpu-migrations # 0.000 K/sec
1,699,084 page-faults # 0.049 M/sec
106,988,050,234 cycles # 3.109 GHz (68.78%)
18,142,016,241 stalled-cycles-frontend # 16.96% frontend cycles idle (64.97%)
44,391,891,312 stalled-cycles-backend # 41.49% backend cycles idle (67.37%)
99,403,441,373 instructions # 0.93 insns per cycle
# 0.45 stalled cycles per insn (67.52%)
21,050,529,933 branches # 611.791 M/sec (65.02%)
455,155,359 branch-misses # 2.16% of all branches (68.65%)
34.436263240 seconds time elapsed