https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63864
Bug ID: 63864
Summary: Missed optimization, related to SRA(??)
Product: gcc
Version: 5.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: vermaelen.wouter at gmail dot com

Hi,

In my code I replaced some 'manual' vector/matrix calculations with (inlined)
function calls using vector/matrix types. When using clang both approaches
result in nearly identical generated code. But when using gcc the code becomes
much worse.

I don't know too much about compiler internals, but if I had to make a guess
I'd say that for some reason SRA doesn't work in this case.

See the code below: 'test_ok()' is the original function, 'test_slow()' is the
rewritten version. I tried to simplify the code as much as possible while not
making it too simple (so that neither compiler starts vectorizing the code).

Tested with: g++ (GCC) 5.0.0 20141114 (experimental)

Wouter

- - - 8< - - - 8< - - - 8< - - - 8< - - - 8< - - - 8< - - - 8< - - -

// Original code with 'manual' matrix multiplication
float test_ok(float m[3][3], float x, float y, float z, float s, float b)
{
    float p = x*s + b;
    float q = y*s + b;
    float r = z*s + b;

    float u = m[0][0]*p + m[1][0]*q + m[2][0]*r;
    float v = m[0][1]*p + m[1][1]*q + m[2][1]*r;
    float w = m[0][2]*p + m[1][2]*q + m[2][2]*r;

    return u + v + w;
}

// (Much simplified) vec3/mat3 types
struct vec3
{
    vec3() {}
    vec3(float x, float y, float z) { e[0] = x; e[1] = y; e[2] = z; }

    float  operator[](int i) const { return e[i]; }
    float& operator[](int i)       { return e[i]; }

private:
    float e[3];
};

struct mat3
{
    vec3 c[3];
};

inline vec3 operator+(const vec3& x, const vec3& y)
{
    vec3 r;
    for (int i = 0; i < 3; ++i) r[i] = x[i] + y[i];
    return r;
}

inline vec3 operator*(const vec3& x, float y)
{
    vec3 r;
    for (int i = 0; i < 3; ++i) r[i] = x[i] * y;
    return r;
}

inline vec3 operator*(const vec3& x, const vec3& y)
{
    vec3 r;
    for (int i = 0; i < 3; ++i) r[i] = x[i] * y[i];
    return r;
}

inline vec3 operator*(const mat3& m, const vec3& v)
{
    return m.c[0] * v[0] + m.c[1] * v[1] + m.c[2] * v[2];
}

// Rewritten version of the original function
float test_slow(mat3& m, float x, float y, float z, float s, float b)
{
    vec3 t = m * (vec3(x,y,z) * s + vec3(b,b,b));
    return t[0] + t[1] + t[2];
}