https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70546
Bug ID: 70546 Summary: ifconvert if(cond) ++count; to count += cond; Product: gcc Version: 6.0 Status: UNCONFIRMED Keywords: missed-optimization Severity: enhancement Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: glisse at gcc dot gnu.org Target Milestone: --- Target: x86_64-*-* In the example from http://stackoverflow.com/q/36414959/1918193 we fail to vectorize f0 (using g++ -O3) because of the condition in the loop. However, if I manually change the condition as in the #else case, the vectorizer is happy and we get a very nice performance boost. Could ifconvert (or some other phi optimization pass) handle this transformation? n is a local variable, so we shouldn't even need -ftree-loop-if-convert-stores. At -O1, it is also a clear win with the random distribution, but it becomes a slight loss if the vector v is sorted in increasing order and a big loss for the decreasing order, so I don't know for sure under which condition this should be done. #include <algorithm> #include <chrono> #include <random> #include <iomanip> #include <iostream> #include <vector> using namespace std; using namespace std::chrono; vector<int> v(1'000'000); int f0() { int n = 0; for (int i = 1; i < v.size()-1; ++i) { int a = v[i-1]; int b = v[i]; int c = v[i+1]; #ifndef IMPROVED if (a < b && b < c) ++n; #else n += (a < b && b < c); #endif } return n; } int f1() { int n = 0; for (int i = 1; i < v.size()-1; ++i) if (v[i-1] < v[i] && v[i] < v[i+1]) ++n; return n; } int main() { auto benchmark = [](int (*f)()) { const int N = 100; volatile long long result = 0; vector<long long> timings(N); for (int i = 0; i < N; ++i) { auto t0 = high_resolution_clock::now(); result += f(); auto t1 = high_resolution_clock::now(); timings[i] = duration_cast<nanoseconds>(t1-t0).count(); } sort(timings.begin(), timings.end()); cout << fixed << setprecision(6) << timings.front()/1'000'000.0 << "ms min\n"; cout << timings[timings.size()/2]/1'000'000.0 << "ms median\n" << "Result: " << result/N << "\n\n"; }; mt19937 generator (31415); // deterministic seed uniform_int_distribution<> distribution(0, 1023); for (auto& e: v) e = distribution(generator); benchmark(f0); benchmark(f1); cout << "\ndone\n"; return 0; }