https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86471
--- Comment #3 from Matt Bentley <mattreecebentley at gmail dot com> ---

I thought I should note that there is also a missed optimization opportunity in the code. Clang optimizes the code I've listed to remove the benchmark loops entirely since it detects that the arrays aren't actually being used for anything. In order to get clang to benchmark it properly, I had to add a loop which adds the array contents to a total post-benchmark, as follows:

#include <cstring>
#include <algorithm>
#include <benchmark/benchmark.h>

double total = 0;

static void memory_memset(benchmark::State& state)
{
  int ints[50000];

  for (auto _ : state)
  {
    std::memset(ints, 0, sizeof(int) * 50000);
  }

  for (int counter = 0; counter != 50000; ++counter)
  {
    total += ints[counter];
  }
}

static void memory_filln(benchmark::State& state)
{
  int ints[50000];

  for (auto _ : state)
  {
    std::fill_n(ints, 50000, 0);
  }

  for (int counter = 0; counter != 50000; ++counter)
  {
    total += ints[counter];
  }
}

static void memory_fill(benchmark::State& state)
{
  int ints[50000];

  for (auto _ : state)
  {
    std::fill(std::begin(ints), std::end(ints), 0);
  }

  for (int counter = 0; counter != 50000; ++counter)
  {
    total += ints[counter];
  }
}

// Register the function as a benchmark
BENCHMARK(memory_filln);
BENCHMARK(memory_fill);
BENCHMARK(memory_memset);

int main (int argc, char ** argv)
{
  benchmark::Initialize (&argc, argv);
  benchmark::RunSpecifiedBenchmarks ();
  printf("Total = %f\n", total);
  getchar();
  return 0;
}