/* Reproducer for GCC PR target/38488: code generated on x86_64 for a
   memset over most of a stack-allocated struct.  The file is
   self-contained; it includes no headers and declares what it needs.  */

/* size_t taken from the compiler-predefined __SIZE_TYPE__ macro instead
   of <stddef.h>.  */
typedef __SIZE_TYPE__ size_t;

/* Local prototype for the standard memset, in place of <string.h>.  */
extern void *memset (void *, int, size_t);

/* Four leading chars, four ints and a 64-byte char array.  Because `a'
   is the first member, `b' sits one byte into the object.  */
struct A { char a, b, c, d; int i, j, k, l; char p[64]; };

/* Opaque sink for a struct A pointer.  The function is marked noinline,
   and its body is an empty asm statement that takes P as a register
   input and declares a "memory" clobber -- presumably so that the stores
   made by the caller cannot be optimized away.  No instructions are
   emitted by the asm template itself (it is the empty string).  */
void __attribute__((noinline)) bar (struct A *p) { asm volatile ("" : : "r" (p) : "memory"); }

/* Clear a local struct A starting at member `b' and running for
   sizeof (a) - 1 bytes, i.e. everything except the first member `a',
   which is left uninitialized.  The destination therefore starts one
   byte past the beginning of the object.  The struct's address is then
   handed to bar.  */
void foo (void) { struct A a; memset (&a.b, '\0', sizeof (a) - 1); bar (&a); }

/* Driver: calls foo 100000000 times and returns 0.  The large iteration
   count is presumably there to make the run time measurable.  */
int main (void) { int i; for (i = 0; i < 100000000; i++) foo (); return 0; }
Compiled at -O2 on x86_64-linux, the testcase above gives the following timings:

$ ~/timing ./test-4.3 # 4.3.2 20081105
Strip out best and worst realtime result
minimum: 1.319280371 sec real / 0.000023862 sec CPU
maximum: 1.856280790 sec real / 0.000048852 sec CPU
average: 1.476243858 sec real / 0.000025087 sec CPU
stdev : 0.222421870 sec real / 0.000001019 sec CPU

$ ~/timing ./test-4.4 # 4.4.0 20081211
Strip out best and worst realtime result
minimum: 1.729227419 sec real / 0.000021394 sec CPU
maximum: 1.746533792 sec real / 0.000042904 sec CPU
average: 1.737414022 sec real / 0.000025143 sec CPU
stdev : 0.003746136 sec real / 0.000000764 sec CPU

$ ~/timing ./test-4.4-patched # 4.4.0 20081211 + patch
Strip out best and worst realtime result
minimum: 1.243328664 sec real / 0.000023905 sec CPU
maximum: 1.256095660 sec real / 0.000046869 sec CPU
average: 1.250533713 sec real / 0.000025471 sec CPU
stdev : 0.002965601 sec real / 0.000001744 sec CPU

The patched compiler is current trunk plus the patch from http://gcc.gnu.org/ml/gcc-patches/2008-12/msg00706.html.

--
Summary: [4.4 Regression] x86_64 generates much larger and slightly slower code for memset
Product: gcc
Version: 4.4.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: jakub at gcc dot gnu dot org

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38488