Hi! As the following testcase shows, when the prologue computes crtl->drap_reg (i.e. when stack_realign_drap is true) and something, e.g. in the first bb, uses that register, we can incorrectly shrink-wrap it and use an uninitialized register (%ecx, %r10, etc.).
Fixed by also adding the drap register to the SET_UP_BY_PROLOGUE regset. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-11-30 Jakub Jelinek <ja...@redhat.com> PR target/50725 * function.c (thread_prologue_and_epilogue_insns): If stack_realign_drap, add drap_reg to SET_UP_BY_PROLOGUE regset. * gcc.target/i386/pr50725.c: New test. --- gcc/function.c.jj 2011-11-29 08:58:52.000000000 +0100 +++ gcc/function.c 2011-11-30 10:29:18.720041709 +0100 @@ -5893,6 +5893,9 @@ thread_prologue_and_epilogue_insns (void if (pic_offset_table_rtx) add_to_hard_reg_set (&set_up_by_prologue, Pmode, PIC_OFFSET_TABLE_REGNUM); + if (stack_realign_drap && crtl->drap_reg) + add_to_hard_reg_set (&set_up_by_prologue, GET_MODE (crtl->drap_reg), + REGNO (crtl->drap_reg)); /* We don't use a different max size depending on optimize_bb_for_speed_p because increasing shrink-wrapping --- gcc/testsuite/gcc.target/i386/pr50725.c.jj 2011-11-30 10:45:00.703961208 +0100 +++ gcc/testsuite/gcc.target/i386/pr50725.c 2011-11-30 10:54:18.900654771 +0100 @@ -0,0 +1,48 @@ +/* PR target/50725 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O2 -mavx" } */ + +extern void abort (void); + +typedef int __attribute__((vector_size (32))) m256i; + +__attribute__((noinline, noclone)) void +foo (int *x, m256i *y) +{ + asm volatile ("" : : "r" (x), "r" (y) : "memory"); +} + +__attribute__((noinline, noclone)) int +bar (int x) +{ + if (x > 20) + return 24; + m256i i; + foo (__builtin_alloca (x), &i); + return 128; +} + +__attribute__((noinline, noclone)) int +baz (int d0, int d1, int d2, int d3, int d4, int d5, int x) +{ + if (x > 20) + return 24; + m256i i; + d0 += d1 + d2 + d3 + d4 + d5; d1 += d0; + foo (__builtin_alloca (x), &i); + return 128; +} + +int +main () +{ + if (bar (22) != 24 || bar (20) != 128) + abort (); +#ifdef __x86_64__ + register long r10 __asm__ ("r10") = 0xdeadbeefdeadbeefUL; + asm volatile ("" : "+r" (r10)); +#endif + if (baz (0, 0, 0, 0, 0, 0, 22) != 24 || baz 
(0, 0, 0, 0, 0, 0, 20) != 128) + abort (); + return 0; +} Jakub