Hi! With this option we generate a conditional library call. When expanding such a call, do_pending_stack_adjust () is performed from expand_call and if there are any needed sp adjustments, they are emitted. The problem is that this happens only in conditionally executed code, so on some path the args size level will be correct, on others it might be wrong.
Fixed by doing the adjustment before the first conditional jump if we know we'll emit a call conditionally. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? BTW, when looking at this, I've noticed something strange, expand_set_or_movmem_prologue_epilogue_by_misaligned_moves has bool dynamic_check argument, but the caller has int dynamic_check and in the caller as I understand the meaning is dynamic_check == -1 means no conditional library call, otherwise there is a conditional library call with some specific max size. When calling expand_set_or_movmem_prologue_epilogue_by_misaligned_moves, the dynamic_check value is passed to the bool argument though, so that means dynamic_check != 0 instead of dynamic_check != -1. Honza, can you please have a look at that? 2016-01-22 Jakub Jelinek <ja...@redhat.com> PR target/69432 * config/i386/i386.c: Include dojump.h. (expand_small_movmem_or_setmem, expand_set_or_movmem_prologue_epilogue_by_misaligned_moves): Spelling fixes. (ix86_expand_set_or_movmem): Call do_pending_stack_adjust () early if dynamic_check != -1. * g++.dg/opt/pr69432.C: New test. --- gcc/config/i386/i386.c.jj 2016-01-19 09:20:34.000000000 +0100 +++ gcc/config/i386/i386.c 2016-01-22 20:39:32.289457234 +0100 @@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. #include "dbgcnt.h" #include "case-cfn-macros.h" #include "regrename.h" +#include "dojump.h" /* This file should be included last. */ #include "target-def.h" @@ -25700,7 +25701,7 @@ expand_small_movmem_or_setmem (rtx destm if (DYNAMIC_CHECK) Round COUNT down to multiple of SIZE << optional caller supplied zero size guard is here >> - << optional caller suppplied dynamic check is here >> + << optional caller supplied dynamic check is here >> << caller supplied main copy loop is here >> } done_label: @@ -25875,8 +25876,8 @@ expand_set_or_movmem_prologue_epilogue_b else *min_size = 0; - /* Our loops always round down the bock size, but for dispatch to library - we need precise value. */ + /* Our loops always round down the block size, but for dispatch to + library we need precise value. */ if (dynamic_check) *count = expand_simple_binop (GET_MODE (*count), AND, *count, GEN_INT (-size), *count, 1, OPTAB_DIRECT); @@ -26469,6 +26470,13 @@ ix86_expand_set_or_movmem (rtx dst, rtx size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; epilogue_size_needed = size_needed; + /* If we are going to call any library calls conditionally, make sure any + pending stack adjustment happen before the first conditional branch, + otherwise they will be emitted before the library call only and won't + happen from the other branches. */ + if (dynamic_check != -1) + do_pending_stack_adjust (); + desired_align = decide_alignment (align, alg, expected_size, move_mode); if (!TARGET_ALIGN_STRINGOPS || noalign) align = desired_align; --- gcc/testsuite/g++.dg/opt/pr69432.C.jj 2016-01-22 20:51:19.463428357 +0100 +++ gcc/testsuite/g++.dg/opt/pr69432.C 2016-01-22 20:51:04.000000000 +0100 @@ -0,0 +1,62 @@ +// PR target/69432 +// { dg-do compile } +// { dg-options "-O3" } +// { dg-additional-options "-minline-stringops-dynamically" { target i?86-*-* x86_64-*-* } } + +template <typename S, typename T, typename U> +void +f1 (S x, T y, U z) +{ + for (; y; --y, ++x) + *x = z; +} + +template <typename S, typename T, typename U> +void f2 (S x, T y, U z) +{ + f1 (x, y, z); +} + +struct A {}; +struct B { static char f3 (A, unsigned); }; + +template <typename S, typename U> +void f4 (S, U); + +struct C +{ + template <typename S, typename T, typename U> + static S f5 (S x, T y, U z) { f2 (x, y, z); } +}; + +template <typename S, typename T, typename U> +void f6 (S x, T y, U z) { C::f5 (x, y, z); } + +template <typename S, typename T, typename U, typename V> +void f7 (S x, T y, U z, V) { f6 (x, y, z); } + +struct E +{ + struct D : A { char e; D (A); }; + A f; + E (int x) : g(f) { f8 (x); } + ~E (); + D g; + void f9 (int x) { x ? B::f3 (g, x) : char (); } + void f8 (int x) { f9 (x); } +}; + +struct F : E +{ + F (int x) : E(x) { f10 (x); f4 (this, 0); } + char h; + void f10 (int x) { f7 (&g.e, x, h, 0); } +}; + +long a; + +void +test () +{ + F b(a); +} Jakub