Jan,
This patch caused Bug 64982 - [5 Regression] Many g++ failures on
x86_64-apple-darwin14 with -m32.
Jack
On Sun, Feb 8, 2015 at 3:20 PM, Jan Hubicka <[email protected]> wrote:
> Hi,
> this patch makes i386.c ready for presence of aliases of local functions, but
> also fixes a wrong code issue where ix86_function_sseregparm
> does use caller TARET_SSE_MATH flags instead of callees. If callee disagree
> on this flag, it may get called with wrong calling convention.
>
> I added error message on calling local function with SSE ABI from function
> compiled with SSE disabled. We may work harder to simply disable SSE calling
> convention but that requires whole compilation unit analysis that is hard
> to do on ltrans. I hope it does not matter in practice. Otherwise we may want
> to introduce some hook and disqualify such functions from being local
> at WPA time.
>
> Bootstrapped/regtested x86_64-linux (includin 32bit testing), will commit it
> shortly.
>
> Honza
>
> PR ipa/63566
> * i386.c (ix86_function_regparm): Look through aliases to see if
> callee
> is local and optimized.
> (ix86_function_sseregparm): Likewise; also use target's SSE math
> settings; error out instead of silently generating wrong code
> on mismatches.
> (init_cumulative_args): Look through aliases.
>
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c (revision 220509)
> +++ config/i386/i386.c (working copy)
> @@ -5767,49 +5767,55 @@ ix86_function_regparm (const_tree type,
>
> /* Use register calling convention for local functions when possible. */
> if (decl
> - && TREE_CODE (decl) == FUNCTION_DECL
> + && TREE_CODE (decl) == FUNCTION_DECL)
> + {
> + cgraph_node *target = cgraph_node::get (decl);
> + if (target)
> + target = target->function_symbol ();
> +
> /* Caller and callee must agree on the calling convention, so
> checking here just optimize means that with
> __attribute__((optimize (...))) caller could use regparm convention
> and callee not, or vice versa. Instead look at whether the callee
> is optimized or not. */
> - && opt_for_fn (decl, optimize)
> - && !(profile_flag && !flag_fentry))
> - {
> - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
> - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE
> (decl));
> - if (i && i->local && i->can_change_signature)
> + if (target && opt_for_fn (target->decl, optimize)
> + && !(profile_flag && !flag_fentry))
> {
> - int local_regparm, globals = 0, regno;
> + cgraph_local_info *i = &target->local;
> + if (i && i->local && i->can_change_signature)
> + {
> + int local_regparm, globals = 0, regno;
>
> - /* Make sure no regparm register is taken by a
> - fixed register variable. */
> - for (local_regparm = 0; local_regparm < REGPARM_MAX;
> local_regparm++)
> - if (fixed_regs[local_regparm])
> - break;
> -
> - /* We don't want to use regparm(3) for nested functions as
> - these use a static chain pointer in the third argument. */
> - if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
> - local_regparm = 2;
> -
> - /* In 32-bit mode save a register for the split stack. */
> - if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
> - local_regparm = 2;
> -
> - /* Each fixed register usage increases register pressure,
> - so less registers should be used for argument passing.
> - This functionality can be overriden by an explicit
> - regparm value. */
> - for (regno = AX_REG; regno <= DI_REG; regno++)
> - if (fixed_regs[regno])
> - globals++;
> + /* Make sure no regparm register is taken by a
> + fixed register variable. */
> + for (local_regparm = 0; local_regparm < REGPARM_MAX;
> + local_regparm++)
> + if (fixed_regs[local_regparm])
> + break;
> +
> + /* We don't want to use regparm(3) for nested functions as
> + these use a static chain pointer in the third argument. */
> + if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
> + local_regparm = 2;
> +
> + /* Save a register for the split stack. */
> + if (local_regparm == 3 && flag_split_stack)
> + local_regparm = 2;
> +
> + /* Each fixed register usage increases register pressure,
> + so less registers should be used for argument passing.
> + This functionality can be overriden by an explicit
> + regparm value. */
> + for (regno = AX_REG; regno <= DI_REG; regno++)
> + if (fixed_regs[regno])
> + globals++;
>
> - local_regparm
> - = globals < local_regparm ? local_regparm - globals : 0;
> + local_regparm
> + = globals < local_regparm ? local_regparm - globals : 0;
>
> - if (local_regparm > regparm)
> - regparm = local_regparm;
> + if (local_regparm > regparm)
> + regparm = local_regparm;
> + }
> }
> }
>
> @@ -5848,15 +5854,37 @@ ix86_function_sseregparm (const_tree typ
> return 2;
> }
>
> + if (!decl)
> + return 0;
> +
> + cgraph_node *target = cgraph_node::get (decl);
> + if (target)
> + target = target->function_symbol ();
> +
> /* For local functions, pass up to SSE_REGPARM_MAX SFmode
> (and DFmode for SSE2) arguments in SSE registers. */
> - if (decl && TARGET_SSE_MATH && optimize
> + if (target
> + /* TARGET_SSE_MATH */
> + && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
> + && opt_for_fn (target->decl, optimize)
> && !(profile_flag && !flag_fentry))
> {
> - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
> - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
> + cgraph_local_info *i = &target->local;
> if (i && i->local && i->can_change_signature)
> - return TARGET_SSE2 ? 2 : 1;
> + {
> + /* Refuse to produce wrong code when local function with SSE enabled
> + is called from SSE disabled function.
> + We may work hard to work out these scenarios but hopefully
> + it doesnot matter in practice. */
> + if (!TARGET_SSE && warn)
> + {
> + error ("calling %qD with SSE caling convention without "
> + "SSE/SSE2 enabled", decl);
> + return 0;
> + }
> + return TARGET_SSE2_P (target_opts_for_fn (target->decl)
> + ->x_ix86_isa_flags) ? 2 : 1;
> + }
> }
>
> return 0;
> @@ -6343,20 +6371,25 @@ init_cumulative_args (CUMULATIVE_ARGS *c
> tree fndecl,
> int caller)
> {
> - struct cgraph_local_info *i;
> + struct cgraph_local_info *i = NULL;
> + struct cgraph_node *target = NULL;
>
> memset (cum, 0, sizeof (*cum));
>
> if (fndecl)
> {
> - i = cgraph_node::local_info (fndecl);
> - cum->call_abi = ix86_function_abi (fndecl);
> + target = cgraph_node::get (fndecl);
> + if (target)
> + {
> + target = target->function_symbol ();
> + i = cgraph_node::local_info (target->decl);
> + cum->call_abi = ix86_function_abi (target->decl);
> + }
> + else
> + cum->call_abi = ix86_function_abi (fndecl);
> }
> else
> - {
> - i = NULL;
> - cum->call_abi = ix86_function_type_abi (fntype);
> - }
> + cum->call_abi = ix86_function_type_abi (fntype);
>
> cum->caller = caller;
>
> @@ -6392,7 +6425,7 @@ init_cumulative_args (CUMULATIVE_ARGS *c
> helping K&R code.
> FIXME: once typesytem is fixed, we won't need this code anymore. */
> if (i && i->local && i->can_change_signature)
> - fntype = TREE_TYPE (fndecl);
> + fntype = TREE_TYPE (target->decl);
> cum->stdarg = stdarg_p (fntype);
> cum->maybe_vaarg = (fntype
> ? (!prototype_p (fntype) || stdarg_p (fntype))