Hi, this patch makes i386.c ready for presence of aliases of local functions, but also fixes a wrong code issue where ix86_function_sseregparm does use caller TARET_SSE_MATH flags instead of callees. If callee disagree on this flag, it may get called with wrong calling convention.
I added error message on calling local function with SSE ABI from function compiled with SSE disabled. We may work harder to simply disable SSE calling convention but that requires whole compilation unit analysis that is hard to do on ltrans. I hope it does not matter in practice. Otherwise we may want to introduce some hook and disqualify such functions from being local at WPA time. Bootstrapped/regtested x86_64-linux (includin 32bit testing), will commit it shortly. Honza PR ipa/63566 * i386.c (ix86_function_regparm): Look through aliases to see if callee is local and optimized. (ix86_function_sseregparm): Likewise; also use target's SSE math settings; error out instead of silently generating wrong code on mismatches. (init_cumulative_args): Look through aliases. Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 220509) +++ config/i386/i386.c (working copy) @@ -5767,49 +5767,55 @@ ix86_function_regparm (const_tree type, /* Use register calling convention for local functions when possible. */ if (decl - && TREE_CODE (decl) == FUNCTION_DECL + && TREE_CODE (decl) == FUNCTION_DECL) + { + cgraph_node *target = cgraph_node::get (decl); + if (target) + target = target->function_symbol (); + /* Caller and callee must agree on the calling convention, so checking here just optimize means that with __attribute__((optimize (...))) caller could use regparm convention and callee not, or vice versa. Instead look at whether the callee is optimized or not. */ - && opt_for_fn (decl, optimize) - && !(profile_flag && !flag_fentry)) - { - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl)); - if (i && i->local && i->can_change_signature) + if (target && opt_for_fn (target->decl, optimize) + && !(profile_flag && !flag_fentry)) { - int local_regparm, globals = 0, regno; + cgraph_local_info *i = &target->local; + if (i && i->local && i->can_change_signature) + { + int local_regparm, globals = 0, regno; - /* Make sure no regparm register is taken by a - fixed register variable. */ - for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++) - if (fixed_regs[local_regparm]) - break; - - /* We don't want to use regparm(3) for nested functions as - these use a static chain pointer in the third argument. */ - if (local_regparm == 3 && DECL_STATIC_CHAIN (decl)) - local_regparm = 2; - - /* In 32-bit mode save a register for the split stack. */ - if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack) - local_regparm = 2; - - /* Each fixed register usage increases register pressure, - so less registers should be used for argument passing. - This functionality can be overriden by an explicit - regparm value. */ - for (regno = AX_REG; regno <= DI_REG; regno++) - if (fixed_regs[regno]) - globals++; + /* Make sure no regparm register is taken by a + fixed register variable. */ + for (local_regparm = 0; local_regparm < REGPARM_MAX; + local_regparm++) + if (fixed_regs[local_regparm]) + break; + + /* We don't want to use regparm(3) for nested functions as + these use a static chain pointer in the third argument. */ + if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) + local_regparm = 2; + + /* Save a register for the split stack. */ + if (local_regparm == 3 && flag_split_stack) + local_regparm = 2; + + /* Each fixed register usage increases register pressure, + so less registers should be used for argument passing. + This functionality can be overriden by an explicit + regparm value. */ + for (regno = AX_REG; regno <= DI_REG; regno++) + if (fixed_regs[regno]) + globals++; - local_regparm - = globals < local_regparm ? local_regparm - globals : 0; + local_regparm + = globals < local_regparm ? local_regparm - globals : 0; - if (local_regparm > regparm) - regparm = local_regparm; + if (local_regparm > regparm) + regparm = local_regparm; + } } } @@ -5848,15 +5854,37 @@ ix86_function_sseregparm (const_tree typ return 2; } + if (!decl) + return 0; + + cgraph_node *target = cgraph_node::get (decl); + if (target) + target = target->function_symbol (); + /* For local functions, pass up to SSE_REGPARM_MAX SFmode (and DFmode for SSE2) arguments in SSE registers. */ - if (decl && TARGET_SSE_MATH && optimize + if (target + /* TARGET_SSE_MATH */ + && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) + && opt_for_fn (target->decl, optimize) && !(profile_flag && !flag_fentry)) { - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl)); + cgraph_local_info *i = &target->local; if (i && i->local && i->can_change_signature) - return TARGET_SSE2 ? 2 : 1; + { + /* Refuse to produce wrong code when local function with SSE enabled + is called from SSE disabled function. + We may work hard to work out these scenarios but hopefully + it doesnot matter in practice. */ + if (!TARGET_SSE && warn) + { + error ("calling %qD with SSE caling convention without " + "SSE/SSE2 enabled", decl); + return 0; + } + return TARGET_SSE2_P (target_opts_for_fn (target->decl) + ->x_ix86_isa_flags) ? 2 : 1; + } } return 0; @@ -6343,20 +6371,25 @@ init_cumulative_args (CUMULATIVE_ARGS *c tree fndecl, int caller) { - struct cgraph_local_info *i; + struct cgraph_local_info *i = NULL; + struct cgraph_node *target = NULL; memset (cum, 0, sizeof (*cum)); if (fndecl) { - i = cgraph_node::local_info (fndecl); - cum->call_abi = ix86_function_abi (fndecl); + target = cgraph_node::get (fndecl); + if (target) + { + target = target->function_symbol (); + i = cgraph_node::local_info (target->decl); + cum->call_abi = ix86_function_abi (target->decl); + } + else + cum->call_abi = ix86_function_abi (fndecl); } else - { - i = NULL; - cum->call_abi = ix86_function_type_abi (fntype); - } + cum->call_abi = ix86_function_type_abi (fntype); cum->caller = caller; @@ -6392,7 +6425,7 @@ init_cumulative_args (CUMULATIVE_ARGS *c helping K&R code. FIXME: once typesytem is fixed, we won't need this code anymore. */ if (i && i->local && i->can_change_signature) - fntype = TREE_TYPE (fndecl); + fntype = TREE_TYPE (target->decl); cum->stdarg = stdarg_p (fntype); cum->maybe_vaarg = (fntype ? (!prototype_p (fntype) || stdarg_p (fntype))