Jan, This patch caused Bug 64982 - [5 Regression] Many g++ failures on x86_64-apple-darwin14 with -m32. Jack
On Sun, Feb 8, 2015 at 3:20 PM, Jan Hubicka <hubi...@ucw.cz> wrote: > Hi, > this patch makes i386.c ready for presence of aliases of local functions, but > also fixes a wrong code issue where ix86_function_sseregparm > does use caller TARET_SSE_MATH flags instead of callees. If callee disagree > on this flag, it may get called with wrong calling convention. > > I added error message on calling local function with SSE ABI from function > compiled with SSE disabled. We may work harder to simply disable SSE calling > convention but that requires whole compilation unit analysis that is hard > to do on ltrans. I hope it does not matter in practice. Otherwise we may want > to introduce some hook and disqualify such functions from being local > at WPA time. > > Bootstrapped/regtested x86_64-linux (includin 32bit testing), will commit it > shortly. > > Honza > > PR ipa/63566 > * i386.c (ix86_function_regparm): Look through aliases to see if > callee > is local and optimized. > (ix86_function_sseregparm): Likewise; also use target's SSE math > settings; error out instead of silently generating wrong code > on mismatches. > (init_cumulative_args): Look through aliases. > > Index: config/i386/i386.c > =================================================================== > --- config/i386/i386.c (revision 220509) > +++ config/i386/i386.c (working copy) > @@ -5767,49 +5767,55 @@ ix86_function_regparm (const_tree type, > > /* Use register calling convention for local functions when possible. */ > if (decl > - && TREE_CODE (decl) == FUNCTION_DECL > + && TREE_CODE (decl) == FUNCTION_DECL) > + { > + cgraph_node *target = cgraph_node::get (decl); > + if (target) > + target = target->function_symbol (); > + > /* Caller and callee must agree on the calling convention, so > checking here just optimize means that with > __attribute__((optimize (...))) caller could use regparm convention > and callee not, or vice versa. Instead look at whether the callee > is optimized or not. */ > - && opt_for_fn (decl, optimize) > - && !(profile_flag && !flag_fentry)) > - { > - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ > - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE > (decl)); > - if (i && i->local && i->can_change_signature) > + if (target && opt_for_fn (target->decl, optimize) > + && !(profile_flag && !flag_fentry)) > { > - int local_regparm, globals = 0, regno; > + cgraph_local_info *i = &target->local; > + if (i && i->local && i->can_change_signature) > + { > + int local_regparm, globals = 0, regno; > > - /* Make sure no regparm register is taken by a > - fixed register variable. */ > - for (local_regparm = 0; local_regparm < REGPARM_MAX; > local_regparm++) > - if (fixed_regs[local_regparm]) > - break; > - > - /* We don't want to use regparm(3) for nested functions as > - these use a static chain pointer in the third argument. */ > - if (local_regparm == 3 && DECL_STATIC_CHAIN (decl)) > - local_regparm = 2; > - > - /* In 32-bit mode save a register for the split stack. */ > - if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack) > - local_regparm = 2; > - > - /* Each fixed register usage increases register pressure, > - so less registers should be used for argument passing. > - This functionality can be overriden by an explicit > - regparm value. */ > - for (regno = AX_REG; regno <= DI_REG; regno++) > - if (fixed_regs[regno]) > - globals++; > + /* Make sure no regparm register is taken by a > + fixed register variable. */ > + for (local_regparm = 0; local_regparm < REGPARM_MAX; > + local_regparm++) > + if (fixed_regs[local_regparm]) > + break; > + > + /* We don't want to use regparm(3) for nested functions as > + these use a static chain pointer in the third argument. */ > + if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) > + local_regparm = 2; > + > + /* Save a register for the split stack. */ > + if (local_regparm == 3 && flag_split_stack) > + local_regparm = 2; > + > + /* Each fixed register usage increases register pressure, > + so less registers should be used for argument passing. > + This functionality can be overriden by an explicit > + regparm value. */ > + for (regno = AX_REG; regno <= DI_REG; regno++) > + if (fixed_regs[regno]) > + globals++; > > - local_regparm > - = globals < local_regparm ? local_regparm - globals : 0; > + local_regparm > + = globals < local_regparm ? local_regparm - globals : 0; > > - if (local_regparm > regparm) > - regparm = local_regparm; > + if (local_regparm > regparm) > + regparm = local_regparm; > + } > } > } > > @@ -5848,15 +5854,37 @@ ix86_function_sseregparm (const_tree typ > return 2; > } > > + if (!decl) > + return 0; > + > + cgraph_node *target = cgraph_node::get (decl); > + if (target) > + target = target->function_symbol (); > + > /* For local functions, pass up to SSE_REGPARM_MAX SFmode > (and DFmode for SSE2) arguments in SSE registers. */ > - if (decl && TARGET_SSE_MATH && optimize > + if (target > + /* TARGET_SSE_MATH */ > + && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) > + && opt_for_fn (target->decl, optimize) > && !(profile_flag && !flag_fentry)) > { > - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ > - cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl)); > + cgraph_local_info *i = &target->local; > if (i && i->local && i->can_change_signature) > - return TARGET_SSE2 ? 2 : 1; > + { > + /* Refuse to produce wrong code when local function with SSE enabled > + is called from SSE disabled function. > + We may work hard to work out these scenarios but hopefully > + it doesnot matter in practice. */ > + if (!TARGET_SSE && warn) > + { > + error ("calling %qD with SSE caling convention without " > + "SSE/SSE2 enabled", decl); > + return 0; > + } > + return TARGET_SSE2_P (target_opts_for_fn (target->decl) > + ->x_ix86_isa_flags) ? 2 : 1; > + } > } > > return 0; > @@ -6343,20 +6371,25 @@ init_cumulative_args (CUMULATIVE_ARGS *c > tree fndecl, > int caller) > { > - struct cgraph_local_info *i; > + struct cgraph_local_info *i = NULL; > + struct cgraph_node *target = NULL; > > memset (cum, 0, sizeof (*cum)); > > if (fndecl) > { > - i = cgraph_node::local_info (fndecl); > - cum->call_abi = ix86_function_abi (fndecl); > + target = cgraph_node::get (fndecl); > + if (target) > + { > + target = target->function_symbol (); > + i = cgraph_node::local_info (target->decl); > + cum->call_abi = ix86_function_abi (target->decl); > + } > + else > + cum->call_abi = ix86_function_abi (fndecl); > } > else > - { > - i = NULL; > - cum->call_abi = ix86_function_type_abi (fntype); > - } > + cum->call_abi = ix86_function_type_abi (fntype); > > cum->caller = caller; > > @@ -6392,7 +6425,7 @@ init_cumulative_args (CUMULATIVE_ARGS *c > helping K&R code. > FIXME: once typesytem is fixed, we won't need this code anymore. */ > if (i && i->local && i->can_change_signature) > - fntype = TREE_TYPE (fndecl); > + fntype = TREE_TYPE (target->decl); > cum->stdarg = stdarg_p (fntype); > cum->maybe_vaarg = (fntype > ? (!prototype_p (fntype) || stdarg_p (fntype))