On Sat, Nov 28, 2015 at 3:40 AM, Jakub Jelinek <[email protected]> wrote:
> Hi!
>
> The recent changes where vector sqrt is represented in the IL using
> IFN_SQRT instead of target specific builtins broke the discovery
> of vector rsqrt, as targetm.builtin_reciprocal is called only
> on builtin functions (not internal functions). Furthermore,
> for internal fns, not only the IFN_* is significant, but also the
> types (modes actually) of the lhs and/or arguments.
>
> This patch adjusts the target hook, so that the backends can just inspect
> the call (builtin or internal function), whatever it is.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2015-11-27 Jakub Jelinek <[email protected]>
>
> PR tree-optimization/68501
> * target.def (builtin_reciprocal): Replace the 3 arguments with
> a gcall * one, adjust description.
> * targhooks.h (default_builtin_reciprocal): Replace the 3 arguments
> with a gcall * one.
> * targhooks.c (default_builtin_reciprocal): Likewise.
> * tree-ssa-math-opts.c (pass_cse_reciprocals::execute): Use
> targetm.builtin_reciprocal even on internal functions, adjust
> the arguments and allow replacing an internal function with normal
> built-in.
> * config/i386/i386.c (ix86_builtin_reciprocal): Replace the 3 arguments
> with a gcall * one. Handle internal fns too.
> * config/rs6000/rs6000.c (rs6000_builtin_reciprocal): Likewise.
> * config/aarch64/aarch64.c (aarch64_builtin_reciprocal): Likewise.
> * doc/tm.texi (builtin_reciprocal): Document.
>
> --- gcc/target.def.jj 2015-11-18 11:19:19.000000000 +0100
> +++ gcc/target.def 2015-11-27 16:37:07.870823670 +0100
> @@ -2463,13 +2463,9 @@ identical versions.",
> DEFHOOK
> (builtin_reciprocal,
> "This hook should return the DECL of a function that implements reciprocal of\n\
> -the builtin function with builtin function code @var{fn}, or\n\
> -@code{NULL_TREE} if such a function is not available. @var{md_fn} is true\n\
> -when @var{fn} is a code of a machine-dependent builtin function. When\n\
> -@var{sqrt} is true, additional optimizations that apply only to the reciprocal\n\
> -of a square root function are performed, and only reciprocals of @code{sqrt}\n\
> -function are valid.",
> - tree, (unsigned fn, bool md_fn, bool sqrt),
> +the builtin or internal function call @var{call}, or\n\
> +@code{NULL_TREE} if such a function is not available.",
> + tree, (gcall *call),
> default_builtin_reciprocal)
>
> /* For a vendor-specific TYPE, return a pointer to a statically-allocated
> --- gcc/targhooks.h.jj 2015-11-18 11:19:17.000000000 +0100
> +++ gcc/targhooks.h 2015-11-27 16:37:44.828301093 +0100
> @@ -90,7 +90,7 @@ extern tree default_builtin_vectorized_c
>
> extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt,
> tree, int);
>
> -extern tree default_builtin_reciprocal (unsigned int, bool, bool);
> +extern tree default_builtin_reciprocal (gcall *);
>
> extern HOST_WIDE_INT default_vector_alignment (const_tree);
>
> --- gcc/targhooks.c.jj 2015-11-18 11:19:17.000000000 +0100
> +++ gcc/targhooks.c 2015-11-27 16:38:21.461783097 +0100
> @@ -600,9 +600,7 @@ default_builtin_vectorization_cost (enum
> /* Reciprocal. */
>
> tree
> -default_builtin_reciprocal (unsigned int fn ATTRIBUTE_UNUSED,
> - bool md_fn ATTRIBUTE_UNUSED,
> - bool sqrt ATTRIBUTE_UNUSED)
> +default_builtin_reciprocal (gcall *)
> {
> return NULL_TREE;
> }
> --- gcc/tree-ssa-math-opts.c.jj 2015-11-25 09:57:47.000000000 +0100
> +++ gcc/tree-ssa-math-opts.c 2015-11-27 17:07:22.756162308 +0100
> @@ -601,19 +601,17 @@ pass_cse_reciprocals::execute (function
>
> if (is_gimple_call (stmt1)
> && gimple_call_lhs (stmt1)
> - && (fndecl = gimple_call_fndecl (stmt1))
> - && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
> - || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
> + && (gimple_call_internal_p (stmt1)
> + || ((fndecl = gimple_call_fndecl (stmt1))
> + && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
> + || (DECL_BUILT_IN_CLASS (fndecl)
> + == BUILT_IN_MD)))))
> {
> - enum built_in_function code;
> - bool md_code, fail;
> + bool fail;
> imm_use_iterator ui;
> use_operand_p use_p;
>
> - code = DECL_FUNCTION_CODE (fndecl);
> - md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
> -
> - fndecl = targetm.builtin_reciprocal (code, md_code, false);
> + fndecl = targetm.builtin_reciprocal (as_a <gcall *> (stmt1));
> if (!fndecl)
> continue;
>
> @@ -639,8 +637,28 @@ pass_cse_reciprocals::execute (function
> continue;
>
> gimple_replace_ssa_lhs (stmt1, arg1);
> - gimple_call_set_fndecl (stmt1, fndecl);
> - update_stmt (stmt1);
> + if (gimple_call_internal_p (stmt1))
> + {
> + auto_vec<tree, 4> args;
> + for (unsigned int i = 0;
> + i < gimple_call_num_args (stmt1); i++)
> + args.safe_push (gimple_call_arg (stmt1, i));
> + gcall *stmt2 = gimple_build_call_vec (fndecl, args);
> + gimple_call_set_lhs (stmt2, arg1);
> + if (gimple_vdef (stmt1))
> + {
> + gimple_set_vdef (stmt2, gimple_vdef (stmt1));
> + SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
> + }
> + gimple_set_vuse (stmt2, gimple_vuse (stmt1));
> + gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt1);
> + gsi_replace (&gsi2, stmt2, true);
> + }
> + else
> + {
> + gimple_call_set_fndecl (stmt1, fndecl);
> + update_stmt (stmt1);
> + }
> reciprocal_stats.rfuncs_inserted++;
>
> FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
> --- gcc/config/i386/i386.c.jj 2015-11-25 09:49:57.000000000 +0100
> +++ gcc/config/i386/i386.c 2015-11-27 17:24:30.743625244 +0100
> @@ -42680,16 +42680,40 @@ ix86_vectorize_builtin_scatter (const_tr
> reciprocal of the function, or NULL_TREE if not available. */
>
> static tree
> -ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
> +ix86_builtin_reciprocal (gcall *call)
> {
> if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
> && flag_finite_math_only && !flag_trapping_math
> && flag_unsafe_math_optimizations))
> return NULL_TREE;
>
> - if (md_fn)
> + if (gimple_call_internal_p (call))
> + switch (gimple_call_internal_fn (call))
> + {
> + tree type;
> + case IFN_SQRT:
> + type = TREE_TYPE (gimple_call_lhs (call));
> + switch (TYPE_MODE (type))
> + {
> + /* Vectorized version of sqrt to rsqrt conversion. */
> + case V4SFmode:
> + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
> +
> + case V8SFmode:
> + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
> +
> + default:
> + return NULL_TREE;
> + }
> +
> + default:
> + return NULL_TREE;
> + }
> +
> + tree fndecl = gimple_call_fndecl (call);
> + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
> /* Machine dependent builtins. */
> - switch (fn)
> + switch (DECL_FUNCTION_CODE (fndecl))
> {
> /* Vectorized version of sqrt to rsqrt conversion. */
> case IX86_BUILTIN_SQRTPS_NR:
> @@ -42703,7 +42727,7 @@ ix86_builtin_reciprocal (unsigned int fn
> }
> else
> /* Normal builtins. */
> - switch (fn)
> + switch (DECL_FUNCTION_CODE (fndecl))
> {
> /* Sqrt to rsqrt conversion. */
> case BUILT_IN_SQRTF:
> --- gcc/config/rs6000/rs6000.c.jj 2015-11-26 10:41:04.000000000 +0100
> +++ gcc/config/rs6000/rs6000.c 2015-11-27 17:26:11.238203965 +0100
> @@ -32643,14 +32643,42 @@ rs6000_memory_move_cost (machine_mode mo
> reciprocal of the function, or NULL_TREE if not available. */
>
> static tree
> -rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
> - bool sqrt ATTRIBUTE_UNUSED)
> +rs6000_builtin_reciprocal (gcall *call)
> {
> if (optimize_insn_for_size_p ())
> return NULL_TREE;
>
> - if (md_fn)
> - switch (fn)
> + if (gimple_call_internal_p (call))
> + switch (gimple_call_internal_fn (call))
> + {
> + tree type;
> + case IFN_SQRT:
> + type = TREE_TYPE (gimple_call_lhs (call));
> + switch (TYPE_MODE (type))
> + {
> + case V2DFmode:
> + if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
> + return NULL_TREE;
> +
> + return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
> +
> + case V4SFmode:
> + if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
> + return NULL_TREE;
> +
> + return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
> +
> + default:
> + return NULL_TREE;
> + }
> +
> + default:
> + return NULL_TREE;
> + }
> +
> + tree fndecl = gimple_call_fndecl (call);
> + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
> + switch (DECL_FUNCTION_CODE (fndecl))
> {
> case VSX_BUILTIN_XVSQRTDP:
> if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
> @@ -32669,7 +32697,7 @@ rs6000_builtin_reciprocal (unsigned int
> }
>
> else
> - switch (fn)
> + switch (DECL_FUNCTION_CODE (fndecl))
> {
> case BUILT_IN_SQRT:
> if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
> --- gcc/config/aarch64/aarch64.c.jj 2015-11-23 17:13:34.000000000 +0100
> +++ gcc/config/aarch64/aarch64.c 2015-11-27 17:14:22.295230125 +0100
> @@ -7103,19 +7103,21 @@ aarch64_memory_move_cost (machine_mode m
> reciprocal square root builtins. */
>
> static tree
> -aarch64_builtin_reciprocal (unsigned int fn,
> - bool md_fn,
> - bool)
> +aarch64_builtin_reciprocal (gcall *call)
> {
> if (flag_trapping_math
> || !flag_unsafe_math_optimizations
> || optimize_size
> || ! (aarch64_tune_params.extra_tuning_flags
> & AARCH64_EXTRA_TUNE_RECIP_SQRT))
> - {
> return NULL_TREE;
> - }
>
> + if (gimple_call_internal_p (call)
Missing ')' here? The condition "if (gimple_call_internal_p (call)" above is never closed.
Thanks,
bin
> + return NULL_TREE;
> +
> + tree fndecl = gimple_call_fndecl (call);
> + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
> + bool md_fn = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
> return aarch64_builtin_rsqrt (fn, md_fn);
> }
>
> --- gcc/doc/tm.texi.jj 2015-11-18 11:19:16.000000000 +0100
> +++ gcc/doc/tm.texi 2015-11-27 16:48:40.388031894 +0100
> @@ -5608,14 +5608,10 @@ be placed in an @code{object_block} stru
> The default version returns true for all decls.
> @end deftypefn
>
> -@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (unsigned @var{fn}, bool @var{md_fn}, bool @var{sqrt})
> +@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (gcall *@var{call})
> This hook should return the DECL of a function that implements reciprocal of
> -the builtin function with builtin function code @var{fn}, or
> -@code{NULL_TREE} if such a function is not available. @var{md_fn} is true
> -when @var{fn} is a code of a machine-dependent builtin function. When
> -@var{sqrt} is true, additional optimizations that apply only to the reciprocal
> -of a square root function are performed, and only reciprocals of @code{sqrt}
> -function are valid.
> +the builtin or internal function call @var{call}, or
> +@code{NULL_TREE} if such a function is not available.
> @end deftypefn
>
> @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD (void)
>
> Jakub