On Sun, Feb 23, 2014 at 12:09 PM, Marc Glisse <marc.gli...@inria.fr> wrote:
> Hello,
>
> a natural first step to optimize changes of rounding modes seems to be
> making these 2 functions builtins. I don't know exactly how far
> optimizations will be able to go (the fact that fesetround can fail
> complicates things a lot). What is included here:
>
> 1) fegetround is pure.
>
> 2) Neither function aliases (use or clobber) any memory. I expect this is
> likely not true on all platforms, some probably store the rounding mode in a
> global variable that is accessible through other means (though mixing direct
> accesses with calls to fe*etround seems a questionable style). Any opinion
> or advice here?
>
> Regtested on x86_64-linux-gnu, certainly not for 4.9.

Hohumm ... before making any of these functions less of a barrier than they
are (at least for loads and stores), shouldn't we think about, and fix, the
lack of any dependences between FP status word changes and actual arithmetic
instructions?

In fact, using 'pure' or 'not use/clobber memory' here is exactly walking
on shaky ground, simply because we lack a way to say that
this stmt uses/clobbers the FP state (fegetround would be 'const' when
following your logic in 2)).

Otherwise, what is the point of optimizing^Wbreaking things even more than
we do now?

[not that I have an answer for the FP state dependency that I like]

Thanks,
Richard.

> 2014-02-23  Marc Glisse  <marc.gli...@inria.fr>
>
> gcc/
>         * builtins.def (BUILT_IN_FEGETROUND, BUILT_IN_FESETROUND): Add.
>         * tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
>         call_may_clobber_ref_p_1): Handle them.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/fegsetround.c: New file.
>
> --
> Marc Glisse
> Index: gcc/builtins.def
> ===================================================================
> --- gcc/builtins.def    (revision 208045)
> +++ gcc/builtins.def    (working copy)
> @@ -276,20 +276,22 @@ DEF_C99_BUILTIN        (BUILT_IN_EXPM1F,
>  DEF_C99_BUILTIN        (BUILT_IN_EXPM1L, "expm1l",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_LIB_BUILTIN        (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD32, "fabsd32",
> BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD64, "fabsd64",
> BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD128, "fabsd128",
> BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT,
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIML, "fdiml",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
> +DEF_C99_BUILTIN        (BUILT_IN_FEGETROUND, "fegetround", BT_FN_INT,
> ATTR_PURE_NOTHROW_LEAF_LIST)
> +DEF_C99_BUILTIN        (BUILT_IN_FESETROUND, "fesetround", BT_FN_INT_INT,
> ATTR_NOTHROW_LEAF_LIST)
>  DEF_LIB_BUILTIN        (BUILT_IN_FLOOR, "floor", BT_FN_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORF, "floorf", BT_FN_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORL, "floorl",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMA, "fma",
> BT_FN_DOUBLE_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAF, "fmaf",
> BT_FN_FLOAT_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAL, "fmal",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAX, "fmax", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAXF, "fmaxf", BT_FN_FLOAT_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAXL, "fmaxl",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMIN, "fmin", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
> Index: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c (working copy)
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-std=c99 -O -fdump-tree-optimized" } */
> +
> +#include <fenv.h>
> +
> +int a;
> +int f ()
> +{
> +  a = 42;
> +  // don't read a
> +  int x = fegetround ();
> +  fesetround (x + 1);
> +  a = 0;
> +  return a;
> +}
> +int g ()
> +{
> +  a = 0;
> +  // don't write a
> +  int x = fegetround ();
> +  fesetround (x + 1);
> +  return a;
> +}
> +int h ()
> +{
> +  // pure
> +  return fegetround () - fegetround ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 0" 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "a = 42" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
> ___________________________________________________________________
> Added: svn:keywords
> ## -0,0 +1 ##
> +Author Date Id Revision URL
> \ No newline at end of property
> Added: svn:eol-style
> ## -0,0 +1 ##
> +native
> \ No newline at end of property
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c        (revision 208045)
> +++ gcc/tree-ssa-alias.c        (working copy)
> @@ -1537,20 +1537,22 @@ ref_maybe_used_by_call_p_1 (gimple call,
>         case BUILT_IN_MODFF:
>         case BUILT_IN_MODFL:
>         case BUILT_IN_REMQUO:
>         case BUILT_IN_REMQUOF:
>         case BUILT_IN_REMQUOL:
>         case BUILT_IN_SINCOS:
>         case BUILT_IN_SINCOSF:
>         case BUILT_IN_SINCOSL:
>         case BUILT_IN_ASSUME_ALIGNED:
>         case BUILT_IN_VA_END:
> +       case BUILT_IN_FEGETROUND:
> +       case BUILT_IN_FESETROUND:
>           return false;
>         /* __sync_* builtins and some OpenMP builtins act as threading
>            barriers.  */
>  #undef DEF_SYNC_BUILTIN
>  #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
>  #include "sync-builtins.def"
>  #undef DEF_SYNC_BUILTIN
>         case BUILT_IN_GOMP_ATOMIC_START:
>         case BUILT_IN_GOMP_ATOMIC_END:
>         case BUILT_IN_GOMP_BARRIER:
> @@ -1831,20 +1833,21 @@ call_may_clobber_ref_p_1 (gimple call, a
>         case BUILT_IN_STRNDUP:
>           /* Unix98 specifies that errno is set on allocation failure.  */
>           if (flag_errno_math
>               && targetm.ref_may_alias_errno (ref))
>             return true;
>           return false;
>         case BUILT_IN_STACK_SAVE:
>         case BUILT_IN_ALLOCA:
>         case BUILT_IN_ALLOCA_WITH_ALIGN:
>         case BUILT_IN_ASSUME_ALIGNED:
> +       case BUILT_IN_FESETROUND:
>           return false;
>         /* But posix_memalign stores a pointer into the memory pointed to
>            by its first argument.  */
>         case BUILT_IN_POSIX_MEMALIGN:
>           {
>             tree ptrptr = gimple_call_arg (call, 0);
>             ao_ref dref;
>             ao_ref_init_from_ptr_and_size (&dref, ptrptr,
>                                            TYPE_SIZE_UNIT (ptr_type_node));
>             return (refs_may_alias_p_1 (&dref, ref, false)
>

Reply via email to