On Mon, Jun 27, 2011 at 6:54 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> On Mon, Jun 27, 2011 at 12:17:40PM +0200, Richard Guenther wrote:
>> Ok if you remove the builtins.c folding and instead verify arguments
>> from check_builtin_function_arguments.
>
> Thanks, here is what I've committed after bootstrapping/regtesting
> again on x86_64-linux and i686-linux.
Thanks Jakub. Probably worth an entry in changes.html. Richard. > 2011-06-27 Jakub Jelinek <ja...@redhat.com> > > * builtin-types.def (BT_FN_PTR_CONST_PTR_SIZE_VAR): New. > * builtins.def (BUILT_IN_ASSUME_ALIGNED): New builtin. > * tree-ssa-structalias.c (find_func_aliases_for_builtin_call, > find_func_clobbers): Handle BUILT_IN_ASSUME_ALIGNED. > * tree-ssa-ccp.c (bit_value_assume_aligned): New function. > (evaluate_stmt, execute_fold_all_builtins): Handle > BUILT_IN_ASSUME_ALIGNED. > * tree-ssa-dce.c (propagate_necessity): Likewise. > * tree-ssa-alias.c (ref_maybe_used_by_call_p_1, > call_may_clobber_ref_p_1): Likewise. > * builtins.c (is_simple_builtin, expand_builtin): Likewise. > (expand_builtin_assume_aligned): New function. > * doc/extend.texi (__builtin_assume_aligned): Document. > > * c-common.c (check_builtin_function_arguments): Handle > BUILT_IN_ASSUME_ALIGNED. > > * gcc.dg/builtin-assume-aligned-1.c: New test. > * gcc.dg/builtin-assume-aligned-2.c: New test. > * gcc.target/i386/builtin-assume-aligned-1.c: New test. > > --- gcc/builtin-types.def.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/builtin-types.def 2011-06-27 15:08:12.000000000 +0200 > @@ -454,6 +454,8 @@ DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_CONST > BT_INT, BT_CONST_STRING, BT_CONST_STRING) > DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_INT_CONST_STRING_VAR, > BT_INT, BT_INT, BT_CONST_STRING) > +DEF_FUNCTION_TYPE_VAR_2 (BT_FN_PTR_CONST_PTR_SIZE_VAR, BT_PTR, > + BT_CONST_PTR, BT_SIZE) > > DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_STRING_SIZE_CONST_STRING_VAR, > BT_INT, BT_STRING, BT_SIZE, BT_CONST_STRING) > --- gcc/builtins.def.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/builtins.def 2011-06-27 15:08:12.000000000 +0200 > @@ -1,7 +1,7 @@ > /* This file contains the definitions and documentation for the > builtins used in the GNU compiler. > Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, > - 2010 Free Software Foundation, Inc. > + 2010, 2011 Free Software Foundation, Inc. 
> > This file is part of GCC. > > @@ -638,6 +638,7 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_EXE > DEF_EXT_LIB_BUILTIN (BUILT_IN_EXECVE, "execve", > BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING, ATTR_NOTHROW_LIST) > DEF_LIB_BUILTIN (BUILT_IN_EXIT, "exit", BT_FN_VOID_INT, > ATTR_NORETURN_NOTHROW_LIST) > DEF_GCC_BUILTIN (BUILT_IN_EXPECT, "expect", BT_FN_LONG_LONG_LONG, > ATTR_CONST_NOTHROW_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_ASSUME_ALIGNED, "assume_aligned", > BT_FN_PTR_CONST_PTR_SIZE_VAR, ATTR_CONST_NOTHROW_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_EXTEND_POINTER, "extend_pointer", > BT_FN_UNWINDWORD_PTR, ATTR_CONST_NOTHROW_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_EXTRACT_RETURN_ADDR, "extract_return_addr", > BT_FN_PTR_PTR, ATTR_LEAF_LIST) > DEF_EXT_LIB_BUILTIN (BUILT_IN_FFS, "ffs", BT_FN_INT_INT, > ATTR_CONST_NOTHROW_LEAF_LIST) > --- gcc/tree-ssa-structalias.c.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/tree-ssa-structalias.c 2011-06-27 15:08:12.000000000 +0200 > @@ -4002,6 +4002,7 @@ find_func_aliases_for_builtin_call (gimp > case BUILT_IN_STPCPY_CHK: > case BUILT_IN_STRCAT_CHK: > case BUILT_IN_STRNCAT_CHK: > + case BUILT_IN_ASSUME_ALIGNED: > { > tree res = gimple_call_lhs (t); > tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl) > @@ -4726,6 +4727,7 @@ find_func_clobbers (gimple origt) > return; > } > /* The following functions neither read nor clobber memory. */ > + case BUILT_IN_ASSUME_ALIGNED: > case BUILT_IN_FREE: > return; > /* Trampolines are of no interest to us. */ > --- gcc/tree-ssa-ccp.c.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/tree-ssa-ccp.c 2011-06-27 15:08:12.000000000 +0200 > @@ -1476,6 +1476,64 @@ bit_value_binop (enum tree_code code, tr > return val; > } > > +/* Return the propagation value when applying __builtin_assume_aligned to > + its arguments. 
*/ > + > +static prop_value_t > +bit_value_assume_aligned (gimple stmt) > +{ > + tree ptr = gimple_call_arg (stmt, 0), align, misalign = NULL_TREE; > + tree type = TREE_TYPE (ptr); > + unsigned HOST_WIDE_INT aligni, misaligni = 0; > + prop_value_t ptrval = get_value_for_expr (ptr, true); > + prop_value_t alignval; > + double_int value, mask; > + prop_value_t val; > + if (ptrval.lattice_val == UNDEFINED) > + return ptrval; > + gcc_assert ((ptrval.lattice_val == CONSTANT > + && TREE_CODE (ptrval.value) == INTEGER_CST) > + || double_int_minus_one_p (ptrval.mask)); > + align = gimple_call_arg (stmt, 1); > + if (!host_integerp (align, 1)) > + return ptrval; > + aligni = tree_low_cst (align, 1); > + if (aligni <= 1 > + || (aligni & (aligni - 1)) != 0) > + return ptrval; > + if (gimple_call_num_args (stmt) > 2) > + { > + misalign = gimple_call_arg (stmt, 2); > + if (!host_integerp (misalign, 1)) > + return ptrval; > + misaligni = tree_low_cst (misalign, 1); > + if (misaligni >= aligni) > + return ptrval; > + } > + align = build_int_cst_type (type, -aligni); > + alignval = get_value_for_expr (align, true); > + bit_value_binop_1 (BIT_AND_EXPR, type, &value, &mask, > + type, value_to_double_int (ptrval), ptrval.mask, > + type, value_to_double_int (alignval), alignval.mask); > + if (!double_int_minus_one_p (mask)) > + { > + val.lattice_val = CONSTANT; > + val.mask = mask; > + gcc_assert ((mask.low & (aligni - 1)) == 0); > + gcc_assert ((value.low & (aligni - 1)) == 0); > + value.low |= misaligni; > + /* ??? Delay building trees here. */ > + val.value = double_int_to_tree (type, value); > + } > + else > + { > + val.lattice_val = VARYING; > + val.value = NULL_TREE; > + val.mask = double_int_minus_one; > + } > + return val; > +} > + > /* Evaluate statement STMT. > Valid only for assignments, calls, conditionals, and switches. 
*/ > > @@ -1647,6 +1705,10 @@ evaluate_stmt (gimple stmt) > val = get_value_for_expr (gimple_call_arg (stmt, 0), true); > break; > > + case BUILT_IN_ASSUME_ALIGNED: > + val = bit_value_assume_aligned (stmt); > + break; > + > default:; > } > } > @@ -2186,6 +2248,11 @@ execute_fold_all_builtins (void) > result = integer_zero_node; > break; > > + case BUILT_IN_ASSUME_ALIGNED: > + /* Remove __builtin_assume_aligned. */ > + result = gimple_call_arg (stmt, 0); > + break; > + > case BUILT_IN_STACK_RESTORE: > result = optimize_stack_restore (i); > if (result) > --- gcc/tree-ssa-dce.c.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/tree-ssa-dce.c 2011-06-27 15:08:12.000000000 +0200 > @@ -837,7 +837,8 @@ propagate_necessity (struct edge_list *e > || DECL_FUNCTION_CODE (callee) == BUILT_IN_FREE > || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA > || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE > - || DECL_FUNCTION_CODE (callee) == > BUILT_IN_STACK_RESTORE)) > + || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE > + || DECL_FUNCTION_CODE (callee) == > BUILT_IN_ASSUME_ALIGNED)) > continue; > > /* Calls implicitly load from memory, their arguments > --- gcc/tree-ssa-alias.c.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/tree-ssa-alias.c 2011-06-27 15:08:12.000000000 +0200 > @@ -1253,6 +1253,7 @@ ref_maybe_used_by_call_p_1 (gimple call, > case BUILT_IN_SINCOS: > case BUILT_IN_SINCOSF: > case BUILT_IN_SINCOSL: > + case BUILT_IN_ASSUME_ALIGNED: > return false; > /* __sync_* builtins and some OpenMP builtins act as threading > barriers. */ > @@ -1511,6 +1512,7 @@ call_may_clobber_ref_p_1 (gimple call, a > return false; > case BUILT_IN_STACK_SAVE: > case BUILT_IN_ALLOCA: > + case BUILT_IN_ASSUME_ALIGNED: > return false; > /* Freeing memory kills the pointed-to memory. 
More importantly > the call has to serve as a barrier for moving loads and stores > --- gcc/builtins.c.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/builtins.c 2011-06-27 15:13:34.000000000 +0200 > @@ -4604,6 +4604,23 @@ expand_builtin_expect (tree exp, rtx tar > return target; > } > > +/* Expand a call to __builtin_assume_aligned. We just return our first > + argument as the builtin_assume_aligned semantic should've been already > + executed by CCP. */ > + > +static rtx > +expand_builtin_assume_aligned (tree exp, rtx target) > +{ > + if (call_expr_nargs (exp) < 2) > + return const0_rtx; > + target = expand_expr (CALL_EXPR_ARG (exp, 0), target, VOIDmode, > + EXPAND_NORMAL); > + gcc_assert (!TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 1)) > + && (call_expr_nargs (exp) < 3 > + || !TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 2)))); > + return target; > +} > + > void > expand_builtin_trap (void) > { > @@ -5823,6 +5840,8 @@ expand_builtin (tree exp, rtx target, rt > return expand_builtin_va_copy (exp); > case BUILT_IN_EXPECT: > return expand_builtin_expect (exp, target); > + case BUILT_IN_ASSUME_ALIGNED: > + return expand_builtin_assume_aligned (exp, target); > case BUILT_IN_PREFETCH: > expand_builtin_prefetch (exp); > return const0_rtx; > @@ -13461,6 +13480,7 @@ is_simple_builtin (tree decl) > case BUILT_IN_OBJECT_SIZE: > case BUILT_IN_UNREACHABLE: > /* Simple register moves or loads from stack. */ > + case BUILT_IN_ASSUME_ALIGNED: > case BUILT_IN_RETURN_ADDRESS: > case BUILT_IN_EXTRACT_RETURN_ADDR: > case BUILT_IN_FROB_RETURN_ADDR: > --- gcc/doc/extend.texi.jj 2011-06-26 09:55:16.000000000 +0200 > +++ gcc/doc/extend.texi 2011-06-27 15:08:12.000000000 +0200 > @@ -7646,6 +7646,28 @@ int g (int c) > > @end deftypefn > > +@deftypefn {Built-in Function} void *__builtin_assume_aligned (const void > *@var{exp}, size_t @var{align}, ...) 
> +This function returns its first argument, and allows the compiler > +to assume that the returned pointer is at least @var{align} bytes > +aligned. This built-in can have either two or three arguments, > +if it has three, the third argument should have integer type, and > +if it is non-zero means misalignment offset. For example: > + > +@smallexample > +void *x = __builtin_assume_aligned (arg, 16); > +@end smallexample > + > +means that the compiler can assume x, set to arg, is at least > +16 byte aligned, while: > + > +@smallexample > +void *x = __builtin_assume_aligned (arg, 32, 8); > +@end smallexample > + > +means that the compiler can assume for x, set to arg, that > +(char *) x - 8 is 32 byte aligned. > +@end deftypefn > + > @deftypefn {Built-in Function} void __builtin___clear_cache (char > *@var{begin}, char *@var{end}) > This function is used to flush the processor's instruction cache for > the region of memory between @var{begin} inclusive and @var{end} > --- gcc/c-family/c-common.c.jj 2011-06-22 10:16:49.000000000 +0200 > +++ gcc/c-family/c-common.c 2011-06-27 15:18:39.000000000 +0200 > @@ -8166,6 +8166,18 @@ check_builtin_function_arguments (tree f > } > return false; > > + case BUILT_IN_ASSUME_ALIGNED: > + if (builtin_function_validate_nargs (fndecl, nargs, 2 + (nargs > 2))) > + { > + if (nargs >= 3 && TREE_CODE (TREE_TYPE (args[2])) != INTEGER_TYPE) > + { > + error ("non-integer argument 3 in call to function %qE", > fndecl); > + return false; > + } > + return true; > + } > + return false; > + > default: > return true; > } > --- gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c.jj 2011-06-27 > 15:08:12.000000000 +0200 > +++ gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c 2011-06-27 > 15:08:12.000000000 +0200 > @@ -0,0 +1,41 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +void > +test1 (double *out1, double *out2, double *out3, double *in1, > + double *in2, int len) > +{ > + int i; > + double *__restrict o1 = 
__builtin_assume_aligned (out1, 16); > + double *__restrict o2 = __builtin_assume_aligned (out2, 16); > + double *__restrict o3 = __builtin_assume_aligned (out3, 16); > + double *__restrict i1 = __builtin_assume_aligned (in1, 16); > + double *__restrict i2 = __builtin_assume_aligned (in2, 16); > + for (i = 0; i < len; ++i) > + { > + o1[i] = i1[i] * i2[i]; > + o2[i] = i1[i] + i2[i]; > + o3[i] = i1[i] - i2[i]; > + } > +} > + > +void > +test2 (double *out1, double *out2, double *out3, double *in1, > + double *in2, int len) > +{ > + int i, align = 32, misalign = 16; > + out1 = __builtin_assume_aligned (out1, align, misalign); > + out2 = __builtin_assume_aligned (out2, align, 16); > + out3 = __builtin_assume_aligned (out3, 32, misalign); > + in1 = __builtin_assume_aligned (in1, 32, 16); > + in2 = __builtin_assume_aligned (in2, 32, 0); > + for (i = 0; i < len; ++i) > + { > + out1[i] = in1[i] * in2[i]; > + out2[i] = in1[i] + in2[i]; > + out3[i] = in1[i] - in2[i]; > + } > +} > + > +/* { dg-final { scan-tree-dump-not "__builtin_assume_aligned" "optimized" } > } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > --- gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c.jj 2011-06-27 > 15:08:12.000000000 +0200 > +++ gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c 2011-06-27 > 15:26:19.000000000 +0200 > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > + > +double *bar (void); > + > +void > +foo (double *ptr, int i) > +{ > + double *a = __builtin_assume_aligned (ptr, 16, 8, 7); /* { dg-error > "too many arguments to function" } */ > + double *b = __builtin_assume_aligned (bar (), 16); > + double *c = __builtin_assume_aligned (bar (), 16, 8); > + double *d = __builtin_assume_aligned (ptr, i, ptr); /* { dg-error > "non-integer argument 3 in call to function" } */ > + double *e = __builtin_assume_aligned (ptr, i, *ptr); /* { dg-error > "non-integer argument 3 in call to function" } */ > + *a = 0.0; > + *b = 0.0; > + *c = 0.0; > + *d = 0.0; > + *e = 0.0; > +} > --- 
gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c.jj 2011-06-27 > 15:08:12.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c 2011-06-27 > 15:08:12.000000000 +0200 > @@ -0,0 +1,41 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -msse2 -mno-avx" } */ > + > +void > +test1 (double *out1, double *out2, double *out3, double *in1, > + double *in2, int len) > +{ > + int i; > + double *__restrict o1 = __builtin_assume_aligned (out1, 16); > + double *__restrict o2 = __builtin_assume_aligned (out2, 16); > + double *__restrict o3 = __builtin_assume_aligned (out3, 16); > + double *__restrict i1 = __builtin_assume_aligned (in1, 16); > + double *__restrict i2 = __builtin_assume_aligned (in2, 16); > + for (i = 0; i < len; ++i) > + { > + o1[i] = i1[i] * i2[i]; > + o2[i] = i1[i] + i2[i]; > + o3[i] = i1[i] - i2[i]; > + } > +} > + > +void > +test2 (double *out1, double *out2, double *out3, double *in1, > + double *in2, int len) > +{ > + int i, align = 32, misalign = 16; > + out1 = __builtin_assume_aligned (out1, align, misalign); > + out2 = __builtin_assume_aligned (out2, align, 16); > + out3 = __builtin_assume_aligned (out3, 32, misalign); > + in1 = __builtin_assume_aligned (in1, 32, 16); > + in2 = __builtin_assume_aligned (in2, 32, 0); > + for (i = 0; i < len; ++i) > + { > + out1[i] = in1[i] * in2[i]; > + out2[i] = in1[i] + in2[i]; > + out3[i] = in1[i] - in2[i]; > + } > +} > + > +/* { dg-final { scan-assembler-not "movhpd" } } */ > +/* { dg-final { scan-assembler-not "movlpd" } } */ > > > Jakub >