On Sat, May 11, 2013 at 11:38 AM, Marc Glisse <[email protected]> wrote:
> Second try.
>
> I removed the fold_single_bit_test thing (I thought I'd handle it, so I
> started by the easy part, and never did the rest).
>
> Adapting invert_truthvalue_loc for vectors, I thought: calling
> fold_truth_not_expr and build1 if it fails is just the same as fold_build1.
> Except that it wasn't: fold_unary_loc fold_converts to boolean before calling
> fold_truth_not_expr and then converts back to the required type. And instead of
> simply changing the type of an EQ_EXPR, fold_convert introduces a NOP_EXPR
> (one that STRIP_NOPS doesn't remove), which hides the comparison from many
> other parts of the front-end (affects warnings) and folding. I hesitated
> between removing this cast and enhancing fold_convert, and chose the one
> that removes code. As a side benefit, I got an XPASS :-)
>
>
> Passes bootstrap+testsuite on x86_64-linux-gnu.
>
> 2013-05-11 Marc Glisse <[email protected]>
>
>
> gcc/
> * fold-const.c (fold_negate_expr): Handle vectors.
> (fold_truth_not_expr): Make it static.
> (invert_truthvalue_loc): Handle vectors. Do not call
> fold_truth_not_expr directly.
> (fold_unary_loc) <BIT_NOT_EXPR>: Handle vector comparisons.
> <TRUTH_NOT_EXPR>: Do not cast to boolean.
> (fold_comparison): Handle vector constants.
> (fold_ternary_loc) <VEC_COND_EXPR>: Adapt more COND_EXPR
> optimizations.
> * tree.h (fold_truth_not_expr): Remove declaration.
>
>
> gcc/testsuite/
> * g++.dg/ext/vector22.C: New testcase.
> * gcc.dg/binop-xor3.c: Remove xfail.
>
> --
> Marc Glisse
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c (revision 198796)
> +++ gcc/fold-const.c (working copy)
> @@ -519,21 +519,21 @@ fold_negate_expr (location_t loc, tree t
> {
> tree type = TREE_TYPE (t);
> tree tem;
>
> switch (TREE_CODE (t))
> {
> /* Convert - (~A) to A + 1. */
> case BIT_NOT_EXPR:
> if (INTEGRAL_TYPE_P (type))
> return fold_build2_loc (loc, PLUS_EXPR, type, TREE_OPERAND (t, 0),
> - build_int_cst (type, 1));
> + build_one_cst (type));
> break;
>
> case INTEGER_CST:
> tem = fold_negate_const (t, type);
> if (TREE_OVERFLOW (tem) == TREE_OVERFLOW (t)
> || !TYPE_OVERFLOW_TRAPS (type))
> return tem;
> break;
>
> case REAL_CST:
> @@ -3078,21 +3078,21 @@ omit_two_operands_loc (location_t loc, t
> }
>
>
> /* Return a simplified tree node for the truth-negation of ARG. This
> never alters ARG itself. We assume that ARG is an operation that
> returns a truth value (0 or 1).
>
> FIXME: one would think we would fold the result, but it causes
> problems with the dominator optimizer. */
>
> -tree
> +static tree
> fold_truth_not_expr (location_t loc, tree arg)
> {
> tree type = TREE_TYPE (arg);
> enum tree_code code = TREE_CODE (arg);
> location_t loc1, loc2;
>
> /* If this is a comparison, we can simply invert it, except for
> floating-point non-equality comparisons, in which case we just
> enclose a TRUTH_NOT_EXPR around what we have. */
>
> @@ -3215,38 +3215,33 @@ fold_truth_not_expr (location_t loc, tre
> return build1_loc (loc, CLEANUP_POINT_EXPR, type,
> invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)));
>
> default:
> return NULL_TREE;
> }
> }
>
> /* Return a simplified tree node for the truth-negation of ARG. This
> never alters ARG itself. We assume that ARG is an operation that
> - returns a truth value (0 or 1).
> -
> - FIXME: one would think we would fold the result, but it causes
> - problems with the dominator optimizer. */
> + returns a truth value (0 or 1 for scalars, 0 or -1 for vectors). */
>
> tree
> invert_truthvalue_loc (location_t loc, tree arg)
> {
> - tree tem;
> -
> if (TREE_CODE (arg) == ERROR_MARK)
> return arg;
>
> - tem = fold_truth_not_expr (loc, arg);
> - if (!tem)
> - tem = build1_loc (loc, TRUTH_NOT_EXPR, TREE_TYPE (arg), arg);
> -
> - return tem;
> + tree type = TREE_TYPE (arg);
> + return fold_build1_loc (loc, VECTOR_TYPE_P (type)
> + ? BIT_NOT_EXPR
> + : TRUTH_NOT_EXPR,
> + type, arg);
> }
>
> /* Given a bit-wise operation CODE applied to ARG0 and ARG1, see if both
> operands are another bit-wise operation with a common input. If so,
> distribute the bit operations to save an operation and possibly two if
> constants are involved. For example, convert
> (A | B) & (A | C) into A | (B & C)
> Further simplification will occur if B and C are constants.
>
> If this optimization cannot be done, 0 will be returned. */
> @@ -8274,28 +8269,34 @@ fold_unary_loc (location_t loc, enum tre
> {
> elem = VECTOR_CST_ELT (arg0, i);
> elem = fold_unary_loc (loc, BIT_NOT_EXPR, TREE_TYPE (type), elem);
> if (elem == NULL_TREE)
> break;
> elements[i] = elem;
> }
> if (i == count)
> return build_vector (type, elements);
> }
> + else if (COMPARISON_CLASS_P (arg0) && VECTOR_INTEGER_TYPE_P (type))
> + {
> + tree op_type = TREE_TYPE (TREE_OPERAND (arg0, 0));
> + enum tree_code subcode = invert_tree_comparison (TREE_CODE (arg0),
> + HONOR_NANS (TYPE_MODE (op_type)));
> + if (subcode != ERROR_MARK)
> + return build2_loc (loc, subcode, type, TREE_OPERAND (arg0, 0),
> + TREE_OPERAND (arg0, 1));
> + }
> +
I wonder why you restrict this to VECTOR_INTEGER_TYPE_P - for
TYPE_PRECISION == 1 type this should work, too. Also there should
never be a comparison resulting in a non-integer vector type, no?
> return NULL_TREE;
>
> case TRUTH_NOT_EXPR:
> - /* The argument to invert_truthvalue must have Boolean type. */
> - if (TREE_CODE (TREE_TYPE (arg0)) != BOOLEAN_TYPE)
> - arg0 = fold_convert_loc (loc, boolean_type_node, arg0);
> -
> /* Note that the operand of this must be an int
> and its values must be 0 or 1.
> ("true" is a fixed value perhaps depending on the language,
> but we don't handle values other than 1 correctly yet.) */
> tem = fold_truth_not_expr (loc, arg0);
> if (!tem)
> return NULL_TREE;
> return fold_convert_loc (loc, type, tem);
>
> case REALPART_EXPR:
> @@ -9579,21 +9580,21 @@ fold_comparison (location_t loc, enum tr
> {
> tree cmp_type = TREE_TYPE (TREE_OPERAND (arg0, 0));
> return fold_build2_loc (loc, code, type,
> fold_convert_loc (loc, cmp_type,
> TREE_OPERAND (arg1, 0)),
> TREE_OPERAND (arg0, 0));
> }
>
> /* Fold ~X op C as X op' ~C, where op' is the swapped comparison. */
> if (TREE_CODE (arg0) == BIT_NOT_EXPR
> - && TREE_CODE (arg1) == INTEGER_CST)
> +      && (TREE_CODE (arg1) == INTEGER_CST || TREE_CODE (arg1) == VECTOR_CST))
> {
> tree cmp_type = TREE_TYPE (TREE_OPERAND (arg0, 0));
> return fold_build2_loc (loc, swap_tree_comparison (code), type,
> TREE_OPERAND (arg0, 0),
> fold_build1_loc (loc, BIT_NOT_EXPR, cmp_type,
> fold_convert_loc (loc, cmp_type,
> arg1)));
> }
>
> return NULL_TREE;
> }
> @@ -14030,61 +14031,67 @@ fold_ternary_loc (location_t loc, enum t
> return tem;
> }
>
> if (COMPARISON_CLASS_P (arg0)
> && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0),
> op2,
> TREE_OPERAND (arg0, 1))
> && !HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (op2))))
> {
> location_t loc0 = expr_location_or (arg0, loc);
> - tem = fold_truth_not_expr (loc0, arg0);
> + tem = fold_unary_loc (loc0, VECTOR_TYPE_P (type)
> + ? BIT_NOT_EXPR
> + : TRUTH_NOT_EXPR,
> + TREE_TYPE (arg0), arg0);
since you don't restrict it here either ....
> if (tem && COMPARISON_CLASS_P (tem))
> {
> tem = fold_cond_expr_with_comparison (loc, type, tem, op2, op1);
> if (tem)
> return tem;
> }
> }
>
> - /* ??? Fixup the code below for VEC_COND_EXPR. */
> - if (code == VEC_COND_EXPR)
> - return NULL_TREE;
> -
> /* If the second operand is simpler than the third, swap them
> since that produces better jump optimization results. */
> if (truth_value_p (TREE_CODE (arg0))
> && tree_swap_operands_p (op1, op2, false))
> {
> location_t loc0 = expr_location_or (arg0, loc);
> /* See if this can be inverted. If it can't, possibly because
> it was a floating-point inequality comparison, don't do
> anything. */
> - tem = fold_truth_not_expr (loc0, arg0);
> + tem = fold_unary_loc (loc0, VECTOR_TYPE_P (type)
> + ? BIT_NOT_EXPR
> + : TRUTH_NOT_EXPR,
> + TREE_TYPE (arg0), arg0);
Btw, splitting this out to a helper would be nice.
The rest of the patch looks ok to me.
Thanks,
Richard.
> if (tem)
> return fold_build3_loc (loc, code, type, tem, op2, op1);
> }
>
> /* Convert A ? 1 : 0 to simply A. */
> - if (integer_onep (op1)
> + if ((code == VEC_COND_EXPR ? integer_all_onesp (op1)
> + : (integer_onep (op1)
> + && !VECTOR_TYPE_P (type)))
> && integer_zerop (op2)
> /* If we try to convert OP0 to our type, the
> call to fold will try to move the conversion inside
> a COND, which will recurse. In that case, the COND_EXPR
> is probably the best choice, so leave it alone. */
> && type == TREE_TYPE (arg0))
> return pedantic_non_lvalue_loc (loc, arg0);
>
> /* Convert A ? 0 : 1 to !A. This prefers the use of NOT_EXPR
> over COND_EXPR in cases such as floating point comparisons. */
> if (integer_zerop (op1)
> - && integer_onep (op2)
> + && (code == VEC_COND_EXPR ? integer_all_onesp (op2)
> + : (integer_onep (op2)
> + && !VECTOR_TYPE_P (type)))
> && truth_value_p (TREE_CODE (arg0)))
> return pedantic_non_lvalue_loc (loc,
> fold_convert_loc (loc, type,
> invert_truthvalue_loc (loc, arg0)));
>
> /* A < 0 ? <sign bit of A> : 0 is simply (A & <sign bit of A>). */
> if (TREE_CODE (arg0) == LT_EXPR
> && integer_zerop (TREE_OPERAND (arg0, 1))
> && integer_zerop (op2)
> @@ -14187,60 +14194,73 @@ fold_ternary_loc (location_t loc, enum t
> && TREE_CODE (TREE_OPERAND (arg0, 0)) == BIT_AND_EXPR
> && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg0, 0), 1),
> arg1, OEP_ONLY_CONST))
> return pedantic_non_lvalue_loc (loc,
> fold_convert_loc (loc, type,
> TREE_OPERAND (arg0, 0)));
>
> /* Convert A ? B : 0 into A && B if A and B are truth values. */
> if (integer_zerop (op2)
> && truth_value_p (TREE_CODE (arg0))
> - && truth_value_p (TREE_CODE (arg1)))
> - return fold_build2_loc (loc, TRUTH_ANDIF_EXPR, type,
> - fold_convert_loc (loc, type, arg0),
> - arg1);
> + && truth_value_p (TREE_CODE (arg1))
> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type)))
> +      return fold_build2_loc (loc, code == VEC_COND_EXPR ? BIT_AND_EXPR
> +                                                         : TRUTH_ANDIF_EXPR,
> +                              type, fold_convert_loc (loc, type, arg0), arg1);
>
> /* Convert A ? B : 1 into !A || B if A and B are truth values. */
> - if (integer_onep (op2)
> +  if (code == VEC_COND_EXPR ? integer_all_onesp (op2) : integer_onep (op2)
> && truth_value_p (TREE_CODE (arg0))
> - && truth_value_p (TREE_CODE (arg1)))
> + && truth_value_p (TREE_CODE (arg1))
> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type)))
> {
> location_t loc0 = expr_location_or (arg0, loc);
> /* Only perform transformation if ARG0 is easily inverted. */
> - tem = fold_truth_not_expr (loc0, arg0);
> + tem = fold_unary_loc (loc0, VECTOR_TYPE_P (type)
> + ? BIT_NOT_EXPR
> + : TRUTH_NOT_EXPR,
> + TREE_TYPE (arg0), arg0);
> if (tem)
> - return fold_build2_loc (loc, TRUTH_ORIF_EXPR, type,
> - fold_convert_loc (loc, type, tem),
> - arg1);
> + return fold_build2_loc (loc, code == VEC_COND_EXPR
> + ? BIT_IOR_EXPR
> + : TRUTH_ORIF_EXPR,
> + type, fold_convert_loc (loc, type, tem),
> + arg1);
> }
>
> /* Convert A ? 0 : B into !A && B if A and B are truth values. */
> if (integer_zerop (arg1)
> && truth_value_p (TREE_CODE (arg0))
> - && truth_value_p (TREE_CODE (op2)))
> + && truth_value_p (TREE_CODE (op2))
> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type)))
> {
> location_t loc0 = expr_location_or (arg0, loc);
> /* Only perform transformation if ARG0 is easily inverted. */
> - tem = fold_truth_not_expr (loc0, arg0);
> + tem = fold_unary_loc (loc0, VECTOR_TYPE_P (type)
> + ? BIT_NOT_EXPR
> + : TRUTH_NOT_EXPR,
> + TREE_TYPE (arg0), arg0);
> if (tem)
> - return fold_build2_loc (loc, TRUTH_ANDIF_EXPR, type,
> - fold_convert_loc (loc, type, tem),
> - op2);
> + return fold_build2_loc (loc, code == VEC_COND_EXPR
> + ? BIT_AND_EXPR : TRUTH_ANDIF_EXPR,
> + type, fold_convert_loc (loc, type, tem),
> + op2);
> }
>
> /* Convert A ? 1 : B into A || B if A and B are truth values. */
> - if (integer_onep (arg1)
> +  if (code == VEC_COND_EXPR ? integer_all_onesp (arg1) : integer_onep (arg1)
> && truth_value_p (TREE_CODE (arg0))
> - && truth_value_p (TREE_CODE (op2)))
> - return fold_build2_loc (loc, TRUTH_ORIF_EXPR, type,
> - fold_convert_loc (loc, type, arg0),
> - op2);
> + && truth_value_p (TREE_CODE (op2))
> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type)))
> + return fold_build2_loc (loc, code == VEC_COND_EXPR
> + ? BIT_IOR_EXPR : TRUTH_ORIF_EXPR,
> +                            type, fold_convert_loc (loc, type, arg0), op2);
>
> return NULL_TREE;
>
> case CALL_EXPR:
> /* CALL_EXPRs used to be ternary exprs. Catch any mistaken uses
> of fold_ternary on them. */
> gcc_unreachable ();
>
> case BIT_FIELD_REF:
> if ((TREE_CODE (arg0) == VECTOR_CST
> Index: gcc/testsuite/gcc.dg/binop-xor3.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/binop-xor3.c (revision 198796)
> +++ gcc/testsuite/gcc.dg/binop-xor3.c (working copy)
> @@ -1,11 +1,11 @@
> /* { dg-do compile } */
> /* { dg-options "-O2 -fdump-tree-optimized" } */
>
> int
> foo (int a, int b)
> {
> return ((a && !b) || (!a && b));
> }
>
> -/* { dg-final { scan-tree-dump-times "\\\^" 1 "optimized" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "\\\^" 1 "optimized" } } */
> /* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/testsuite/g++.dg/ext/vector22.C
> ===================================================================
> --- gcc/testsuite/g++.dg/ext/vector22.C (revision 0)
> +++ gcc/testsuite/g++.dg/ext/vector22.C (revision 0)
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-gimple" } */
> +
> +typedef unsigned vec __attribute__((vector_size(4*sizeof(int))));
> +
> +void f(vec*a,vec*b){
> + *a=(*a)?-1:(*b<10);
> + *b=(*b)?(*a<10):0;
> +}
> +void g(vec*a,vec*b){
> + *a=(*a)?(*a<*a):-1;
> + *b=(*b)?-1:(*b<*b);
> +}
> +void h(vec*a){
> + *a=(~*a==5);
> +}
> +
> +/* { dg-final { scan-tree-dump-not "~" "gimple" } } */
> +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR" "gimple" } } */
> +/* { dg-final { cleanup-tree-dump "gimple" } } */
>
> Property changes on: gcc/testsuite/g++.dg/ext/vector22.C
> ___________________________________________________________________
> Added: svn:keywords
> + Author Date Id Revision URL
> Added: svn:eol-style
> + native
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h (revision 198796)
> +++ gcc/tree.h (working copy)
> @@ -5758,21 +5758,20 @@ extern int operand_equal_p (const_tree,
> extern int multiple_of_p (tree, const_tree, const_tree);
> #define omit_one_operand(T1,T2,T3)\
> omit_one_operand_loc (UNKNOWN_LOCATION, T1, T2, T3)
> extern tree omit_one_operand_loc (location_t, tree, tree, tree);
> #define omit_two_operands(T1,T2,T3,T4)\
> omit_two_operands_loc (UNKNOWN_LOCATION, T1, T2, T3, T4)
> extern tree omit_two_operands_loc (location_t, tree, tree, tree, tree);
> #define invert_truthvalue(T)\
> invert_truthvalue_loc(UNKNOWN_LOCATION, T)
> extern tree invert_truthvalue_loc (location_t, tree);
> -extern tree fold_truth_not_expr (location_t, tree);
> extern tree fold_unary_to_constant (enum tree_code, tree, tree);
> extern tree fold_binary_to_constant (enum tree_code, tree, tree, tree);
> extern tree fold_read_from_constant_string (tree);
> extern tree int_const_binop (enum tree_code, const_tree, const_tree);
> #define build_fold_addr_expr(T)\
> build_fold_addr_expr_loc (UNKNOWN_LOCATION, (T))
> extern tree build_fold_addr_expr_loc (location_t, tree);
> #define build_fold_addr_expr_with_type(T,TYPE)\
> build_fold_addr_expr_with_type_loc (UNKNOWN_LOCATION, (T), TYPE)
> extern tree build_fold_addr_expr_with_type_loc (location_t, tree, tree);
>