[C++ PATCH 2/3] Support implicit parameter packs.
* parser.c (convert_generic_types_to_packs): New function to transform a range of implicitly introduced non-pack template parms to be parameter packs. (cp_parser_parameter_declaration_list): If a function parameter pack contains generic types, convert them to packs prior to grokdeclarator. --- gcc/cp/parser.c | 81 - 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index b699ac4..10b9f72 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -2109,6 +2109,8 @@ static tree cp_parser_late_parsing_omp_declare_simd static tree synthesize_implicit_template_parm (cp_parser *); +static tree convert_generic_types_to_packs + (tree, tree, int, int); static tree finish_fully_implicit_template (cp_parser *, tree); @@ -18069,7 +18071,7 @@ static tree cp_parser_parameter_declaration_list (cp_parser* parser, bool *is_error) { tree parameters = NULL_TREE; - tree *tail = ¶meters; + tree *tail = ¶meters; bool saved_in_unbraced_linkage_specification_p; int index = 0; @@ -18078,7 +18080,7 @@ cp_parser_parameter_declaration_list (cp_parser* parser, bool *is_error) /* The special considerations that apply to a function within an unbraced linkage specifications do not apply to the parameters to the function. */ - saved_in_unbraced_linkage_specification_p + saved_in_unbraced_linkage_specification_p = parser->in_unbraced_linkage_specification_p; parser->in_unbraced_linkage_specification_p = false; @@ -18088,6 +18090,10 @@ cp_parser_parameter_declaration_list (cp_parser* parser, bool *is_error) cp_parameter_declarator *parameter; tree decl = error_mark_node; bool parenthesized_p = false; + int template_parm_idx = (parser->num_template_parameter_lists? + TREE_VEC_LENGTH (INNERMOST_TEMPLATE_PARMS + (current_template_parms)) : 0); + /* Parse the parameter. 
*/ parameter = cp_parser_parameter_declaration (parser, @@ -18099,11 +18105,30 @@ cp_parser_parameter_declaration_list (cp_parser* parser, bool *is_error) deprecated_state = DEPRECATED_SUPPRESS; if (parameter) - decl = grokdeclarator (parameter->declarator, - &parameter->decl_specifiers, - PARM, - parameter->default_argument != NULL_TREE, - &parameter->decl_specifiers.attributes); + { + /* If a function parameter pack was specified and an implicit template +parameter was introduced during cp_parser_parameter_declaration, +change any implicit parameters introduced into packs. */ + if (parser->implicit_template_parms + && parameter->declarator + && parameter->declarator->parameter_pack_p) + { + int latest_template_parm_idx = TREE_VEC_LENGTH + (INNERMOST_TEMPLATE_PARMS (current_template_parms)); + + if (latest_template_parm_idx != template_parm_idx) + parameter->decl_specifiers.type = convert_generic_types_to_packs + (parameter->decl_specifiers.type, + current_template_parms, + template_parm_idx, latest_template_parm_idx); + } + + decl = grokdeclarator (parameter->declarator, +&parameter->decl_specifiers, +PARM, +parameter->default_argument != NULL_TREE, +&parameter->decl_specifiers.attributes); + } deprecated_state = DEPRECATED_NORMAL; @@ -31213,6 +31238,48 @@ synthesize_implicit_template_parm (cp_parser *parser) return new_type; } +/* Convert the generic type parameters in PARM that match the types given in the + range [START_IDX, END_IDX) from the template parameters CURRENT into generic + type packs. */ + +tree +convert_generic_types_to_packs (tree parm, + tree current, int start_idx, int end_idx) +{ + int depth = TMPL_PARMS_DEPTH (current); + current = INNERMOST_TEMPLATE_PARMS (current); + tree replacement = make_tree_vec (TREE_VEC_LENGTH (current)); + + for (int i = start_idx; i < end_idx; ++i) +{ + /* Create a distinct parameter pack type from the current parm and add it +to the replacement args to tsubst below into the generic function +parameter. 
*/ + + tree t = copy_type (TREE_TYPE (TREE_VALUE (TREE_VEC_ELT (current, i)))); + TYPE_STUB_DECL (t) = TYPE_NAME (t) = TEMPLATE_TYPE_DECL (t); + TYPE_MAIN_VARIANT (t) = t; + TEMPLATE_TYPE_PARAMETER_PACK (t) = true; + SET_TYPE_STRUCTURAL_EQUALITY (t); + TREE_VEC_ELT (replacement, i) = t; +} + + if (depth > 1) +{ + /* Build up a tree vec of
Re: [SKETCH] Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637.
Hi Jason, I've got the tsubst solution for implicit parameter packs working now. I've also improved the efficiency of incremental template parameter synthesis and added some testcases. All C++14 generic lambda examples pass and no new regressions. Cheers, Adam Patch summary (3): Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637. Support implicit parameter packs. Add some generic lambda test cases. gcc/cp/decl.c | 30 +- gcc/cp/parser.c| 331 +++-- gcc/cp/parser.h| 19 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C | 25 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C| 42 +++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C | 10 + gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C| 51 .../g++.dg/cpp1y/lambda-generic-variadic.C | 15 + gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C | 25 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C | 25 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C | 4 + gcc/testsuite/g++.dg/cpp1y/lambda-generic.C| 23 ++ gcc/testsuite/g++.dg/cpp1y/pr58534.C | 9 + gcc/testsuite/g++.dg/cpp1y/pr58536.C | 12 + gcc/testsuite/g++.dg/cpp1y/pr58548.C | 10 + gcc/testsuite/g++.dg/cpp1y/pr58549.C | 10 + gcc/testsuite/g++.dg/cpp1y/pr58637.C | 7 + gcc/tree.c | 22 ++ gcc/tree.h | 5 + 19 files changed, 561 insertions(+), 114 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58534.C create mode 100644 
gcc/testsuite/g++.dg/cpp1y/pr58536.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58548.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58549.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58637.C -- 1.8.4
[C++ PATCH 3/3] Add some generic lambda test cases.
gcc/testsuite/g++.dg/cpp1y/ * lambda-generic.C: New test case. * lambda-generic-cfun.C: New test case. * lambda-generic-dep.C: New test case. * lambda-generic-udt.C: New test case. * lambda-generic-variadic.C: New test case. * lambda-generic-x.C: New test case. * lambda-generic-xcfun.C: New test case. * lambda-generic-xudt.C: New test case. * lambda-generic-mixed.C: New test case. --- gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C | 25 +++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C| 42 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C | 10 + gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C| 51 ++ .../g++.dg/cpp1y/lambda-generic-variadic.C | 15 +++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C | 25 +++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C | 25 +++ gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C | 4 ++ gcc/testsuite/g++.dg/cpp1y/lambda-generic.C| 23 ++ 9 files changed, 220 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C new file mode 100644 index 000..5e51526 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C @@ -0,0 +1,25 @@ +// Generic lambda conversion to function ptr test from N3690 5.1.2.6 +// { dg-options "-std=c++1y" } + +void f1(int (*)(int)) { } +void f2(char (*)(int)) { } +void g(int (*)(int)) { } // #1 +void g(char (*)(char)) { } // #2 +void 
h(int (*)(int)) { } // #3 +void h(char (*)(int)) { } // #4 + +int main() +{ + auto glambda = [](auto a) { return a; }; + int (*fp)(int) = glambda; + f1(glambda); // OK + f2(glambda); // { dg-error "invalid user-defined conversion" } + g(glambda); // { dg-error "ambiguous" } + h(glambda); // OK: calls #3 since it is convertible from ID + int& (*fpi)(int*) = [](auto* a) -> auto& { return *a; }; // OK + + auto GL = [](auto a) { return a; }; + int (*GL_int)(int) = GL; // OK: through conversion function template + GL_int(3); +} + diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C new file mode 100644 index 000..bb68738 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C @@ -0,0 +1,42 @@ +// Generic lambda type dependence test part from N3690 5.1.2.12 +// { dg-options "-std=c++1y" } + +void f(int, const int (&)[2] = {}) { } // #1 +void f(const int&, const int (&)[1]) { } // #2 + +void test() +{ + const int x = 17; + auto g = [](auto a) { +f(x); // OK: calls #1, does not capture x + }; + auto g2 = [=](auto a) { +int selector[sizeof(a) == 1 ? 1 : 2]{}; +f(x, selector); // OK: is a dependent expression, so captures x + }; +} + +struct S { + struct N { +auto test () { return 7.f; } + }; +}; + +#include + +int main() +{ + auto f = [] (T const& s) mutable { +typename T::N x; +return x.test (); + }; + auto g = [] (auto const& s) { +typename std::decay::type::N x; +return x.test (); + }; + + S i; + f(i); + g(i); +} + diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C new file mode 100644 index 000..4e26fc5 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C @@ -0,0 +1,10 @@ +// Mixed explicit and implicit generic lambda test. 
+// { dg-options "-std=c++1y" } + +int main() +{ + auto f = [] (T a, auto b) { return a + b; }; + auto g = [] (auto a, T b) { return a + b; }; + + return f (1.0, 3) + g (1.0, 3); +} diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C new file mode 100644 index 000..9f6d45a --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C @@ -0,0 +1,51 @@ +// Ensure that generic lambdas properly construct and destroy user types. +// { dg-options "-std=c++1y -DUSE_AUTO_SYNTAX" } +// { dg-do run } + +int i = 3; + +struct S +{ + S () { ++i; } + S (S const&) { ++i; } + S (S&& old) { old.shadow = true; i += 2; } + ~S () { if (shadow) i -= 2; else --i; } + + bool shadow = false; +}; + +extern "C" void printf(...); +#define assert(e) if (e); else \ +
[C++ PATCH 1/3] Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637.
gcc/ * tree.c (grow_tree_vec_stat): New function ... * tree.h (grow_tree_vec_stat) (grow_tree_vec): ... and its declaration and macro front-end. gcc/cp/ PR c++/58534 PR c++/58536 PR c++/58548 PR c++/58549 PR c++/58637 * parser.h (struct cp_parser): New members implicit_template_parms, implicit_template_scope and auto_is_implicit_function_template_parm_p. * parser.c (add_implicit_template_parms): Refactor as ... (synthesize_implicit_template_parm): ... this to append a new template type parm to the current template parameter list (introducing a new list if necessary). (cp_parser_new): Initialize new cp_parser members. (cp_parser_parameter_declaration_clause): Consider auto as implicit template parm when parsing a parameter declaration (unless parsing an explicit specialization). (cp_parser_parameter_declaration_list): Remove local implicit_template_parms counter and reset cp_parser implicit template state when complete. (cp_parser_lambda_expression): Reset implicit template cp_parser members whilst generating lambda class. (cp_parser_function_definition_after_declarator): Reset implicit template cp_parser members whilst parsing function definition. (make_generic_type_name): Respell '<autoN>' as 'auto:N' which works better with template diagnostics. (cp_parser_simple_type_specifier): Synthesize implicit template parm on parsing 'auto' if auto_is_implicit_function_template_parm_p and provide diagnostics ... * decl.c (grokdeclarator): ... that were previously done here. gcc/testsuite/g++.dg/ * cpp1y/pr58534.C: New testcase. * cpp1y/pr58536.C: New testcase. * cpp1y/pr58548.C: New testcase. * cpp1y/pr58549.C: New testcase. * cpp1y/pr58637.C: New testcase. 
--- gcc/cp/decl.c| 30 +--- gcc/cp/parser.c | 278 +++ gcc/cp/parser.h | 19 +++ gcc/testsuite/g++.dg/cpp1y/pr58534.C | 9 ++ gcc/testsuite/g++.dg/cpp1y/pr58536.C | 12 ++ gcc/testsuite/g++.dg/cpp1y/pr58548.C | 10 ++ gcc/testsuite/g++.dg/cpp1y/pr58549.C | 10 ++ gcc/testsuite/g++.dg/cpp1y/pr58637.C | 7 + gcc/tree.c | 22 +++ gcc/tree.h | 5 + 10 files changed, 281 insertions(+), 121 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58534.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58536.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58548.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58549.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58637.C diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 09c1daa..786814c 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -10375,33 +10375,11 @@ grokdeclarator (const cp_declarator *declarator, if (type_uses_auto (type)) { - if (template_parm_flag) - { - error ("template parameter declared %"); - type = error_mark_node; - } - else if (decl_context == CATCHPARM) - { - error ("catch parameter declared %"); - type = error_mark_node; - } - else if (current_class_type && LAMBDA_TYPE_P (current_class_type)) - { - if (cxx_dialect < cxx1y) - pedwarn (location_of (type), 0, -"use of % in lambda parameter declaration " -"only available with " -"-std=c++1y or -std=gnu++1y"); - } - else if (cxx_dialect < cxx1y) - pedwarn (location_of (type), 0, -"use of % in parameter declaration " -"only available with " -"-std=c++1y or -std=gnu++1y"); + if (cxx_dialect >= cxx1y) + error ("% parameter not permitted in this context"); else - pedwarn (location_of (type), OPT_Wpedantic, -"ISO C++ forbids use of % in parameter " -"declaration"); + error ("parameter declared %"); + type = error_mark_node; } /* A parameter declared as an array of T is really a pointer to T. 
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index bbc8e75..b699ac4 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -2107,8 +2107,8 @@ static bool cp_parser_ctor_initializer_opt_and_function_body static tree cp_parser_late_parsing_omp_declare_simd (cp_parser *, tree); -static tree add_implicit_template_parms - (cp_parser *, size_t, tree); +static tree synthesize_implicit_template_parm + (cp_parser *); static tree finish_fully_implicit_template (cp_parser *, tree); @@ -3443,7 +3443,10 @@ cp_parser_new (void) parser->num_template_parameter_lists = 0; /* Not decla
[wide-int] Fix aarch{32,64} builds
I decided to lump these together since the problems were the same. There were some typos in the real_to_integer invocation, while changing: /* There must be no padding. */ if (!host_integerp (TYPE_SIZE (type), 1) || (tree_low_cst (TYPE_SIZE (type), 1) != count * GET_MODE_BITSIZE (*modep))) return -1; to: if (!tree_fits_uhwi_p (TYPE_SIZE (type)) || (tree_to_uhwi (TYPE_SIZE (type)) != count * GET_MODE_BITSIZE (*modep))) return -1; introduced a signed/unsigned warning. Tested on aarch64-linux-gnueabi & arm-linux-gnueabi and applied as obvious. Thanks, Richard Index: gcc/config/aarch64/aarch64.c === --- gcc/config/aarch64/aarch64.c(revision 204311) +++ gcc/config/aarch64/aarch64.c(working copy) @@ -6030,9 +6030,7 @@ - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -6060,9 +6058,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -6092,9 +6088,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -7433,7 +7427,7 @@ int exponent; unsigned HOST_WIDE_INT mantissa, mask; REAL_VALUE_TYPE r, m; - bool &fail + bool fail; if (!CONST_DOUBLE_P (x)) return false; @@ -7457,7 +7451,7 @@ WARNING: If we ever have a representation using more than 2 * H_W_I - 1 bits for the mantissa, this can fail (low bits will be lost). 
*/ real_ldexp (&m, &r, point_pos - exponent); - w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2); + wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); /* If the low part of the mantissa has bits set we cannot represent the value. */ Index: gcc/config/arm/arm.c === --- gcc/config/arm/arm.c(revision 204311) +++ gcc/config/arm/arm.c(working copy) @@ -4678,9 +4678,7 @@ - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -4708,9 +4706,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -4740,9 +4736,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -11299,7 +11293,7 @@ WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 bits for the mantissa, this may fail (low bits would be lost). */ real_ldexp (&m, &r, point_pos - exponent); - wide_int w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2); + wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); mantissa = w.elt (0); mant_hi = w.elt (1);
[wide-int] Fix inverted bfin test
A test for "size > 8" had got inverted. Tested on bfin-elf and applied as obvious. Thanks, Richard Index: gcc/config/bfin/bfin.c === --- gcc/config/bfin/bfin.c (revision 204311) +++ gcc/config/bfin/bfin.c (working copy) @@ -3286,7 +3286,7 @@ memcpy can use 32 bit loads/stores. */ if (TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && !wi::gtu_p (TYPE_SIZE (type), 8) + && wi::gtu_p (TYPE_SIZE (type), 8) && align < 32) return 32; return align;
[wide-int] Fix exact_log2 zext test
...which I'd fluffed when doing the wi:: conversion. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixed a spurious gcc.dg difference on alpha-linux-gnu. Applied as obvious. Thanks, Richard Index: gcc/wide-int.cc === --- gcc/wide-int.cc (revision 204311) +++ gcc/wide-int.cc (working copy) @@ -2044,7 +2044,7 @@ /* Get a zero-extended form of block CRUX. */ unsigned HOST_WIDE_INT hwi = x.val[crux]; - if (crux * HOST_BITS_PER_WIDE_INT > x.precision) + if ((crux + 1) * HOST_BITS_PER_WIDE_INT > x.precision) hwi = zext_hwi (hwi, x.precision % HOST_BITS_PER_WIDE_INT); /* Now it's down to whether HWI is a power of 2. */
[wide-int] Fix mask inversions in expand_copysign_bit
At some point the original "~"s got straightened to "-"s. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. This fixed a testsuite difference in bfin-elf. Applied as obvious. Thanks, Richard Index: gcc/optabs.c === --- gcc/optabs.c(revision 204311) +++ gcc/optabs.c(working copy) @@ -3678,7 +3678,7 @@ if (!op0_is_abs) op0_piece = expand_binop (imode, and_optab, op0_piece, - immed_wide_int_const (-mask, imode), + immed_wide_int_const (~mask, imode), NULL_RTX, 1, OPTAB_LIB_WIDEN); op1 = expand_binop (imode, and_optab, operand_subword_force (op1, i, mode), @@ -3708,7 +3708,7 @@ op0 = gen_lowpart (imode, op0); if (!op0_is_abs) op0 = expand_binop (imode, and_optab, op0, - immed_wide_int_const (-mask, imode), + immed_wide_int_const (~mask, imode), NULL_RTX, 1, OPTAB_LIB_WIDEN); temp = expand_binop (imode, ior_optab, op0, op1, gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN);
[wide-int] Do not treat rtxes as sign-extended
Bah. After all that effort, it turns out that -- by design -- there is one special case where CONST_INTs are not sign-extended. Nonzero/true BImode integers are stored as STORE_FLAG_VALUE, which can be 1 rather than -1. So (const_int 1) can be a valid BImode integer -- and consequently (const_int -1) can be wrong -- even though BImode only has 1 bit. It might be nice to change that, but for wide-int I think we should just treat rtxes like trees for now. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some ICEs seen on bfin-elf. OK to install? Thanks, Richard Index: gcc/rtl.h === --- gcc/rtl.h (revision 204311) +++ gcc/rtl.h (working copy) @@ -1408,7 +1408,9 @@ { static const enum precision_type precision_type = VAR_PRECISION; static const bool host_dependent_precision = false; -static const bool is_sign_extended = true; +/* This ought to be true, except for the special case that BImode + is canonicalized to STORE_FLAG_VALUE, which might be 1. */ +static const bool is_sign_extended = false; static unsigned int get_precision (const rtx_mode_t &); static wi::storage_ref decompose (HOST_WIDE_INT *, unsigned int, const rtx_mode_t &); @@ -1430,10 +1432,6 @@ switch (GET_CODE (x.first)) { case CONST_INT: - if (precision < HOST_BITS_PER_WIDE_INT) - gcc_checking_assert (INTVAL (x.first) -== sext_hwi (INTVAL (x.first), precision)); - return wi::storage_ref (&INTVAL (x.first), 1, precision); case CONST_WIDE_INT:
Re: [wide-int] Do not treat rtxes as sign-extended
On Nov 2, 2013, at 3:30 AM, Richard Sandiford wrote: > Bah. After all that effort, it turns out that -- by design -- > there is one special case where CONST_INTs are not sign-extended. I'm thinking this needs commentary in wide-int.h, though, not sure what we'd say...
[wide-int] Another tweak to convert_modes
It turns out that when doing a vector shift by 2, the optab routine passes (const_int 2) to convert_modes with oldmode set to the mode of the shift (e.g. something like V8HI). When the target mode is a real integer mode like SImode, mainline just ignores that oldmode and returns a (const_int 2) regardless, but wide-int doesn't. Saying that (const_int 2) has a vector mode is almost certainly a bug that ought to be trapped by an assert, but we're not trying to fight that battle here. The current code: if (CONST_SCALAR_INT_P (x) && GET_MODE_CLASS (mode) == MODE_INT && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT)) is already coping with bogus oldmodes, just not in the way that other routines seem to expect. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. This fixed several testsuite changes on the ARM targets. OK to install? Thanks, Richard Index: gcc/expr.c === --- gcc/expr.c 2013-11-02 10:34:44.083635650 + +++ gcc/expr.c 2013-11-02 10:42:55.179233840 + @@ -712,13 +712,12 @@ convert_modes (enum machine_mode mode, e return x; if (CONST_SCALAR_INT_P (x) - && GET_MODE_CLASS (mode) == MODE_INT - && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT)) + && GET_MODE_CLASS (mode) == MODE_INT) { /* If the caller did not tell us the old mode, then there is not much to do with respect to canonization. We have to assume that all the bits are significant. */ - if (oldmode == VOIDmode) + if (GET_MODE_CLASS (oldmode) != MODE_INT) oldmode = MAX_MODE_INT; wide_int w = wide_int::from (std::make_pair (x, oldmode), GET_MODE_PRECISION (mode),
[wide-int] doloop fixes
The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? 
Thanks, Richard Index: gcc/loop-doloop.c === --- gcc/loop-doloop.c 2013-11-02 10:49:37.463178153 + +++ gcc/loop-doloop.c 2013-11-02 10:49:55.927314661 + @@ -549,7 +549,7 @@ doloop_modify (struct loop *loop, struct { rtx init; unsigned level = get_loop_level (loop) + 1; -wide_int iter; +widest_int iter; rtx iter_rtx; if (!get_max_loop_iterations (loop, &iter) @@ -673,7 +673,7 @@ doloop_optimize (struct loop *loop) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else -iterations_max = immed_wide_int_const (iter, mode); +iterations_max = GEN_INT (iter.to_shwi ()); level = get_loop_level (loop) + 1; /* Generate looping insn. If the pattern FAILs then give up trying
[wide-int] Fix fold_ternary VEC_PERM_EXPR handling
The wide-int conversion for the fold_ternary VEC_PERM_EXPR case converted a mask of valid element numbers to an element count, which is one greater. The old code set need_mask_canon if an index was greater than the mask, but the new code sets it if an index is greater than the element count, giving an off-by-one error. This patch restores the mainline mask handling and uses that in the gtu_p call instead. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite differences for one of the ARM targets. OK to install? Thanks, Richard Index: gcc/fold-const.c === --- gcc/fold-const.c2013-11-02 11:07:33.097149207 + +++ gcc/fold-const.c2013-11-02 11:07:38.107188425 + @@ -14360,7 +14360,7 @@ fold_ternary_loc (location_t loc, enum t case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask; unsigned char *sel = XALLOCAVEC (unsigned char, nelts); bool need_mask_canon = false; bool all_in_vec0 = true; @@ -14368,23 +14368,22 @@ fold_ternary_loc (location_t loc, enum t bool maybe_identity = true; bool single_arg = (op0 == op1); bool changed = false; - int nelts_cnt = single_arg ? nelts : nelts * 2; + mask = single_arg ? (nelts - 1) : (2 * nelts - 1); gcc_assert (nelts == VECTOR_CST_NELTS (arg2)); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); - if (TREE_CODE (val) != INTEGER_CST) return NULL_TREE; /* Make sure that the perm value is in an acceptable range. */ wide_int t = val; - if (wi::gtu_p (t, nelts_cnt)) + if (wi::gtu_p (t, mask)) { need_mask_canon = true; - sel[i] = t.to_uhwi () & (nelts_cnt - 1); + sel[i] = t.to_uhwi () & mask; } else sel[i] = t.to_uhwi ();
Re: [wide-int] Do not treat rtxes as sign-extended
Mike Stump writes: > On Nov 2, 2013, at 3:30 AM, Richard Sandiford > wrote: >> Bah. After all that effort, it turns out that -- by design -- >> there is one special case where CONST_INTs are not sign-extended. > > I'm thinking this needs commentary in wide-int.h, though, not sure what > we'd say... AIUI the only valid values of STORE_FLAG_VALUE are 1 and -1. If others are used, the rtx decompose routine would need to use the scratch array. So wide-int itself shouldn't need to care. It just means that a 1-precision rtx input can be sign or zero extended, just like for N-precision trees. And we indicate that by setting is_sign_extended to false. Thanks, Richard
[PATCH, i386]: Introduce Ts and Tv address constraints
Hello! These two constraints go together with address_no_seg_operand and vsib_address_operand operand predicates. 2013-11-02 Uros Bizjak * config/i386/constraints.md (Ts, Tv): New address constrains. * config/i386/i386.md (*lea, *_): Use Ts constraint for address_no_seg_operand. * config/i386/sse.md (*avx512pf_gatherpf_mask) (*avx512pf_gatherpf, *avx512pf_scatterpf_mask) (*avx512pf_scatterpf, *avx2_gathersi) (*avx2_gathersi_2, *avx2_gatherdi, *avx2_gatherdi_2) (*avx2_gatherdi_3, *avx2_gatherdi_4) (*avx512f_gathersi, *avx512f_gathersi_2) (*avx512f_gatherdi, *avx512f_gatherdi_2) (*avx512f_scattersi *avx512f_scatterdi): Use Tv constraint for vsib_address_operand. Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN. Uros. Index: config/i386/constraints.md === --- config/i386/constraints.md (revision 204298) +++ config/i386/constraints.md (working copy) @@ -237,9 +237,19 @@ (match_operand 0 "x86_64_zext_immediate_operand")) ;; T prefix is used for different address constraints +;; v - VSIB address +;; s - address with no segment register ;; i - address with no index and no rip ;; b - address with no base and no rip +(define_address_constraint "Tv" + "VSIB address operand" + (match_operand 0 "vsib_address_operand")) + +(define_address_constraint "Ts" + "Address operand without segment register" + (match_operand 0 "address_no_seg_operand")) + (define_address_constraint "Ti" "MPX address operand without index" (match_operand 0 "address_mpx_no_index_operand")) Index: config/i386/i386.md === --- config/i386/i386.md (revision 204298) +++ config/i386/i386.md (working copy) @@ -5394,7 +5394,7 @@ (define_insn_and_split "*lea" [(set (match_operand:SWI48 0 "register_operand" "=r") - (match_operand:SWI48 1 "address_no_seg_operand" "p"))] + (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))] "" { if (SImode_address_operand (operands[1], VOIDmode)) @@ -18297,7 +18297,7 @@ (define_insn "*_" [(parallel [(unspec [(match_operand:BND 0 
"register_operand" "B") - (match_operand: 1 "address_no_seg_operand" "p")] BNDCHECK) + (match_operand: 1 "address_no_seg_operand" "Ts")] BNDCHECK) (set (match_operand:BLK 2 "bnd_mem_operator") (unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))])] "TARGET_MPX" Index: config/i386/sse.md === --- config/i386/sse.md (revision 204298) +++ config/i386/sse.md (working copy) @@ -11462,7 +11462,7 @@ [(match_operand: 0 "register_operand" "k") (match_operator: 5 "vsib_mem_operator" [(unspec:P - [(match_operand:P 2 "vsib_address_operand" "p") + [(match_operand:P 2 "vsib_address_operand" "Tv") (match_operand:VI48_512 1 "register_operand" "v") (match_operand:SI 3 "const1248_operand" "n")] UNSPEC_VSIBADDR)]) @@ -11489,7 +11489,7 @@ [(const_int -1) (match_operator: 4 "vsib_mem_operator" [(unspec:P - [(match_operand:P 1 "vsib_address_operand" "p") + [(match_operand:P 1 "vsib_address_operand" "Tv") (match_operand:VI48_512 0 "register_operand" "v") (match_operand:SI 2 "const1248_operand" "n")] UNSPEC_VSIBADDR)]) @@ -11533,7 +11533,7 @@ [(match_operand: 0 "register_operand" "k") (match_operator: 5 "vsib_mem_operator" [(unspec:P - [(match_operand:P 2 "vsib_address_operand" "p") + [(match_operand:P 2 "vsib_address_operand" "Tv") (match_operand:VI48_512 1 "register_operand" "v") (match_operand:SI 3 "const1248_operand" "n")] UNSPEC_VSIBADDR)]) @@ -11560,7 +11560,7 @@ [(const_int -1) (match_operator: 4 "vsib_mem_operator" [(unspec:P - [(match_operand:P 1 "vsib_address_operand" "p") + [(match_operand:P 1 "vsib_address_operand" "Tv") (match_operand:VI48_512 0 "register_operand" "v") (match_operand:SI 2 "const1248_operand" "n")] UNSPEC_VSIBADDR)]) @@ -13650,7 +13650,7 @@ [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0") (match_operator: 7 "vsib_mem_operator" [(unspec:P - [(match_operand:P 3 "vsib_address_operand" "p") + [(match_operand:P 3 "vsib_address_operand" "Tv") (match_operand: 4 "register_operand" "x") (match_operand:SI 6 "const1248_operand" "n")] UNSPEC_VSIBADDR)]) @@ -13670,7 
+13670,7 @@ [(pc) (match_operator: 6 "vsib_mem_operator" [(unspec:P - [(match_operand:P 2 "vsib_address_operand" "p") + [(match_operand:P 2 "vsi
[wide-int] Make shifted_mask more forgiving
r201806 added some extra checks to the handling of CONCAT when expanding assignments. This was to fix an ICE on gcc.dg/pr48335-2.c for ppc. I tripped over this because it also causes the assembly output for the test to change. I tried backing out the patch and the ICE was from the assert in wide-int.cc:shifted_mask: gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT); However, in this case, the mask was being generated by mask_rtx, which specifically allows inputs that are out of range of the mode: /* Return a constant integer mask value of mode MODE with BITSIZE ones followed by BITPOS zeros, or the complement of that if COMPLEMENT. The mask is truncated if necessary to the width of mode MODE. The mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ So I don't think these inputs should trigger an ICE, even if in this particular case they're conceptually wrong for some reason. Since wide-int.cc:shifted_mask already handles zero-width masks and cases where the end bit is out of range, I think we should handle the start being out of range too. It might be that 201806 is a good thing anyway (no idea either way), but I think it should go directly on mainline first if so. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes the pr48335-2.c assembly differences. OK to install? 
Thanks, Richard Index: gcc/expr.c === --- gcc/expr.c 2013-11-02 11:15:32.739837369 + +++ gcc/expr.c 2013-11-02 11:15:42.562911693 + @@ -4765,14 +4765,12 @@ expand_assignment (tree to, tree from, b && (bitpos == 0 || bitpos == mode_bitsize / 2)) result = store_expr (from, XEXP (to_rtx, bitpos != 0), false, nontemporal); - else if (bitpos + bitsize <= mode_bitsize / 2 - && bitpos+bitsize <= mode_bitsize) + else if (bitpos + bitsize <= mode_bitsize / 2) result = store_field (XEXP (to_rtx, 0), bitsize, bitpos, bitregion_start, bitregion_end, mode1, from, get_alias_set (to), nontemporal); - else if (bitpos >= mode_bitsize / 2 - && bitpos+bitsize <= mode_bitsize) + else if (bitpos >= mode_bitsize / 2) result = store_field (XEXP (to_rtx, 1), bitsize, bitpos - mode_bitsize / 2, bitregion_start, bitregion_end, @@ -4791,12 +4789,8 @@ expand_assignment (tree to, tree from, b } else { - HOST_WIDE_INT extra = 0; - if (bitpos+bitsize > mode_bitsize) -extra = bitpos+bitsize - mode_bitsize; rtx temp = assign_stack_temp (GET_MODE (to_rtx), - GET_MODE_SIZE (GET_MODE (to_rtx)) - + extra); + GET_MODE_SIZE (GET_MODE (to_rtx))); write_complex_part (temp, XEXP (to_rtx, 0), false); write_complex_part (temp, XEXP (to_rtx, 1), true); result = store_field (temp, bitsize, bitpos, Index: gcc/wide-int.cc === --- gcc/wide-int.cc 2013-11-02 11:15:32.740837376 + +++ gcc/wide-int.cc 2013-11-02 11:15:42.562911693 + @@ -748,18 +748,16 @@ unsigned int wi::shifted_mask (HOST_WIDE_INT *val, unsigned int start, unsigned int width, bool negate, unsigned int prec) { - gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT); - - if (start + width > prec) -width = prec - start; - unsigned int end = start + width; - - if (width == 0) + if (start >= prec || width == 0) { val[0] = negate ? -1 : 0; return 1; } + if (width > prec - start) +width = prec - start; + unsigned int end = start + width; + unsigned int i = 0; while (i < start / HOST_BITS_PER_WIDE_INT) val[i++] = negate ? -1 : 0;
[gomp4] fix simd clones with no return value
The second testcase currently ICEs I guess during simd cloning, just wanted to make it clear that while simd clones without any arguments probably don't make any sense (other than const, but those really should be hoisted out of the loop much earlier), simd clones with no return value make sense. The problem here is that we are currently bailing out when there is no return value, thus keeping the return in the basic block which will be redirected elsewhere. Fixed with the attached patch and committed to branch. Note: I am purposely inhibiting warnings (-w) in the test to avoid failing with the aforementioned: a.c:2:6: warning: AVX vector argument without AVX enabled changes the ABI [enabled by default] We already have a test that fails because of this. No sense having another one. After we fix the ABI problem we can remove the -w. Aldy gcc/ChangeLog.gomp * omp-low.c (ipa_simd_modify_function_body): Handle empty returns. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 7852723..d30fb17 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -11098,17 +11098,23 @@ ipa_simd_modify_function_body (struct cgraph_node *node, { case GIMPLE_RETURN: { - /* Replace `return foo' by `retval_array[iter] = foo'. */ tree old_retval = gimple_return_retval (stmt); - if (!old_retval) - break; - stmt = gimple_build_assign (build4 (ARRAY_REF, - TREE_TYPE (old_retval), - retval_array, iter, - NULL, NULL), - old_retval); - gsi_replace (&gsi, stmt, true); - modified = true; + if (old_retval) + { + /* Replace `return foo' by `retval_array[iter] = foo'. 
*/ + stmt = gimple_build_assign (build4 (ARRAY_REF, + TREE_TYPE (old_retval), + retval_array, iter, + NULL, NULL), + old_retval); + gsi_replace (&gsi, stmt, true); + modified = true; + } + else + { + gsi_remove (&gsi, true); + continue; + } break; } diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c new file mode 100644 index 000..801c24f --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp -w" } */ + +/* ?? The -w above is to inhibit the following warning for now: + a.c:2:6: warning: AVX vector argument without AVX enabled changes + the ABI [enabled by default]. */ + +#pragma omp declare simd notinbranch simdlen(4) +void foo (int *a) +{ + *a = 555; +}
Re: [PATCH][ubsan] Add VLA bound instrumentation
On Fri, Nov 01, 2013 at 04:39:09PM -0400, Jason Merrill wrote: > On 11/01/2013 03:10 PM, Marek Polacek wrote: > >+ /* We need to stabilize side-effects in VLA sizes for regular array > >+ declarations too, not just pointers to arrays. */ > > This comment isn't really relevant to its new location. :) > > OK with that removed. Sure, thanks. Passed various testing, will install this soon. Marek
[gomp4] mangle linear step of 1 with just 'l'
Hi Jakub. Your patch mangling negative linear steps caused a regression in simd-clones-2.c. Well, it already had a failure, but now it has two :). The problem is that AFAIU, a linear step of 1, is mangled with just 'l', not 'l1'. I am not completely sure of this, and was hoping Balaji could clear this up, but on page 7 of the Intel vector ABI document, the example for: __declspec(vector(uniform(a), aligned(a:32), linear(k:1))) extern float setArray(float *a, float x, int k) ...is mangled as _ZGVxN4ua32vl_setArray, and in the subsequent explanatory paragraph, the document specifically says: “l” indicates linear(k:1) – k is a linear variable whose stride is 1. However, since the spec itself says nothing about a default linear stride 1, I don't know whether this is an oversight in the BNF grammar or a typo in the example. Balaji? If a linear step of 1 is mangled as 'l' as the example suggests, then I'd like to commit the following patch. Aldy gcc/ChangeLog.gomp * omp-low.c (simd_clone_mangle): Linear step of 1 is mangled as 'l'. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index d30fb17..e895cca 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -10825,9 +10825,9 @@ simd_clone_mangle (struct cgraph_node *old_node, struct cgraph_node *new_node) { gcc_assert (arg.linear_step != 0); pp_character (&pp, 'l'); - if (arg.linear_step > 0) + if (arg.linear_step > 1) pp_unsigned_wide_integer (&pp, arg.linear_step); - else + else if (arg.linear_step < 0) { pp_character (&pp, 'n'); pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
Re: [wide-int] Fix aarch{32,64} builds
I always like patches that change host dependencies into more general code. ok from my perspective. kenny On 11/02/2013 06:13 AM, Richard Sandiford wrote: I decided to lump these together since the problems were the same. There were some typos in the real_to_integer invocation, while changing: /* There must be no padding. */ if (!host_integerp (TYPE_SIZE (type), 1) || (tree_low_cst (TYPE_SIZE (type), 1) != count * GET_MODE_BITSIZE (*modep))) return -1; to: if (!tree_fits_uhwi_p (TYPE_SIZE (type)) || (tree_to_uhwi (TYPE_SIZE (type)) != count * GET_MODE_BITSIZE (*modep))) return -1; introduced a signed/unsigned warning. Tested on aarch64-linux-gnueabi & arm-linux-gnueabi and applied as obvious. Thanks, Richard Index: gcc/config/aarch64/aarch64.c === --- gcc/config/aarch64/aarch64.c(revision 204311) +++ gcc/config/aarch64/aarch64.c(working copy) @@ -6030,9 +6030,7 @@ - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -6060,9 +6058,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -6092,9 +6088,7 @@ } /* There must be no padding. 
*/ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -7433,7 +7427,7 @@ int exponent; unsigned HOST_WIDE_INT mantissa, mask; REAL_VALUE_TYPE r, m; - bool &fail + bool fail; if (!CONST_DOUBLE_P (x)) return false; @@ -7457,7 +7451,7 @@ WARNING: If we ever have a representation using more than 2 * H_W_I - 1 bits for the mantissa, this can fail (low bits will be lost). */ real_ldexp (&m, &r, point_pos - exponent); - w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2); + wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); /* If the low part of the mantissa has bits set we cannot represent the value. */ Index: gcc/config/arm/arm.c === --- gcc/config/arm/arm.c(revision 204311) +++ gcc/config/arm/arm.c(working copy) @@ -4678,9 +4678,7 @@ - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -4708,9 +4706,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -4740,9 +4736,7 @@ } /* There must be no padding. */ - if (!tree_fits_uhwi_p (TYPE_SIZE (type)) - || (tree_to_uhwi (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep))) + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -11299,7 +11293,7 @@ WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 bits for the mantissa, this may fail (low bits would be lost). 
*/ real_ldexp (&m, &r, point_pos - exponent); - wide_int w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2); + wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); mantissa = w.elt (0); mant_hi = w.elt (1);
Re: [wide-int] Fix exact_log2 zext test
On 11/02/2013 06:19 AM, Richard Sandiford wrote: ...which I'd fluffed when doing the wi:: conversion. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixed a spurious gcc.dg difference on alpha-linux-gnu. Applied as obvious. Thanks, Richard Index: gcc/wide-int.cc === --- gcc/wide-int.cc (revision 204311) +++ gcc/wide-int.cc (working copy) @@ -2044,7 +2044,7 @@ /* Get a zero-extended form of block CRUX. */ unsigned HOST_WIDE_INT hwi = x.val[crux]; - if (crux * HOST_BITS_PER_WIDE_INT > x.precision) + if ((crux + 1) * HOST_BITS_PER_WIDE_INT > x.precision) hwi = zext_hwi (hwi, x.precision % HOST_BITS_PER_WIDE_INT); /* Now it's down to whether HWI is a power of 2. */ ok by me. kenny
Re: [wide-int] Do not treat rtxes as sign-extended
On 11/02/2013 06:30 AM, Richard Sandiford wrote: Bah. After all that effort, it turns out that -- by design -- there is one special case where CONST_INTs are not sign-extended. Nonzero/true BImode integers are stored as STORE_FLAG_VALUE, which can be 1 rather than -1. So (const_int 1) can be a valid BImode integer -- and consequently (const_int -1) can be wrong -- even though BImode only has 1 bit. It might be nice to change that, but for wide-int I think we should just treat rtxes like trees for now. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some ICEs seen on bfin-elf. OK to install? do we have to throw away the baby with the bath water here? I guess what you are saying is that it is worse to have is_sign_extended be a variable that is almost always true than to be a hard false. also we could preserve the test and make it not apply to bimode. kenny Thanks, Richard Index: gcc/rtl.h === --- gcc/rtl.h (revision 204311) +++ gcc/rtl.h (working copy) @@ -1408,7 +1408,9 @@ { static const enum precision_type precision_type = VAR_PRECISION; static const bool host_dependent_precision = false; -static const bool is_sign_extended = true; +/* This ought to be true, except for the special case that BImode + is canonicalized to STORE_FLAG_VALUE, which might be 1. */ +static const bool is_sign_extended = false; static unsigned int get_precision (const rtx_mode_t &); static wi::storage_ref decompose (HOST_WIDE_INT *, unsigned int, const rtx_mode_t &); @@ -1430,10 +1432,6 @@ switch (GET_CODE (x.first)) { case CONST_INT: - if (precision < HOST_BITS_PER_WIDE_INT) - gcc_checking_assert (INTVAL (x.first) -== sext_hwi (INTVAL (x.first), precision)); - return wi::storage_ref (&INTVAL (x.first), 1, precision); case CONST_WIDE_INT:
Re: [wide-int] Another tweak to convert_modes
On 11/02/2013 06:48 AM, Richard Sandiford wrote: It turns out that when doing a vector shift by 2, the optab routine passes (const_int 2) to convert_modes with oldmode set to the mode of the shift (e.g. something like V8HI). When the target mode is a real integer mode like SImode, mainline just ignores that oldmode and returns a (const_int 2) regardless, but wide-int doesn't. Saying that (const_int 2) has a vector mode is almost certainly a bug that ought to be trapped by an assert, but we're not trying to fight that battle here. The current code: if (CONST_SCALAR_INT_P (x) && GET_MODE_CLASS (mode) == MODE_INT && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT)) is already coping with bogus oldmodes, just not in the way that other routines seem to expect. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. This fixed several testsuite changes on the ARM targets. OK to install? Thanks, Richard or we can fix it on trunk and pull the patch up.If you as an (former) rtl maintainer want to go in this direction, then fine. Index: gcc/expr.c === --- gcc/expr.c 2013-11-02 10:34:44.083635650 + +++ gcc/expr.c 2013-11-02 10:42:55.179233840 + @@ -712,13 +712,12 @@ convert_modes (enum machine_mode mode, e return x; if (CONST_SCALAR_INT_P (x) - && GET_MODE_CLASS (mode) == MODE_INT - && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT)) + && GET_MODE_CLASS (mode) == MODE_INT) { /* If the caller did not tell us the old mode, then there is not much to do with respect to canonization. We have to assume that all the bits are significant. */ - if (oldmode == VOIDmode) + if (GET_MODE_CLASS (oldmode) != MODE_INT) oldmode = MAX_MODE_INT; wide_int w = wide_int::from (std::make_pair (x, oldmode), GET_MODE_PRECISION (mode),
Re: [wide-int] doloop fixes
On 11/02/2013 07:06 AM, Richard Sandiford wrote: The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? I dislike this a lot. I think that this is a badly written pattern in the bfin port if it depends on the host size. As machines get bigger, (they may not be getting faster but they are still getting bigger) having traps like this at the portable level will bite us. 
in truth this code should really be iterations_max = immed_wide_int_const (iter, mode) with no tests at all. Thanks, Richard Index: gcc/loop-doloop.c === --- gcc/loop-doloop.c 2013-11-02 10:49:37.463178153 + +++ gcc/loop-doloop.c 2013-11-02 10:49:55.927314661 + @@ -549,7 +549,7 @@ doloop_modify (struct loop *loop, struct { rtx init; unsigned level = get_loop_level (loop) + 1; -wide_int iter; +widest_int iter; rtx iter_rtx; if (!get_max_loop_iterations (loop, &iter) @@ -673,7 +673,7 @@ doloop_optimize (struct loop *loop) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else -iterations_max = immed_wide_int_const (iter, mode); +iterations_max = GEN_INT (iter.to_shwi ()); level = get_loop_level (loop) + 1; /* Generate looping insn. If the pattern FAILs then give up trying
Re: [wide-int] Make shifted_mask more forgiving
this is fine with me. kenny On 11/02/2013 07:52 AM, Richard Sandiford wrote: r201806 added some extra checks to the handling of CONCAT when expanding assignments. This was to fix an ICE on gcc.dg/pr48335-2.c for ppc. I tripped over this because it also causes the assembly output for the test to change. I tried backing out the patch and the ICE was from the assert in wide-int.cc:shifted_mask: gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT); However, in this case, the mask was being generated by mask_rtx, which specifically allows inputs that are out of range of the mode: /* Return a constant integer mask value of mode MODE with BITSIZE ones followed by BITPOS zeros, or the complement of that if COMPLEMENT. The mask is truncated if necessary to the width of mode MODE. The mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ So I don't think these inputs should trigger an ICE, even if in this particularly case they're conceptually wrong for some reason. Since wide-int.cc:shifted_mask already handles zero-width masks and cases where the end bit is out of range, I think we should handle the start being out of range too. It might be that 201806 is a good thing anyway (no idea either way), but I think it should go directly on mainline first if so. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes the pr48335-2.c assembly differences. OK to install? 
Thanks, Richard Index: gcc/expr.c === --- gcc/expr.c 2013-11-02 11:15:32.739837369 + +++ gcc/expr.c 2013-11-02 11:15:42.562911693 + @@ -4765,14 +4765,12 @@ expand_assignment (tree to, tree from, b && (bitpos == 0 || bitpos == mode_bitsize / 2)) result = store_expr (from, XEXP (to_rtx, bitpos != 0), false, nontemporal); - else if (bitpos + bitsize <= mode_bitsize / 2 - && bitpos+bitsize <= mode_bitsize) + else if (bitpos + bitsize <= mode_bitsize / 2) result = store_field (XEXP (to_rtx, 0), bitsize, bitpos, bitregion_start, bitregion_end, mode1, from, get_alias_set (to), nontemporal); - else if (bitpos >= mode_bitsize / 2 - && bitpos+bitsize <= mode_bitsize) + else if (bitpos >= mode_bitsize / 2) result = store_field (XEXP (to_rtx, 1), bitsize, bitpos - mode_bitsize / 2, bitregion_start, bitregion_end, @@ -4791,12 +4789,8 @@ expand_assignment (tree to, tree from, b } else { - HOST_WIDE_INT extra = 0; - if (bitpos+bitsize > mode_bitsize) -extra = bitpos+bitsize - mode_bitsize; rtx temp = assign_stack_temp (GET_MODE (to_rtx), - GET_MODE_SIZE (GET_MODE (to_rtx)) - + extra); + GET_MODE_SIZE (GET_MODE (to_rtx))); write_complex_part (temp, XEXP (to_rtx, 0), false); write_complex_part (temp, XEXP (to_rtx, 1), true); result = store_field (temp, bitsize, bitpos, Index: gcc/wide-int.cc === --- gcc/wide-int.cc 2013-11-02 11:15:32.740837376 + +++ gcc/wide-int.cc 2013-11-02 11:15:42.562911693 + @@ -748,18 +748,16 @@ unsigned int wi::shifted_mask (HOST_WIDE_INT *val, unsigned int start, unsigned int width, bool negate, unsigned int prec) { - gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT); - - if (start + width > prec) -width = prec - start; - unsigned int end = start + width; - - if (width == 0) + if (start >= prec || width == 0) { val[0] = negate ? -1 : 0; return 1; } + if (width > prec - start) +width = prec - start; + unsigned int end = start + width; + unsigned int i = 0; while (i < start / HOST_BITS_PER_WIDE_INT) val[i++] = negate ? -1 : 0;
Re: [wide-int] Fix fold_ternary VEC_PERM_EXPR handling
On 11/02/2013 07:14 AM, Richard Sandiford wrote: The wide-int conversion for the fold_ternary VEC_PERM_EXPR case converted a mask of valid element numbers to an element count, which is one greater. The old code set need_mask_canon if an index was greater than the mask, but the new code sets it if an index is greater than the element count, giving an off-by-one error. This patch restores the mainline mask handling and uses that in the gtu_p call instead. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite differences for one of the ARM targets. OK to install? Thanks, Richard looks ok to me. Index: gcc/fold-const.c === --- gcc/fold-const.c2013-11-02 11:07:33.097149207 + +++ gcc/fold-const.c2013-11-02 11:07:38.107188425 + @@ -14360,7 +14360,7 @@ fold_ternary_loc (location_t loc, enum t case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask; unsigned char *sel = XALLOCAVEC (unsigned char, nelts); bool need_mask_canon = false; bool all_in_vec0 = true; @@ -14368,23 +14368,22 @@ fold_ternary_loc (location_t loc, enum t bool maybe_identity = true; bool single_arg = (op0 == op1); bool changed = false; - int nelts_cnt = single_arg ? nelts : nelts * 2; + mask = single_arg ? (nelts - 1) : (2 * nelts - 1); gcc_assert (nelts == VECTOR_CST_NELTS (arg2)); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); - if (TREE_CODE (val) != INTEGER_CST) return NULL_TREE; /* Make sure that the perm value is in an acceptable range. */ wide_int t = val; - if (wi::gtu_p (t, nelts_cnt)) + if (wi::gtu_p (t, mask)) { need_mask_canon = true; - sel[i] = t.to_uhwi () & (nelts_cnt - 1); + sel[i] = t.to_uhwi () & mask; } else sel[i] = t.to_uhwi ();
Re: [wide-int] Do not treat rtxes as sign-extended
Kenneth Zadeck writes: > On 11/02/2013 06:30 AM, Richard Sandiford wrote: >> Bah. After all that effort, it turns out that -- by design -- >> there is one special case where CONST_INTs are not sign-extended. >> Nonzero/true BImode integers are stored as STORE_FLAG_VALUE, >> which can be 1 rather than -1. So (const_int 1) can be a valid >> BImode integer -- and consequently (const_int -1) can be wrong -- >> even though BImode only has 1 bit. >> >> It might be nice to change that, but for wide-int I think we should >> just treat rtxes like trees for now. >> >> Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some ICEs >> seen on bfin-elf. OK to install? > do we have to throw away the baby with the bath water here? I guess > what you are saying is that it is worse to have is_sign_extended be a > variable that is almost always true than to be a hard false. Right. is_sign_extended is only useful if it's a compile-time value. Making it a run-time value would negate the benefit. I think in practice STORE_FLAG_VALUE is a compile-time constant too, so we could set is_sign_extended to STORE_FLAG_VALUE == -1. But AFAICT that would only help SPU and m68k. > also we could preserve the test and make it not apply to bimode. You mean the one in the assert? Yeah, OK. How about this version? Richard Index: gcc/rtl.h === --- gcc/rtl.h 2013-11-02 11:06:12.738517644 + +++ gcc/rtl.h 2013-11-02 14:22:05.636007860 + @@ -1408,7 +1408,9 @@ typedef std::pair ::decompose ( case CONST_INT: if (precision < HOST_BITS_PER_WIDE_INT) gcc_checking_assert (INTVAL (x.first) -== sext_hwi (INTVAL (x.first), precision)); +== sext_hwi (INTVAL (x.first), precision) +|| (precision == 1 && INTVAL (x.first) == 1)); return wi::storage_ref (&INTVAL (x.first), 1, precision);
Re: [wide-int] doloop fixes
Kenneth Zadeck writes: > On 11/02/2013 07:06 AM, Richard Sandiford wrote: >> The first part of this is a simple type mismatch -- get_max_loop_iterations >> returns a widest_int aka max_wide_int -- and I'd have installed it as >> obvious. The second part isn't as obvious though. The old code stored >> the maximum iterations as: >> >>if (!get_max_loop_iterations (loop, &iter) >>|| !iter.fits_shwi ()) >> iterations_max = const0_rtx; >>else >> iterations_max = GEN_INT (iter.to_shwi ()); >> >> and the new code uses: >> >>if (!get_max_loop_iterations (loop, &iter) >>|| !wi::fits_shwi_p (iter)) >> iterations_max = const0_rtx; >>else >> iterations_max = immed_wide_int_const (iter, mode); >> >> which includes an extra canonicalisation. I agree it would be good to do >> that in principle, but I'm not sure it copes correctly with the case where >> the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the >> real fix would be to avoid the host dependence altogether, i.e. get rid >> of the fits_shwi_p too. >> >> As it stands, this breaks bfin-elf's pattern, which has: >> >>/* Due to limitations in the hardware (an initial loop count of 0 >> does not loop 2^32 times) we must avoid to generate a hardware >> loops when we cannot rule out this case. */ >>if (!flag_unsafe_loop_optimizations >>&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) >> FAIL; >> >> With the sign-extending conversion, this now triggers more often than >> it was supposed to. >> >> Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, >> and since we still use that form in the doloop_begin code, I think we should >> just back the immed_wide_int_const out. >> >> Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some >> unwanted testsuite changes in bfin-elf. OK to install? > I dislike this a lot. I think that this is a badly written pattern > in the bfin port if it depends on the host size. 
As machines get > bigger, (they may not be getting faster but they are still getting > bigger) having traps like this at the portable level will bite us. But this isn't at the portable level, it's backend-specific code. It knows that it's dealing with a 32-bit counter. > in truth this code should really be iterations_max = > immed_wide_int_const (iter, mode) with no tests at all. But like I say, that won't cope with loops that iterate 1 << GET_MODE_PRECISION (mode) times, even if that fits in a signed HWI. (That case could happen if the hardware supports it and if the counter starts out as 0.) The mainline code is self-consistent in that the number of iterations is passed as a positive signed HWI, using 0 if the value isn't known or is greater than HOST_WIDE_INT_MAX. In the current interface, the CONST_INT doesn't really have a mode. It's just used as a convenient way of passing a HWI to an expander. I'm not saying that it's the ideal interface. But if we want to change it, let's do that separately, and test it on targets that have doloops. Until then, the wide-int branch can provide the current interface just as easily as mainline. Thanks, Richard
Re: [wide-int] Do not treat rtxes as sign-extended
On 11/02/2013 10:25 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 06:30 AM, Richard Sandiford wrote: Bah. After all that effort, it turns out that -- by design -- there is one special case where CONST_INTs are not sign-extended. Nonzero/true BImode integers are stored as STORE_FLAG_VALUE, which can be 1 rather than -1. So (const_int 1) can be a valid BImode integer -- and consequently (const_int -1) can be wrong -- even though BImode only has 1 bit. It might be nice to change that, but for wide-int I think we should just treat rtxes like trees for now. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some ICEs seen on bfin-elf. OK to install? do we have to throw away the baby with the bath water here? I guess what you are saying is that it is worse to have is_sign_extended be a variable that is almost always true than to be a hard false. Right. is_sign_extended is only useful if it's a compile-time value. Making it a run-time value would negate the benefit. I think in practice STORE_FLAG_VALUE is a compile-time constant too, so we could set is_sign_extended to STORE_FLAG_VALUE == -1. But AFAICT that would only help SPU and m68k. also we could preserve the test and make it not apply to bimode. You mean the one in the assert? Yeah, OK. How about this version? Richard Index: gcc/rtl.h === --- gcc/rtl.h 2013-11-02 11:06:12.738517644 + +++ gcc/rtl.h 2013-11-02 14:22:05.636007860 + @@ -1408,7 +1408,9 @@ typedef std::pair ::decompose ( case CONST_INT: if (precision < HOST_BITS_PER_WIDE_INT) gcc_checking_assert (INTVAL (x.first) -== sext_hwi (INTVAL (x.first), precision)); +== sext_hwi (INTVAL (x.first), precision) +|| (precision == 1 && INTVAL (x.first) == 1)); return wi::storage_ref (&INTVAL (x.first), 1, precision); yes, this is fine. kenny
Re: [wide-int] doloop fixes
On 11/02/2013 10:47 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 07:06 AM, Richard Sandiford wrote: The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? I dislike this a lot. I think that this is a badly written pattern in the bfin port if it depends on the host size. 
As machines get bigger, (they may not be getting faster but they are still getting bigger) having traps like this at the portable level will bite us. But this isn't at the portable level, it's backend-specific code. It knows that it's dealing with a 32-bit counter. yes, but the patch is dumbing down the portable part of the compiler. in truth this code should really be iterations_max = immed_wide_int_const (iter, mode) with no tests at all. But like I say, that won't cope with loops that iterate 1 << GET_MODE_PRECISION (mode) times, even if that fits in a signed HWI. (That case could happen if the hardware supports it and if the counter starts out as 0.) The mainline code is self-consistent in that the number of iterations is passed as a positive signed HWI, using 0 if the value isn't known or is greater than HOST_WIDE_INT_MAX. In the current interface, the CONST_INT doesn't really have a mode. It's just used as a convenient way of passing a HWI to an expander. I'm not saying that it's the ideal interface. But if we want to change it, let's do that separately, and test it on targets that have doloops. Until then, the wide-int branch can provide the current interface just as easily as mainline. we do not have a predicate at the rtl level like we do at the tree level where we can ask if the value can be represented in a mode without losing any information. But the proper thing to do here is to check to see if iter fits in the mode, if it does, then generate the immed_wide_int_const and if not generate the 0. In this way it does the right thing for loops that iterate 1 << GET_MODE_PRECISION (mode) times, i.e. it will produce a 0.
Re: [PATCH, rs6000] Fix rs6000_expand_vector_set for little endian
On Thu, Oct 31, 2013 at 9:45 PM, Bill Schmidt wrote: > Hi, > > Brooks Moses reported a bug with code that sets a single element of a > vector to a given value and the rest of the vector to zero. This is > implemented in rs6000_expand_vector_set, which uses a vperm instruction > to place the nonzero value. As usual, we need to adjust the permute > control vector and swap the order of the input operands. I added a test > case based on the bug report. > > Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no > regressions. The new test now passes for both endiannesses. Is this ok > for trunk? > > Thanks, > Bill > > > gcc: > > 2013-10-31 Bill Schmidt > > * config/rs6000/rs6000.c (rs6000_expand_vector_set): Adjust for > little endian. This is okay. > + if (!BYTES_BIG_ENDIAN) But negating the expression and reversing the branches seems superfluous. Thanks, David
Re: [wide-int] doloop fixes
Kenneth Zadeck writes: > On 11/02/2013 10:47 AM, Richard Sandiford wrote: >> Kenneth Zadeck writes: >>> On 11/02/2013 07:06 AM, Richard Sandiford wrote: The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? >>> I dislike this a lot. I think that this is a badly written pattern >>> in the bfin port if it depends on the host size. 
As machines get >>> bigger, (they may not be getting faster but they are still getting >>> bigger) having traps like this at the portable level will bite us. >> But this isn't at the portable level, it's backend-specific code. >> It knows that it's dealing with a 32-bit counter. > yes, but the patch is dumbing down the portable part of the compiler. >> >>> in truth this code should really be iterations_max = >>> immed_wide_int_const (iter, mode) with no tests at all. >> But like I say, that won't cope with loops that iterate >> 1 << GET_MODE_PRECISION (mode) times, even if that fits >> in a signed HWI. (That case could happen if the hardware >> supports it and if the counter starts out as 0.) >> >> The mainline code is self-consistent in that the number of >> iterations is passed as a positive signed HWI, using 0 if the >> value isn't known or is greater than HOST_WIDE_INT_MAX. >> In the current interface, the CONST_INT doesn't really have a mode. >> It's just used as a convenient way of passing a HWI to an expander. >> >> I'm not saying that it's the ideal interface. But if we want to change it, >> let's do that separately, and test it on targets that have doloops. >> Until then, the wide-int branch can provide the current interface >> just as easily as mainline. > we do not have a predicate at the rtl level like we do at the tree level > where we can ask if the value can be represented in a mode without > loosing any information. But the proper thing to do here is to check > to see if iter fits in the mode, if it does, then generate the > immed_wide_int_const and if not generate the 0. In this way it does > the right thing for loops that iterate 1< i.e. it will produce a 0. And that will be correct even if the mode is > SI or TI. Well, like I say, I don't see the current rtx has having a mode, even conceptually. The CONST_INT is just a convenient way of passing an iteration count to a function that only accepts rtxes as arguments. 
If we could, we'd be passing the widest_int directly as a pointer or reference. But since I'm just repeating what I said above, I should probably bow out of this one. Note that the current wide-int code doesn't allow any more cases than the current mainline code, it just changes their values (and so breaks the ports that relied on the old values). So I think we both agree that the current wide-int code needs to change in some way. And I'd prefer to change it to follow the existing interface, rather than (a) combining a subtle interface change with a big patch and (b) potentially putting the merge back unnecessarily. If you and Mike want to change the interface on wide-int first, and if the maintainers agree that's OK, then I won't object. But you'll need to test it on port
Re: [gomp4] mangle linear step of 1 with just 'l'
On Sat, Nov 02, 2013 at 08:25:28AM -0500, Aldy Hernandez wrote: > Your patch mangling negative linear steps caused a regression in > simd-clones-2.c. Well, it already had a failure, but now it has two > :). > > The problem is that AFAIU, a linear step of 1, is mangled with just > 'l', not 'l1'. > > I am not completely sure of this, and was hoping Balaji could clear > this up, but on page 7 of the Intel vector ABI document, the example > for: > > __declspec(vector(uniform(a), aligned(a:32), linear(k:1))) > extern float setArray(float *a, float x, int k) > > ...is mangled as _ZGVxN4ua32vl_setArray, and in the subsequent > explanatory paragraph, the document specifically says: > > “l” indicates linear(k:1) – k is a linear variable whose stride is 1. > > However, since the spec itself says nothing about a default linear > stride 1, I don't know whether this is an oversight in the BNF > grammar or a typo in the example. Balaji? Ah, I was reading just the grammar and it didn't look like the number was optional. BTW, as I said on IRC yesterday, for say: #pragma omp declare simd simdlen(8) notinbranch int foo (int a, int b); in the 'x' ISA (i.e. SSE2), we are supposed to pass it as typedef int V __attribute__((vector_size (16))); foo (V a.1, V a.2, V b.1, V b.2) which probably isn't that hard for arguments, but are supposed to return also the return value in two V registers (%xmm0/%xmm1). We can't pass that as vector(8) int, so perhaps pretend the return value is ARRAY_TYPE of 2 vector(4) ints and have some special code in the backend to pass that return value in two xmm registers. Similarly for 4 and 8. Jakub
Re: [Patch, fortran] PR57445 - [4.8/4.9 Regression] ICE in gfc_conv_class_to_class - for OPTIONAL polymorphic array
On Fri, Nov 01, 2013 at 04:56:36PM +0100, Paul Richard Thomas wrote: > > This one is trivial. The ICE was caused by an assert that turns out > to be spurious and has been removed. > > Bootstrapped and regtested on FC17/x86_64 - OK for trunk and 4.8? > OK. -- Steve
Re: libsanitizer merge from upstream r191666
On Fri, Nov 01, 2013 at 04:13:05PM -0700, Konstantin Serebryany wrote: > Jukub, > > This works! > New patch attached. Konstantin, This patch, when applied on top of r204305, bootstraps fine on x86_64-apple-darwin12 but produces a lot of regressions with... make -k check RUNTESTFLAGS="asan.exp --target_board=unix'{-m64}'" Native configuration is x86_64-apple-darwin12.5.0 === g++ tests === Running target unix/-m64 FAIL: c-c++-common/asan/global-overflow-1.c -O0 execution test FAIL: c-c++-common/asan/global-overflow-1.c -O1 execution test FAIL: c-c++-common/asan/global-overflow-1.c -O2 execution test FAIL: c-c++-common/asan/global-overflow-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/global-overflow-1.c -O3 -g execution test FAIL: c-c++-common/asan/global-overflow-1.c -Os execution test FAIL: c-c++-common/asan/global-overflow-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/global-overflow-1.c -O2 -flto execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O0 execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O1 execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O2 execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O3 -g execution test FAIL: c-c++-common/asan/heap-overflow-1.c -Os execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/heap-overflow-1.c -O2 -flto execution test FAIL: c-c++-common/asan/memcmp-1.c -O0 execution test FAIL: c-c++-common/asan/memcmp-1.c -O1 execution test FAIL: c-c++-common/asan/memcmp-1.c -O2 execution test FAIL: c-c++-common/asan/memcmp-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/memcmp-1.c -O3 -g execution test FAIL: c-c++-common/asan/memcmp-1.c -Os execution test FAIL: c-c++-common/asan/memcmp-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/memcmp-1.c -O2 -flto 
execution test FAIL: c-c++-common/asan/null-deref-1.c -O0 execution test FAIL: c-c++-common/asan/null-deref-1.c -O1 execution test FAIL: c-c++-common/asan/null-deref-1.c -O2 execution test FAIL: c-c++-common/asan/null-deref-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/null-deref-1.c -O3 -g execution test FAIL: c-c++-common/asan/null-deref-1.c -Os execution test FAIL: c-c++-common/asan/null-deref-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/null-deref-1.c -O2 -flto execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O0 execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O1 execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O2 execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O3 -g execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -Os execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/sanity-check-pure-c-1.c -O2 -flto execution test FAIL: c-c++-common/asan/sleep-before-dying-1.c -O2 execution test FAIL: c-c++-common/asan/sleep-before-dying-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/sleep-before-dying-1.c -O2 -flto execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O0 execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O1 execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O2 execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O3 -g execution test FAIL: c-c++-common/asan/stack-overflow-1.c -Os execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/stack-overflow-1.c -O2 -flto execution test FAIL: c-c++-common/asan/strip-path-prefix-1.c -O2 execution test FAIL: 
c-c++-common/asan/strip-path-prefix-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/strip-path-prefix-1.c -O2 -flto execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O0 execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O1 execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O2 execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O3 -fomit-frame-pointer execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O3 -g execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -Os execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O2 -flto -flto-partition=none execution test FAIL: c-c++-common/asan/strncpy-overflow-1.c -O2 -flto execution test FAIL: c-c++-common/asan/use-after-free-1.c -O0 execution test FAIL: c-c++-common/asan/use-after-free-1.c -O1 execution test
Re: [wide-int] doloop fixes
On 11/02/2013 11:31 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 10:47 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 07:06 AM, Richard Sandiford wrote: The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? I dislike this a lot. I think that this is a badly written pattern in the bfin port if it depends on the host size. 
As machines get bigger, (they may not be getting faster but they are still getting bigger) having traps like this at the portable level will bite us. But this isn't at the portable level, it's backend-specific code. It knows that it's dealing with a 32-bit counter. yes, but the patch is dumbing down the portable part of the compiler. in truth this code should really be iterations_max = immed_wide_int_const (iter, mode) with no tests at all. But like I say, that won't cope with loops that iterate 1 << GET_MODE_PRECISION (mode) times, even if that fits in a signed HWI. (That case could happen if the hardware supports it and if the counter starts out as 0.) The mainline code is self-consistent in that the number of iterations is passed as a positive signed HWI, using 0 if the value isn't known or is greater than HOST_WIDE_INT_MAX. In the current interface, the CONST_INT doesn't really have a mode. It's just used as a convenient way of passing a HWI to an expander. I'm not saying that it's the ideal interface. But if we want to change it, let's do that separately, and test it on targets that have doloops. Until then, the wide-int branch can provide the current interface just as easily as mainline. we do not have a predicate at the rtl level like we do at the tree level where we can ask if the value can be represented in a mode without loosing any information. But the proper thing to do here is to check to see if iter fits in the mode, if it does, then generate the immed_wide_int_const and if not generate the 0. In this way it does the right thing for loops that iterate 1< Well, like I say, I don't see the current rtx has having a mode, even conceptually. The CONST_INT is just a convenient way of passing an iteration count to a function that only accepts rtxes as arguments. If we could, we'd be passing the widest_int directly as a pointer or reference. But since I'm just repeating what I said above, I should probably bow out of this one. 
Your point seems to be that this is a place where the "assumption" that rtxes are signed bites us. if mode happens to be SImode but the variable was unsigned and large, then we will sign extend this and get a pessimistic answer for this pattern. On the other hand, if we do what you suggest, then we are forever limited by the host precision. I see your point, but I do not know what to do. kenny Note that the current wide-int code doesn't allow any more cases than the current mainline code, it just changes their values (and so breaks the ports that relied on the old values). So I think we both agree that the current wide-int code needs to change in some way. And I'd prefer to change it to follow the existing interface, rather than (a) combining a subtle interface change with a big patch and (b) potentially putting the merge back unnecessarily. If you and Mike want to change the interface on wide-int first, and if the maintainers agree that's OK, then I won't object.
Re: [wide-int] doloop fixes
Kenneth Zadeck writes: > On 11/02/2013 11:31 AM, Richard Sandiford wrote: >> Kenneth Zadeck writes: >>> On 11/02/2013 10:47 AM, Richard Sandiford wrote: Kenneth Zadeck writes: > On 11/02/2013 07:06 AM, Richard Sandiford wrote: >> The first part of this is a simple type mismatch -- >> get_max_loop_iterations >> returns a widest_int aka max_wide_int -- and I'd have installed it as >> obvious. The second part isn't as obvious though. The old code stored >> the maximum iterations as: >> >> if (!get_max_loop_iterations (loop, &iter) >> || !iter.fits_shwi ()) >>iterations_max = const0_rtx; >> else >>iterations_max = GEN_INT (iter.to_shwi ()); >> >> and the new code uses: >> >> if (!get_max_loop_iterations (loop, &iter) >> || !wi::fits_shwi_p (iter)) >>iterations_max = const0_rtx; >> else >>iterations_max = immed_wide_int_const (iter, mode); >> >> which includes an extra canonicalisation. I agree it would be good to do >> that in principle, but I'm not sure it copes correctly with the case >> where >> the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the >> real fix would be to avoid the host dependence altogether, i.e. get rid >> of the fits_shwi_p too. >> >> As it stands, this breaks bfin-elf's pattern, which has: >> >> /* Due to limitations in the hardware (an initial loop count of 0 >> does not loop 2^32 times) we must avoid to generate a hardware >> loops when we cannot rule out this case. */ >> if (!flag_unsafe_loop_optimizations >> && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) >>FAIL; >> >> With the sign-extending conversion, this now triggers more often than >> it was supposed to. >> >> Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int >> too, >> and since we still use that form in the doloop_begin code, I think we >> should >> just back the immed_wide_int_const out. >> >> Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some >> unwanted testsuite changes in bfin-elf. OK to install? 
> I dislike this a lot. I think that this is a badly written pattern > in the bfin port if it depends on the host size. As machines get > bigger, (they may not be getting faster but they are still getting > bigger) having traps like this at the portable level will bite us. But this isn't at the portable level, it's backend-specific code. It knows that it's dealing with a 32-bit counter. >>> yes, but the patch is dumbing down the portable part of the compiler. > in truth this code should really be iterations_max = > immed_wide_int_const (iter, mode) with no tests at all. But like I say, that won't cope with loops that iterate 1 << GET_MODE_PRECISION (mode) times, even if that fits in a signed HWI. (That case could happen if the hardware supports it and if the counter starts out as 0.) The mainline code is self-consistent in that the number of iterations is passed as a positive signed HWI, using 0 if the value isn't known or is greater than HOST_WIDE_INT_MAX. In the current interface, the CONST_INT doesn't really have a mode. It's just used as a convenient way of passing a HWI to an expander. I'm not saying that it's the ideal interface. But if we want to change it, let's do that separately, and test it on targets that have doloops. Until then, the wide-int branch can provide the current interface just as easily as mainline. >>> we do not have a predicate at the rtl level like we do at the tree level >>> where we can ask if the value can be represented in a mode without >>> loosing any information. But the proper thing to do here is to check >>> to see if iter fits in the mode, if it does, then generate the >>> immed_wide_int_const and if not generate the 0. In this way it does >>> the right thing for loops that iterate 1<>> i.e. it will produce a 0. And that will be correct even if the mode is >>> SI or TI. >> Well, like I say, I don't see the current rtx has having a mode, >> even conceptually. 
The CONST_INT is just a convenient way of passing >> an iteration count to a function that only accepts rtxes as arguments. >> If we could, we'd be passing the widest_int directly as a pointer or >> reference. >> >> But since I'm just repeating what I said above, I should probably bow >> out of this one. > you point seems to be that this is a place where the "assumption" that > rtxes are signed bites us. > if mode happens to be SImode but the variable was unsigned and large, > then we will sign extend this and get a pessimistic answer for this > pattern. Right. > on the other hand, if we do what you suggest, then we are > fo
Re: [Patch] Implementation of n3793
On 2013-11-01 21:46, Paolo Carlini wrote: Hi, Il giorno 01/nov/2013, alle ore 21:09, Jonathan Wakely ha scritto: Here's the final version of Luc's optional implementation that I'm committing, tested on x86_64-linux. Great. Just noticed a minor nit: the fallback __constexpr_addressof appears not to be inline. Paolo Can you expand? I think it's just as much inline as the other overload -- does it need to be different?
Re: [Patch] Implementation of n3793
>Can you expand? I think it's just as much inline as the other overload >-- does it need to be different? The other overload is constexpr thus it's implicitly inline. The fall back is very simple too and I think it should be declared inline, unless you analyzed the assembly and believe it normally boils down to more than, say, 5 instructions. Paolo
Re: [wide-int] doloop fixes
On 11/02/2013 01:51 PM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 11:31 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 10:47 AM, Richard Sandiford wrote: Kenneth Zadeck writes: On 11/02/2013 07:06 AM, Richard Sandiford wrote: The first part of this is a simple type mismatch -- get_max_loop_iterations returns a widest_int aka max_wide_int -- and I'd have installed it as obvious. The second part isn't as obvious though. The old code stored the maximum iterations as: if (!get_max_loop_iterations (loop, &iter) || !iter.fits_shwi ()) iterations_max = const0_rtx; else iterations_max = GEN_INT (iter.to_shwi ()); and the new code uses: if (!get_max_loop_iterations (loop, &iter) || !wi::fits_shwi_p (iter)) iterations_max = const0_rtx; else iterations_max = immed_wide_int_const (iter, mode); which includes an extra canonicalisation. I agree it would be good to do that in principle, but I'm not sure it copes correctly with the case where the loop iterates 1 << GET_MODE_PRECISION (mode) times. Plus I think the real fix would be to avoid the host dependence altogether, i.e. get rid of the fits_shwi_p too. As it stands, this breaks bfin-elf's pattern, which has: /* Due to limitations in the hardware (an initial loop count of 0 does not loop 2^32 times) we must avoid to generate a hardware loops when we cannot rule out this case. */ if (!flag_unsafe_loop_optimizations && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0x) FAIL; With the sign-extending conversion, this now triggers more often than it was supposed to. Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too, and since we still use that form in the doloop_begin code, I think we should just back the immed_wide_int_const out. Tested on powerpc64-linux-gnu and x86_64-linux-gnu. It fixes some unwanted testsuite changes in bfin-elf. OK to install? I dislike this a lot. I think that this is a badly written pattern in the bfin port if it depends on the host size. 
As machines get bigger, (they may not be getting faster but they are still getting bigger) having traps like this at the portable level will bite us. But this isn't at the portable level, it's backend-specific code. It knows that it's dealing with a 32-bit counter. yes, but the patch is dumbing down the portable part of the compiler. in truth this code should really be iterations_max = immed_wide_int_const (iter, mode) with no tests at all. But like I say, that won't cope with loops that iterate 1 << GET_MODE_PRECISION (mode) times, even if that fits in a signed HWI. (That case could happen if the hardware supports it and if the counter starts out as 0.) The mainline code is self-consistent in that the number of iterations is passed as a positive signed HWI, using 0 if the value isn't known or is greater than HOST_WIDE_INT_MAX. In the current interface, the CONST_INT doesn't really have a mode. It's just used as a convenient way of passing a HWI to an expander. I'm not saying that it's the ideal interface. But if we want to change it, let's do that separately, and test it on targets that have doloops. Until then, the wide-int branch can provide the current interface just as easily as mainline. we do not have a predicate at the rtl level like we do at the tree level where we can ask if the value can be represented in a mode without loosing any information. But the proper thing to do here is to check to see if iter fits in the mode, if it does, then generate the immed_wide_int_const and if not generate the 0. In this way it does the right thing for loops that iterate 1< Well, like I say, I don't see the current rtx has having a mode, even conceptually. The CONST_INT is just a convenient way of passing an iteration count to a function that only accepts rtxes as arguments. If we could, we'd be passing the widest_int directly as a pointer or reference. But since I'm just repeating what I said above, I should probably bow out of this one. 
you point seems to be that this is a place where the "assumption" that rtxes are signed bites us. if mode happens to be SImode but the variable was unsigned and large, then we will sign extend this and get a pessimistic answer for this pattern. Right. on the other hand, if we do what you suggest, then we are forever limited by the host precision. I see your point, but i do not know what to do. Note that what I'm suggesting is simply restoring the mainline "else" code, rather than something new. wide-int doesn't change which of the if-else arms are chosen, and if we keep the else arm as-is, existing code will be happy. "Forever" is a bit pessimistic (though probably true). We're only limited until somehow who cares about wide counters comes along and changes things. Or if you feel particularly motivated, you could do it after the merg
Re: [Patch] Implementation of n3793
On 2013-11-02 19:02, Paolo Carlini wrote: Can you expand? I think it's just as much inline as the other overload -- does it need to be different? The other overload is constexpr thus it's implicitly inline. The fall back is very simple too and I think it should be declared inline, unless you analyzed the assembly and believe it normally boils down to more than, say, 5 instructions. Paolo I see. It didn't occur to me to declare it inline, as I only ever use the keyword to satisfy ODR requirements. E.g. the non-member swap isn't declared inline either. For future reference, is there a rule of thumb in use?
Re: [Patch] Implementation of n3793
Hi >I see. It didn't occur to me to declare it inline, as I only ever use >the keyword to satisfy >ODR requirements. E.g. the non-member swap isn't declared inline >either. > >For future reference, is there a rule of thumb in use? In general we are very careful with code bloat, but free functions which just forward to other functions should be definitely inline, otherwise typically at widely used optimization levels like -O2 users get suboptimal performance for no reason. But please feel free to experiment and report your findings on the mailing list! Paolo
LRA vs reload on powerpc: 2 extra FAILs that are actually improvements?
Hello, Today's powerpc64-linux gcc has 2 extra failures with -mlra vs. reload (i.e. svn unpatched). (I'm excluding guality failure differences here because there are too many of them that seem to fail at random after minimal changes anywhere in the compiler...). Test results are posted here: reload: http://gcc.gnu.org/ml/gcc-testresults/2013-11/msg00128.html lra: http://gcc.gnu.org/ml/gcc-testresults/2013-11/msg00129.html The new failures and total score is as follows (+=lra, -=reload): +FAIL: gcc.target/powerpc/pr53199.c scan-assembler-times stwbrx 6 +FAIL: gcc.target/powerpc/pr58330.c scan-assembler-not stwbrx === gcc Summary === -# of expected passes 97887 -# of unexpected failures 536 +# of expected passes 97903 +# of unexpected failures 538 # of unexpected successes 38 # of expected failures 244 -# of unsupported tests 1910 +# of unsupported tests 1892 The failure of pr53199.c is because of different instruction selection for bswap. Test case is reduced to just one function: /* { dg-options "-O2 -mcpu=power6 -mavoid-indexed-addresses" } */ long long reg_reverse (long long x) { return __builtin_bswap64 (x); } Reload left vs. LRA right: reg_reverse: reg_reverse: srdi 8,3,32 | addi 8,1,-16 rlwinm 7,3,8,0x | srdi 10,3,32 rlwinm 9,8,8,0x | addi 9,8,4 rlwimi 7,3,24,0,7| stwbrx 3,0,8 rlwimi 7,3,24,16,23 | stwbrx 10,0,9 rlwimi 9,8,24,0,7| ld 3,-16(1) rlwimi 9,8,24,16,23 < sldi 7,7,32 < or 7,7,9 < mr 3,7 < blrblr This same difference is responsible for the failure of pr58330.c which also uses __builtin_bswap64(). The difference in RTL for the test case is this (after reload vs. after LRA): - 11: {%7:DI=bswap(%3:DI);clobber %8:DI;clobber %9:DI;clobber %10:DI;} - 20: %3:DI=%7:DI + 20: %8:DI=%1:DI-0x10 + 21: %8:DI=%8:DI // stupid no-op move + 11: {[%8:DI]=bswap(%3:DI);clobber %9:DI;clobber %10:DI;clobber scratch;} + 19: %3:DI=[%1:DI-0x10] So LRA believes going through memory is better than using a register, even though obviously there are plenty registers available. 
What LRA does: Creating newreg=129 Removing SCRATCH in insn #11 (nop 2) Creating newreg=130 Removing SCRATCH in insn #11 (nop 3) Creating newreg=131 Removing SCRATCH in insn #11 (nop 4) // at this point the insn would be a bswapdi2_64bit: // 11: {%3:DI=bswap(%3:DI);clobber r129;clobber r130;clobber r131;} // cost calculation for the insn alternatives: 0 Early clobber: reject++ 1 Non-pseudo reload: reject+=2 1 Spill pseudo in memory: reject+=3 2 Scratch win: reject+=2 3 Scratch win: reject+=2 4 Scratch win: reject+=2 alt=0,overall=18,losers=1,rld_nregs=0 0 Non-pseudo reload: reject+=2 0 Spill pseudo in memory: reject+=3 0 Non input pseudo reload: reject++ 2 Scratch win: reject+=2 3 Scratch win: reject+=2 alt=1,overall=16,losers=1,rld_nregs=0 Staticly defined alt reject+=12 0 Early clobber: reject++ 2 Scratch win: reject+=2 3 Scratch win: reject+=2 4 Scratch win: reject+=2 0 Conflict early clobber reload: reject-- alt=2,overall=24,losers=1,rld_nregs=0 Choosing alt 1 in insn 11: (0) Z (1) r (2) &b (3) &r (4) X {*bswapdi2_64bit} Change to class BASE_REGS for r129 Change to class GENERAL_REGS for r130 Creating newreg=132 from oldreg=3, assigning class NO_REGS to r132 Change to class NO_REGS for r131 11: {r132:DI=bswap(%3:DI);clobber r129:DI;clobber r130:DI;clobber r131:DI;} REG_UNUSED r131:DI REG_UNUSED r130:DI REG_UNUSED r129:DI LRA selects alternative 1 (Z,r,&b,&r,X) which seems to be the right choice, from looking at the constraints. Reload selects alternative 2 which is slightly^2 discouraged: (??&r,r,&r,&r,&r). Is this an improvement or a regression? If it's an improvement then these two test cases should be adjusted :-) Ciao! Steven
Re: lto-plugin: mismatch between ld's architecture and GCC's configure --host
Hi! Ping. On Fri, 25 Oct 2013 09:23:24 +0200, I wrote: > Ping. To sum it up, with these patches applied, there are no changes for > a "regular" build (not using the new configure options). On the other > hand, configuring GCC as described, it is possible to use the 32-bit x86 > linker for/with a x86_64 build, and get the very same GCC test results as > when using a x86_64 linker. > > > On Mon, 14 Oct 2013 12:28:11 +0200, I wrote: > > On Sat, 12 Oct 2013 12:20:19 +0200, I wrote: > > > This is a bit of a weird scenario -- but it is supposed to work fine in > > > my opinion (but doesn't). > > > > > > I have a GNU toolchain as 32-bit x86 GNU/Linux executables, configured > > > to generate code for 32-bit x86 by default, and using -m64 for x86_64. > > > > > > This toolchain I'm using on a x86_64 system (which can execute 32-bit > > > executables) to build a *native* GCC, that is I'm using the 32-bit > > > toolchain to build a x86_64 GCC (configuring with CC='gcc -m64' CXX='g++ > > > -m64'). I intend to continue using the 32-bit toolchain's linker, which > > > also is a 32-bit executable (GNU ld). That one also defaults to x86 > > > code, but can handle the x86_64 case fine if passed -m elf_x86_64, which > > > GCC does. > > > > > > That the linker is a 32-bit executable is an implementation detail that > > > is not important generally: it's a separate process living in its own > > > address space. However it becomes relevant in the case of linker > > > plugins: the native x86_64 GCC that I'm building also builds a x86_64 > > > lto-plugin, which the 32-bit ld cannot load: > > > > > > $ gcc/xgcc -B[...] [...]/gcc.c-torture/execute/ieee/2320-1.c > > > [...] -flto [...] 
> > > [...]/ld: [...]/gcc/liblto_plugin.so: error loading plugin: > > > [...]/gcc/liblto_plugin.so: wrong ELF class: ELFCLASS64 > > > collect2: error: ld returned 1 exit status > > > > > > So, aside from building a 64-bit ld (which is the "lame" alternative), I > > > now need to teach GCC's build system that the lto-plugin may need special > > > configuration: CC='gcc -m32' -- and [...] its own build of libiberty, > > > too [...] > > > > > Instead of auto-detecting the linker's > > > architecture (and then, what to do with that information?), I intend to > > > make this a manual process (so, some new top-level configure > > > argument(s)). Adding yet another set of {...,CC,...}_FOR_[something] is > > > probably overkill -- I'll try to find something simpler. > > > > > > Any comments on this scenario? > > > > Here are the patches. Unless the new option is exercised, there are no > > effects on a native x86_64 GNU/Linux bootstrap build (the build trees' > > *.o files are identical, as are the test results). OK to commit? > > > Allow overriding the libiberty used for building the LTO plugin. > > > > lto-plugin/ > > * configure.ac (--with-libiberty): New configure option. > > * configure: Regenerate. > > * Makefile.am (libiberty, libiberty_pic): New variables. > > (liblto_plugin_la_LIBADD, liblto_plugin_la_LDFLAGS) > > (liblto_plugin_la_DEPENDENCIES): Use them. > > * Makefile.in: Regenerate. 
> > --- > > lto-plugin/Makefile.am | 20 +++- > > lto-plugin/Makefile.in | 22 -- > > lto-plugin/configure| 17 +++-- > > lto-plugin/configure.ac | 5 + > > 4 files changed, 43 insertions(+), 21 deletions(-) > > > > diff --git lto-plugin/Makefile.am lto-plugin/Makefile.am > > index b24015e..8b7bb54 100644 > > --- lto-plugin/Makefile.am > > +++ lto-plugin/Makefile.am > > @@ -15,17 +15,19 @@ libexecsub_LTLIBRARIES = liblto_plugin.la > > gcc_build_dir = ../$(host_subdir)/gcc > > in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), > > $(gcc_build_dir)/$(lib)) > > > > -# Can be removed when libiberty becomes a normal convenience library > > -Wc=-Wc, > > - > > liblto_plugin_la_SOURCES = lto-plugin.c > > +# Note that we intentionally override the bindir supplied by > > ACX_LT_HOST_FLAGS. > > +liblto_plugin_la_LDFLAGS = $(lt_host_flags) -module -bindir > > $(libexecsubdir) > > +# Can be simplified when libiberty becomes a normal convenience library. > > +libiberty=$(with_libiberty)/libiberty.a > > +libiberty_pic=$(with_libiberty)/pic/libiberty.a > > +Wc=-Wc, > > liblto_plugin_la_LIBADD = \ > > - $(if $(wildcard > > ../libiberty/pic/libiberty.a),$(Wc)../libiberty/pic/libiberty.a,) > > -# Note that we intentionally override the bindir supplied by > > ACX_LT_HOST_FLAGS > > -liblto_plugin_la_LDFLAGS = $(lt_host_flags) -module -bindir > > $(libexecsubdir) \ > > - $(if $(wildcard > > ../libiberty/pic/libiberty.a),,-Wc,../libiberty/libiberty.a) > > -liblto_plugin_la_DEPENDENCIES = $(if $(wildcard \ > > - ../libiberty/pic/libiberty.a),../libiberty/pic/libiberty.a,) > > + $(if $(wildcard $(libiberty_pic)),$(Wc)$(libiberty_pic),) > > +liblto_plugin_la_LDFLAGS += \ > > + $(if $(wildcard $(libiberty_pic)),,-Wc,$(libiberty)) > > +lib
[PATCH, rs6000] Re-permute source register for postreload splits of VSX LE stores
Hi, When I created the patch to split VSX loads and stores to add permutes so the register image was correctly little endian, I forgot to implement a known requirement. When a VSX store is split after reload, we must reuse the source register for the permute, which leaves it in a swapped state after the store. If the register remains live, this is invalid. After the store, we need to permute the register back to its original value. For each of the little endian VSX store patterns, I've replaced the define_insn_and_split with a define_insn and two define_splits, one for prior to reload and one for after reload. The post-reload split has the extra behavior. I don't know of a way to set the insn's length attribute conditionally, so I'm optimistically setting this to 8, though it will be 12 in the post-reload case. Is there any concern with that? Is there a way to make it fully accurate? Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no regressions. This fixes two failing test cases. Is this ok for trunk? Thanks! Bill 2013-11-02 Bill Schmidt * config/rs6000/vsx.md (*vsx_le_perm_store_ for VSX_D): Replace the define_insn_and_split with a define_insn and two define_splits, with the split after reload re-permuting the source register to its original value. (*vsx_le_perm_store_ for VSX_W): Likewise. (*vsx_le_perm_store_v8hi): Likewise. (*vsx_le_perm_store_v16qi): Likewise. 
Index: gcc/config/rs6000/vsx.md === --- gcc/config/rs6000/vsx.md(revision 204192) +++ gcc/config/rs6000/vsx.md(working copy) @@ -333,12 +333,18 @@ [(set_attr "type" "vecload") (set_attr "length" "8")]) -(define_insn_and_split "*vsx_le_perm_store_" +(define_insn "*vsx_le_perm_store_" [(set (match_operand:VSX_D 0 "memory_operand" "=Z") (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set_attr "type" "vecstore") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") +(match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" [(set (match_dup 2) (vec_select: (match_dup 1) @@ -347,21 +353,43 @@ (vec_select: (match_dup 2) (parallel [(const_int 1) (const_int 0)])))] - " { operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) : operands[1]; -} - " - [(set_attr "type" "vecstore") - (set_attr "length" "8")]) +}) -(define_insn_and_split "*vsx_le_perm_store_" +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. 
+(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") +(match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) +(vec_select: + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) +(vec_select: + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 1) +(vec_select: + (match_dup 1) + (parallel [(const_int 1) (const_int 0)])))] + "") + +(define_insn "*vsx_le_perm_store_" [(set (match_operand:VSX_W 0 "memory_operand" "=Z") (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set_attr "type" "vecstore") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") +(match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" [(set (match_dup 2) (vec_select: (match_dup 1) @@ -372,21 +400,46 @@ (match_dup 2) (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] - " { operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) : operands[1]; -} - " - [(set_attr "type" "vecstore") - (set_attr "length" "8")]) +}) -(define_insn_and_split "*vsx_le_perm_store_v8hi" +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") +(match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) +(vec_select: + (match_dup 1) + (parallel [(const_int 2) (const_int 3) +(const_int 0) (const_int 1)]))) + (set (match_dup 0) +(vec_select: + (match_dup 1) + (parallel [(const_int 2) (const_int 3) +
Re: [Patch] Implementation of n3793
On 2013-11-02 23:31, Paolo Carlini wrote: In general we are very careful with code bloat, but free functions which just forward to other functions should be definitely inline, otherwise typically at widely used optimization levels like -O2 users get suboptimal performance for no reason. But please feel free to experiment and report your findings on the mailing list! Paolo After review, the free functions that only ever forward to something else and are not implicitly inline appear to be that __constexpr_addressof overload and non-member swap. Here's a patch (against master) that marks them inline, all tests pass. I briefly and casually attempted to notice any effect, but to be honest in my smallish testcases GCC appeared to already see through everything. >From 0175e61f3295159fe7ac2fefec05b829ecd6bc4d Mon Sep 17 00:00:00 2001 From: Michael Brune Date: Sun, 3 Nov 2013 01:37:16 +0100 Subject: [PATCH] Marked some forwarding free functions as inline that weren't already. --- libstdc++-v3/include/experimental/optional | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/experimental/optional b/libstdc++-v3/include/experimental/optional index 5915892..06e0f29 100644 --- a/libstdc++-v3/include/experimental/optional +++ b/libstdc++-v3/include/experimental/optional @@ -157,7 +157,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ template::value, int>::type...> -_Tp* __constexpr_addressof(_Tp& __t) +inline _Tp* __constexpr_addressof(_Tp& __t) { return std::__addressof(__t); } /** @@ -790,7 +790,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // [X.Y.11] template -void +inline void swap(optional<_Tp>& __lhs, optional<_Tp>& __rhs) noexcept(noexcept(__lhs.swap(__rhs))) { __lhs.swap(__rhs); } -- 1.8.1.2