[C++ PATCH 2/3] Support implicit parameter packs.

2013-11-02 Thread Adam Butcher
* parser.c (convert_generic_types_to_packs): New function to transform
a range of implicitly introduced non-pack template parms to be parameter
packs.
(cp_parser_parameter_declaration_list): If a function parameter pack
contains generic types, convert them to packs prior to grokdeclarator.
---
 gcc/cp/parser.c | 81 -
 1 file changed, 74 insertions(+), 7 deletions(-)

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index b699ac4..10b9f72 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -2109,6 +2109,8 @@ static tree cp_parser_late_parsing_omp_declare_simd
 
 static tree synthesize_implicit_template_parm
   (cp_parser *);
+static tree convert_generic_types_to_packs
+  (tree, tree, int, int);
 static tree finish_fully_implicit_template
   (cp_parser *, tree);
 
@@ -18069,7 +18071,7 @@ static tree
 cp_parser_parameter_declaration_list (cp_parser* parser, bool *is_error)
 {
   tree parameters = NULL_TREE;
-  tree *tail = &parameters; 
+  tree *tail = &parameters;
   bool saved_in_unbraced_linkage_specification_p;
   int index = 0;
 
@@ -18078,7 +18080,7 @@ cp_parser_parameter_declaration_list (cp_parser* 
parser, bool *is_error)
   /* The special considerations that apply to a function within an
  unbraced linkage specifications do not apply to the parameters
  to the function.  */
-  saved_in_unbraced_linkage_specification_p 
+  saved_in_unbraced_linkage_specification_p
 = parser->in_unbraced_linkage_specification_p;
   parser->in_unbraced_linkage_specification_p = false;
 
@@ -18088,6 +18090,10 @@ cp_parser_parameter_declaration_list (cp_parser* 
parser, bool *is_error)
   cp_parameter_declarator *parameter;
   tree decl = error_mark_node;
   bool parenthesized_p = false;
+  int template_parm_idx = (parser->num_template_parameter_lists?
+  TREE_VEC_LENGTH (INNERMOST_TEMPLATE_PARMS
+   (current_template_parms)) : 0);
+
   /* Parse the parameter.  */
   parameter
= cp_parser_parameter_declaration (parser,
@@ -18099,11 +18105,30 @@ cp_parser_parameter_declaration_list (cp_parser* 
parser, bool *is_error)
   deprecated_state = DEPRECATED_SUPPRESS;
 
   if (parameter)
-   decl = grokdeclarator (parameter->declarator,
-  &parameter->decl_specifiers,
-  PARM,
-  parameter->default_argument != NULL_TREE,
-  &parameter->decl_specifiers.attributes);
+   {
+ /* If a function parameter pack was specified and an implicit template
+parameter was introduced during cp_parser_parameter_declaration,
+change any implicit parameters introduced into packs.  */
+ if (parser->implicit_template_parms
+ && parameter->declarator
+ && parameter->declarator->parameter_pack_p)
+   {
+ int latest_template_parm_idx = TREE_VEC_LENGTH
+   (INNERMOST_TEMPLATE_PARMS (current_template_parms));
+
+ if (latest_template_parm_idx != template_parm_idx)
+   parameter->decl_specifiers.type = convert_generic_types_to_packs
+ (parameter->decl_specifiers.type,
+  current_template_parms,
+  template_parm_idx, latest_template_parm_idx);
+   }
+
+ decl = grokdeclarator (parameter->declarator,
+&parameter->decl_specifiers,
+PARM,
+parameter->default_argument != NULL_TREE,
+&parameter->decl_specifiers.attributes);
+   }
 
   deprecated_state = DEPRECATED_NORMAL;
 
@@ -31213,6 +31238,48 @@ synthesize_implicit_template_parm  (cp_parser *parser)
   return new_type;
 }
 
+/* Convert the generic type parameters in PARM that match the types given in 
the
+   range [START_IDX, END_IDX) from the template parameters CURRENT into generic
+   type packs.  */
+
+tree
+convert_generic_types_to_packs (tree parm,
+   tree current, int start_idx, int end_idx)
+{
+  int depth = TMPL_PARMS_DEPTH (current);
+  current = INNERMOST_TEMPLATE_PARMS (current);
+  tree replacement = make_tree_vec (TREE_VEC_LENGTH (current));
+
+  for (int i = start_idx; i < end_idx; ++i)
+{
+  /* Create a distinct parameter pack type from the current parm and add it
+to the replacement args to tsubst below into the generic function
+parameter.  */
+
+  tree t = copy_type (TREE_TYPE (TREE_VALUE (TREE_VEC_ELT (current, i))));
+  TYPE_STUB_DECL (t) = TYPE_NAME (t) = TEMPLATE_TYPE_DECL (t);
+  TYPE_MAIN_VARIANT (t) = t;
+  TEMPLATE_TYPE_PARAMETER_PACK (t) = true;
+  SET_TYPE_STRUCTURAL_EQUALITY (t);
+  TREE_VEC_ELT (replacement, i) = t;
+}
+
+  if (depth > 1)
+{
+  /* Build up a tree vec of

Re: [SKETCH] Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637.

2013-11-02 Thread Adam Butcher
Hi Jason,

I've got the tsubst solution for implicit parameter packs working now.  I've
also improved the efficiency of incremental template parameter synthesis and
added some testcases.  All C++14 generic lambda examples pass and no new
regressions.

Cheers,
Adam

Patch summary (3):

  Refactor implicit function template implementation and fix 58534,
58536, 58548, 58549 and 58637.
  Support implicit parameter packs.
  Add some generic lambda test cases.

 gcc/cp/decl.c  |  30 +-
 gcc/cp/parser.c| 331 +++--
 gcc/cp/parser.h|  19 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C   |  25 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C|  42 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C  |  10 +
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C|  51 
 .../g++.dg/cpp1y/lambda-generic-variadic.C |  15 +
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C  |  25 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C  |  25 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C   |   4 +
 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C|  23 ++
 gcc/testsuite/g++.dg/cpp1y/pr58534.C   |   9 +
 gcc/testsuite/g++.dg/cpp1y/pr58536.C   |  12 +
 gcc/testsuite/g++.dg/cpp1y/pr58548.C   |  10 +
 gcc/testsuite/g++.dg/cpp1y/pr58549.C   |  10 +
 gcc/testsuite/g++.dg/cpp1y/pr58637.C   |   7 +
 gcc/tree.c |  22 ++
 gcc/tree.h |   5 +
 19 files changed, 561 insertions(+), 114 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58534.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58536.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58548.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58549.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58637.C

-- 
1.8.4



[C++ PATCH 3/3] Add some generic lambda test cases.

2013-11-02 Thread Adam Butcher
gcc/testsuite/g++.dg/cpp1y/
 * lambda-generic.C: New test case.
 * lambda-generic-cfun.C: New test case.
 * lambda-generic-dep.C: New test case.
 * lambda-generic-udt.C: New test case.
 * lambda-generic-variadic.C: New test case.
 * lambda-generic-x.C: New test case.
 * lambda-generic-xcfun.C: New test case.
 * lambda-generic-xudt.C: New test case.
 * lambda-generic-mixed.C: New test case.
---
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C   | 25 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C| 42 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C  | 10 +
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C| 51 ++
 .../g++.dg/cpp1y/lambda-generic-variadic.C | 15 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C  | 25 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C  | 25 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C   |  4 ++
 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C| 23 ++
 9 files changed, 220 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-x.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xcfun.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-xudt.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic.C

diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C
new file mode 100644
index 000..5e51526
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-cfun.C
@@ -0,0 +1,25 @@
+// Generic lambda conversion to function ptr test from N3690 5.1.2.6
+// { dg-options "-std=c++1y" }
+
+void f1(int (*)(int)) { }
+void f2(char (*)(int)) { }
+void g(int (*)(int)) { } // #1
+void g(char (*)(char)) { } // #2
+void h(int (*)(int)) { } // #3
+void h(char (*)(int)) { } // #4
+
+int main()
+{
+  auto glambda = [](auto a) { return a; };
+  int (*fp)(int) = glambda;
+  f1(glambda); // OK
+  f2(glambda); // { dg-error "invalid user-defined conversion" }
+  g(glambda); // { dg-error "ambiguous" }
+  h(glambda); // OK: calls #3 since it is convertible from ID
+  int& (*fpi)(int*) = [](auto* a) -> auto& { return *a; }; // OK
+
+  auto GL = [](auto a) { return a; };
+  int (*GL_int)(int) = GL; // OK: through conversion function template
+  GL_int(3);
+}
+
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C
new file mode 100644
index 000..bb68738
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep.C
@@ -0,0 +1,42 @@
+// Generic lambda type dependence test part from N3690 5.1.2.12
+// { dg-options "-std=c++1y" }
+
+void f(int, const int (&)[2] = {}) { } // #1
+void f(const int&, const int (&)[1]) { } // #2
+
+void test()
+{
+  const int x = 17;
+  auto g = [](auto a) {
+f(x); // OK: calls #1, does not capture x
+  };
+  auto g2 = [=](auto a) {
+int selector[sizeof(a) == 1 ? 1 : 2]{};
+f(x, selector); // OK: is a dependent expression, so captures x
+  };
+}
+
+struct S {
+  struct N {
+auto test () { return 7.f; }
+  };
+};
+
+#include <type_traits>
+
+int main()
+{
+  auto f = [] <typename T> (T const& s) mutable {
+typename T::N x;
+return x.test ();
+  };
+  auto g = [] (auto const& s) {
+typename std::decay<decltype(s)>::type::N x;
+return x.test ();
+  };
+
+  S i;
+  f(i);
+  g(i);
+}
+
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C
new file mode 100644
index 000..4e26fc5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-mixed.C
@@ -0,0 +1,10 @@
+// Mixed explicit and implicit generic lambda test.
+// { dg-options "-std=c++1y" }
+
+int main()
+{
+  auto f = [] <typename T> (T a, auto b) { return a + b; };
+  auto g = [] <typename T> (auto a, T b) { return a + b; };
+
+  return f (1.0, 3) + g (1.0, 3);
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C
new file mode 100644
index 000..9f6d45a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-udt.C
@@ -0,0 +1,51 @@
+// Ensure that generic lambdas properly construct and destroy user types.
+// { dg-options "-std=c++1y -DUSE_AUTO_SYNTAX" }
+// { dg-do run }
+
+int i = 3;
+
+struct S
+{
+  S () { ++i; }
+  S (S const&) { ++i; }
+  S (S&& old) { old.shadow = true; i += 2; }
+  ~S () { if (shadow) i -= 2; else --i; }
+
+  bool shadow = false;
+};
+
+extern "C" void printf(...);
+#define assert(e) if (e); else \
+

[C++ PATCH 1/3] Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637.

2013-11-02 Thread Adam Butcher
gcc/
* tree.c (grow_tree_vec_stat): New function ...
* tree.h (grow_tree_vec_stat) (grow_tree_vec): ... and its declaration
and macro front-end.

gcc/cp/
PR c++/58534
PR c++/58536
PR c++/58548
PR c++/58549
PR c++/58637
* parser.h (struct cp_parser): New members implicit_template_parms,
implicit_template_scope and auto_is_implicit_function_template_parm_p.
* parser.c (add_implicit_template_parms): Refactor as ...
(synthesize_implicit_template_parm): ... this to append a new template
type parm to the current template parameter list (introducing a new list
if necessary).
(cp_parser_new): Initialize new cp_parser members.
(cp_parser_parameter_declaration_clause): Consider auto as implicit
template parm when parsing a parameter declaration (unless parsing an
explicit specialization).
(cp_parser_parameter_declaration_list): Remove local
implicit_template_parms counter and reset cp_parser implicit template
state when complete.
(cp_parser_lambda_expression): Reset implicit template cp_parser members
whilst generating lambda class.
(cp_parser_function_definition_after_declarator): Reset implicit
template cp_parser members whilst parsing function definition.
(make_generic_type_name): Respell '<autoN>' as 'auto:N' which works
better with template diagnostics.
(cp_parser_simple_type_specifier): Synthesize implicit template parm on
parsing 'auto' if auto_is_implicit_function_template_parm_p and provide
diagnostics ...
* decl.c (grokdeclarator): ... that were previously done here.

gcc/testsuite/g++.dg/
* cpp1y/pr58534.C: New testcase.
* cpp1y/pr58536.C: New testcase.
* cpp1y/pr58548.C: New testcase.
* cpp1y/pr58549.C: New testcase.
* cpp1y/pr58637.C: New testcase.
---
 gcc/cp/decl.c|  30 +---
 gcc/cp/parser.c  | 278 +++
 gcc/cp/parser.h  |  19 +++
 gcc/testsuite/g++.dg/cpp1y/pr58534.C |   9 ++
 gcc/testsuite/g++.dg/cpp1y/pr58536.C |  12 ++
 gcc/testsuite/g++.dg/cpp1y/pr58548.C |  10 ++
 gcc/testsuite/g++.dg/cpp1y/pr58549.C |  10 ++
 gcc/testsuite/g++.dg/cpp1y/pr58637.C |   7 +
 gcc/tree.c   |  22 +++
 gcc/tree.h   |   5 +
 10 files changed, 281 insertions(+), 121 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58534.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58536.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58548.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58549.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr58637.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 09c1daa..786814c 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -10375,33 +10375,11 @@ grokdeclarator (const cp_declarator *declarator,
 
   if (type_uses_auto (type))
{
- if (template_parm_flag)
-   {
- error ("template parameter declared %");
- type = error_mark_node;
-   }
- else if (decl_context == CATCHPARM)
-   {
- error ("catch parameter declared %");
- type = error_mark_node;
-   }
- else if (current_class_type && LAMBDA_TYPE_P (current_class_type))
-   {
- if (cxx_dialect < cxx1y)
-   pedwarn (location_of (type), 0,
-"use of % in lambda parameter declaration "
-"only available with "
-"-std=c++1y or -std=gnu++1y");
-   }
- else if (cxx_dialect < cxx1y)
-   pedwarn (location_of (type), 0,
-"use of % in parameter declaration "
-"only available with "
-"-std=c++1y or -std=gnu++1y");
+ if (cxx_dialect >= cxx1y)
+   error ("%<auto%> parameter not permitted in this context");
  else
-   pedwarn (location_of (type), OPT_Wpedantic,
-"ISO C++ forbids use of % in parameter "
-"declaration");
+   error ("parameter declared %<auto%>");
+ type = error_mark_node;
}
 
   /* A parameter declared as an array of T is really a pointer to T.
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index bbc8e75..b699ac4 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -2107,8 +2107,8 @@ static bool 
cp_parser_ctor_initializer_opt_and_function_body
 static tree cp_parser_late_parsing_omp_declare_simd
   (cp_parser *, tree);
 
-static tree add_implicit_template_parms
-  (cp_parser *, size_t, tree);
+static tree synthesize_implicit_template_parm
+  (cp_parser *);
 static tree finish_fully_implicit_template
   (cp_parser *, tree);
 
@@ -3443,7 +3443,10 @@ cp_parser_new (void)
   parser->num_template_parameter_lists = 0;
 
   /* Not decla

[wide-int] Fix aarch{32,64} builds

2013-11-02 Thread Richard Sandiford
I decided to lump these together since the problems were the same.
There were some typos in the real_to_integer invocation, while changing:

/* There must be no padding.  */
if (!host_integerp (TYPE_SIZE (type), 1)
|| (tree_low_cst (TYPE_SIZE (type), 1)
!= count * GET_MODE_BITSIZE (*modep)))
  return -1;

to:

if (!tree_fits_uhwi_p (TYPE_SIZE (type))
|| (tree_to_uhwi (TYPE_SIZE (type))
!= count * GET_MODE_BITSIZE (*modep)))
  return -1;

introduced a signed/unsigned warning.

Tested on aarch64-linux-gnueabi & arm-linux-gnueabi and applied as
obvious.

Thanks,
Richard


Index: gcc/config/aarch64/aarch64.c
===
--- gcc/config/aarch64/aarch64.c(revision 204311)
+++ gcc/config/aarch64/aarch64.c(working copy)
@@ -6030,9 +6030,7 @@
  - tree_to_uhwi (TYPE_MIN_VALUE (index)));
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -6060,9 +6058,7 @@
  }
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -6092,9 +6088,7 @@
  }
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -7433,7 +7427,7 @@
   int exponent;
   unsigned HOST_WIDE_INT mantissa, mask;
   REAL_VALUE_TYPE r, m;
-  bool &fail
+  bool fail;
 
   if (!CONST_DOUBLE_P (x))
 return false;
@@ -7457,7 +7451,7 @@
  WARNING: If we ever have a representation using more than 2 * H_W_I - 1
  bits for the mantissa, this can fail (low bits will be lost).  */
   real_ldexp (&m, &r, point_pos - exponent);
-  w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
 
   /* If the low part of the mantissa has bits set we cannot represent
  the value.  */
Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c(revision 204311)
+++ gcc/config/arm/arm.c(working copy)
@@ -4678,9 +4678,7 @@
  - tree_to_uhwi (TYPE_MIN_VALUE (index)));
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -4708,9 +4706,7 @@
  }
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -4740,9 +4736,7 @@
  }
 
/* There must be no padding.  */
-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
 
return count;
@@ -11299,7 +11293,7 @@
  WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
  bits for the mantissa, this may fail (low bits would be lost).  */
   real_ldexp (&m, &r, point_pos - exponent);
-  wide_int w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
   mantissa = w.elt (0);
   mant_hi = w.elt (1);
 


[wide-int] Fix inverted bfin test

2013-11-02 Thread Richard Sandiford
A test for "size > 8" had got inverted.

Tested on bfin-elf and applied as obvious.

Thanks,
Richard


Index: gcc/config/bfin/bfin.c
===
--- gcc/config/bfin/bfin.c  (revision 204311)
+++ gcc/config/bfin/bfin.c  (working copy)
@@ -3286,7 +3286,7 @@
  memcpy can use 32 bit loads/stores.  */
   if (TYPE_SIZE (type)
   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
-  && !wi::gtu_p (TYPE_SIZE (type), 8)
+  && wi::gtu_p (TYPE_SIZE (type), 8)
   && align < 32)
 return 32;
   return align;


[wide-int] Fix exact_log2 zext test

2013-11-02 Thread Richard Sandiford
...which I'd fluffed when doing the wi:: conversion.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixed a spurious
gcc.dg difference on alpha-linux-gnu.  Applied as obvious.

Thanks,
Richard


Index: gcc/wide-int.cc
===
--- gcc/wide-int.cc (revision 204311)
+++ gcc/wide-int.cc (working copy)
@@ -2044,7 +2044,7 @@
 
   /* Get a zero-extended form of block CRUX.  */
   unsigned HOST_WIDE_INT hwi = x.val[crux];
-  if (crux * HOST_BITS_PER_WIDE_INT > x.precision)
+  if ((crux + 1) * HOST_BITS_PER_WIDE_INT > x.precision)
 hwi = zext_hwi (hwi, x.precision % HOST_BITS_PER_WIDE_INT);
 
   /* Now it's down to whether HWI is a power of 2.  */


[wide-int] Fix mask inversions in expand_copysign_bit

2013-11-02 Thread Richard Sandiford
At some point the original "~"s got straightened to "-"s.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  This fixed
a testsuite difference in bfin-elf.  Applied as obvious.

Thanks,
Richard


Index: gcc/optabs.c
===
--- gcc/optabs.c(revision 204311)
+++ gcc/optabs.c(working copy)
@@ -3678,7 +3678,7 @@
  if (!op0_is_abs)
op0_piece
  = expand_binop (imode, and_optab, op0_piece,
- immed_wide_int_const (-mask, imode),
+ immed_wide_int_const (~mask, imode),
  NULL_RTX, 1, OPTAB_LIB_WIDEN);
  op1 = expand_binop (imode, and_optab,
  operand_subword_force (op1, i, mode),
@@ -3708,7 +3708,7 @@
   op0 = gen_lowpart (imode, op0);
   if (!op0_is_abs)
op0 = expand_binop (imode, and_optab, op0,
-   immed_wide_int_const (-mask, imode),
+   immed_wide_int_const (~mask, imode),
NULL_RTX, 1, OPTAB_LIB_WIDEN);
   temp = expand_binop (imode, ior_optab, op0, op1,
   gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN);


[wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Richard Sandiford
Bah.  After all that effort, it turns out that -- by design --
there is one special case where CONST_INTs are not sign-extended.
Nonzero/true BImode integers are stored as STORE_FLAG_VALUE,
which can be 1 rather than -1.  So (const_int 1) can be a valid
BImode integer -- and consequently (const_int -1) can be wrong --
even though BImode only has 1 bit.

It might be nice to change that, but for wide-int I think we should
just treat rtxes like trees for now.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some ICEs
seen on bfin-elf.  OK to install?

Thanks,
Richard


Index: gcc/rtl.h
===
--- gcc/rtl.h   (revision 204311)
+++ gcc/rtl.h   (working copy)
@@ -1408,7 +1408,9 @@
   {
 static const enum precision_type precision_type = VAR_PRECISION;
 static const bool host_dependent_precision = false;
-static const bool is_sign_extended = true;
+/* This ought to be true, except for the special case that BImode
+   is canonicalized to STORE_FLAG_VALUE, which might be 1.  */
+static const bool is_sign_extended = false;
 static unsigned int get_precision (const rtx_mode_t &);
 static wi::storage_ref decompose (HOST_WIDE_INT *, unsigned int,
  const rtx_mode_t &);
@@ -1430,10 +1432,6 @@
   switch (GET_CODE (x.first))
 {
 case CONST_INT:
-  if (precision < HOST_BITS_PER_WIDE_INT)
-   gcc_checking_assert (INTVAL (x.first)
-== sext_hwi (INTVAL (x.first), precision));
-
   return wi::storage_ref (&INTVAL (x.first), 1, precision);
   
 case CONST_WIDE_INT:


Re: [wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Mike Stump
On Nov 2, 2013, at 3:30 AM, Richard Sandiford  
wrote:
> Bah.  After all that effort, it turns out that -- by design --
> there is one special case where CONST_INTs are not sign-extended.

I'm thinking this needs commentary in wide-int.h, though, not sure what we'd 
say...

[wide-int] Another tweak to convert_modes

2013-11-02 Thread Richard Sandiford
It turns out that when doing a vector shift by 2, the optab routine
passes (const_int 2) to convert_modes with oldmode set to the mode
of the shift (e.g. something like V8HI).  When the target mode is a
real integer mode like SImode, mainline just ignores that oldmode
and returns a (const_int 2) regardless, but wide-int doesn't.

Saying that (const_int 2) has a vector mode is almost certainly a bug
that ought to be trapped by an assert, but we're not trying to fight
that battle here.  The current code:

  if (CONST_SCALAR_INT_P (x) 
  && GET_MODE_CLASS (mode) == MODE_INT
  && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))

is already coping with bogus oldmodes, just not in the way that
other routines seem to expect.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  This fixed several
testsuite changes on the ARM targets.  OK to install?

Thanks,
Richard


Index: gcc/expr.c
===
--- gcc/expr.c  2013-11-02 10:34:44.083635650 +
+++ gcc/expr.c  2013-11-02 10:42:55.179233840 +
@@ -712,13 +712,12 @@ convert_modes (enum machine_mode mode, e
 return x;
 
   if (CONST_SCALAR_INT_P (x) 
-  && GET_MODE_CLASS (mode) == MODE_INT
-  && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))
+  && GET_MODE_CLASS (mode) == MODE_INT)
 {
   /* If the caller did not tell us the old mode, then there is
 not much to do with respect to canonization.  We have to assume
 that all the bits are significant.  */
-  if (oldmode == VOIDmode)
+  if (GET_MODE_CLASS (oldmode) != MODE_INT)
oldmode = MAX_MODE_INT;
   wide_int w = wide_int::from (std::make_pair (x, oldmode),
   GET_MODE_PRECISION (mode),


[wide-int] doloop fixes

2013-11-02 Thread Richard Sandiford
The first part of this is a simple type mismatch -- get_max_loop_iterations
returns a widest_int aka max_wide_int -- and I'd have installed it as
obvious.  The second part isn't as obvious though.  The old code stored
the maximum iterations as:

  if (!get_max_loop_iterations (loop, &iter)
  || !iter.fits_shwi ())
iterations_max = const0_rtx;
  else
iterations_max = GEN_INT (iter.to_shwi ());

and the new code uses:

  if (!get_max_loop_iterations (loop, &iter)
  || !wi::fits_shwi_p (iter))
iterations_max = const0_rtx;
  else
iterations_max = immed_wide_int_const (iter, mode);

which includes an extra canonicalisation.  I agree it would be good to do
that in principle, but I'm not sure it copes correctly with the case where
the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
real fix would be to avoid the host dependence altogether, i.e. get rid
of the fits_shwi_p too.

As it stands, this breaks bfin-elf's pattern, which has:

  /* Due to limitations in the hardware (an initial loop count of 0
 does not loop 2^32 times) we must avoid to generate a hardware
 loops when we cannot rule out this case.  */
  if (!flag_unsafe_loop_optimizations
  && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
FAIL;

With the sign-extending conversion, this now triggers more often than
it was supposed to.

Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
and since we still use that form in the doloop_begin code, I think we should
just back the immed_wide_int_const out.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite changes in bfin-elf.  OK to install?

Thanks,
Richard


Index: gcc/loop-doloop.c
===
--- gcc/loop-doloop.c   2013-11-02 10:49:37.463178153 +
+++ gcc/loop-doloop.c   2013-11-02 10:49:55.927314661 +
@@ -549,7 +549,7 @@ doloop_modify (struct loop *loop, struct
   {
 rtx init;
 unsigned level = get_loop_level (loop) + 1;
-wide_int iter;
+widest_int iter;
 rtx iter_rtx;
 
 if (!get_max_loop_iterations (loop, &iter)
@@ -673,7 +673,7 @@ doloop_optimize (struct loop *loop)
   || !wi::fits_shwi_p (iter))
 iterations_max = const0_rtx;
   else
-iterations_max = immed_wide_int_const (iter, mode);
+iterations_max = GEN_INT (iter.to_shwi ());
   level = get_loop_level (loop) + 1;
 
   /* Generate looping insn.  If the pattern FAILs then give up trying


[wide-int] Fix fold_ternary VEC_PERM_EXPR handling

2013-11-02 Thread Richard Sandiford
The wide-int conversion for the fold_ternary VEC_PERM_EXPR case
converted a mask of valid element numbers to an element count,
which is one greater.  The old code set need_mask_canon if an index
was greater than the mask, but the new code sets it if an index
is greater than the element count, giving an off-by-one error.

This patch restores the mainline mask handling and uses that in
the gtu_p call instead.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite differences for one of the ARM targets.  OK to install?

Thanks,
Richard


Index: gcc/fold-const.c
===
--- gcc/fold-const.c2013-11-02 11:07:33.097149207 +
+++ gcc/fold-const.c2013-11-02 11:07:38.107188425 +
@@ -14360,7 +14360,7 @@ fold_ternary_loc (location_t loc, enum t
 case VEC_PERM_EXPR:
   if (TREE_CODE (arg2) == VECTOR_CST)
{
- unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
  bool need_mask_canon = false;
  bool all_in_vec0 = true;
@@ -14368,23 +14368,22 @@ fold_ternary_loc (location_t loc, enum t
  bool maybe_identity = true;
  bool single_arg = (op0 == op1);
  bool changed = false;
- int nelts_cnt = single_arg ? nelts : nelts * 2;
 
+ mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
  for (i = 0; i < nelts; i++)
{
  tree val = VECTOR_CST_ELT (arg2, i);
-
  if (TREE_CODE (val) != INTEGER_CST)
return NULL_TREE;
 
  /* Make sure that the perm value is in an acceptable
 range.  */
  wide_int t = val;
- if (wi::gtu_p (t, nelts_cnt))
+ if (wi::gtu_p (t, mask))
{
  need_mask_canon = true;
- sel[i] = t.to_uhwi () & (nelts_cnt - 1);
+ sel[i] = t.to_uhwi () & mask;
}
  else
sel[i] = t.to_uhwi ();


Re: [wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Richard Sandiford
Mike Stump  writes:
> On Nov 2, 2013, at 3:30 AM, Richard Sandiford
>  wrote:
>> Bah.  After all that effort, it turns out that -- by design --
>> there is one special case where CONST_INTs are not sign-extended.
>
> I'm thinking this needs commentary in wide-int.h, though, not sure what
> we'd say...

AIUI the only valid values of STORE_FLAG_VALUE are 1 and -1.  If others
are used, the rtx decompose routine would need to use the scratch array.

So wide-int itself shouldn't need to care.  It just means that a
1-precision rtx input can be sign or zero extended, just like for
N-precision trees.  And we indicate that by setting is_sign_extended
to false.

Thanks,
Richard


[PATCH, i386]: Introduce Ts and Tv address constraints

2013-11-02 Thread Uros Bizjak
Hello!

These two constraints go together with address_no_seg_operand and
vsib_address_operand operand predicates.

2013-11-02  Uros Bizjak  

* config/i386/constraints.md (Ts, Tv): New address constrains.
* config/i386/i386.md (*lea<mode>, *<mode>_<bndcheck>): Use Ts
constraint for address_no_seg_operand.
* config/i386/sse.md (*avx512pf_gatherpf_mask)
(*avx512pf_gatherpf, *avx512pf_scatterpf_mask)
(*avx512pf_scatterpf, *avx2_gathersi)
(*avx2_gathersi_2, *avx2_gatherdi, *avx2_gatherdi_2)
(*avx2_gatherdi_3, *avx2_gatherdi_4)
(*avx512f_gathersi, *avx512f_gathersi_2)
(*avx512f_gatherdi, *avx512f_gatherdi_2)
(*avx512f_scattersi *avx512f_scatterdi): Use Tv
constraint for vsib_address_operand.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and
committed to mainline SVN.

Uros.
Index: config/i386/constraints.md
===
--- config/i386/constraints.md  (revision 204298)
+++ config/i386/constraints.md  (working copy)
@@ -237,9 +237,19 @@
   (match_operand 0 "x86_64_zext_immediate_operand"))
 
 ;; T prefix is used for different address constraints
+;;   v - VSIB address
+;;   s - address with no segment register
 ;;   i - address with no index and no rip
 ;;   b - address with no base and no rip
 
+(define_address_constraint "Tv"
+  "VSIB address operand"
+  (match_operand 0 "vsib_address_operand"))
+
+(define_address_constraint "Ts"
+  "Address operand without segment register"
+  (match_operand 0 "address_no_seg_operand"))
+
 (define_address_constraint "Ti"
   "MPX address operand without index"
   (match_operand 0 "address_mpx_no_index_operand"))
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 204298)
+++ config/i386/i386.md (working copy)
@@ -5394,7 +5394,7 @@
 
 (define_insn_and_split "*lea"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-   (match_operand:SWI48 1 "address_no_seg_operand" "p"))]
+   (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
   ""
 {
   if (SImode_address_operand (operands[1], VOIDmode))
@@ -18297,7 +18297,7 @@
 
 (define_insn "*_"
   [(parallel [(unspec [(match_operand:BND 0 "register_operand" "B")
-   (match_operand: 1 "address_no_seg_operand" 
"p")] BNDCHECK)
+   (match_operand: 1 "address_no_seg_operand" 
"Ts")] BNDCHECK)
   (set (match_operand:BLK 2 "bnd_mem_operator")
(unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))])]
   "TARGET_MPX"
Index: config/i386/sse.md
===
--- config/i386/sse.md  (revision 204298)
+++ config/i386/sse.md  (working copy)
@@ -11462,7 +11462,7 @@
  [(match_operand: 0 "register_operand" "k")
   (match_operator: 5 "vsib_mem_operator"
[(unspec:P
-  [(match_operand:P 2 "vsib_address_operand" "p")
+  [(match_operand:P 2 "vsib_address_operand" "Tv")
(match_operand:VI48_512 1 "register_operand" "v")
(match_operand:SI 3 "const1248_operand" "n")]
   UNSPEC_VSIBADDR)])
@@ -11489,7 +11489,7 @@
  [(const_int -1)
   (match_operator: 4 "vsib_mem_operator"
[(unspec:P
-  [(match_operand:P 1 "vsib_address_operand" "p")
+  [(match_operand:P 1 "vsib_address_operand" "Tv")
(match_operand:VI48_512 0 "register_operand" "v")
(match_operand:SI 2 "const1248_operand" "n")]
   UNSPEC_VSIBADDR)])
@@ -11533,7 +11533,7 @@
  [(match_operand: 0 "register_operand" "k")
   (match_operator: 5 "vsib_mem_operator"
[(unspec:P
-  [(match_operand:P 2 "vsib_address_operand" "p")
+  [(match_operand:P 2 "vsib_address_operand" "Tv")
(match_operand:VI48_512 1 "register_operand" "v")
(match_operand:SI 3 "const1248_operand" "n")]
   UNSPEC_VSIBADDR)])
@@ -11560,7 +11560,7 @@
  [(const_int -1)
   (match_operator: 4 "vsib_mem_operator"
[(unspec:P
-  [(match_operand:P 1 "vsib_address_operand" "p")
+  [(match_operand:P 1 "vsib_address_operand" "Tv")
(match_operand:VI48_512 0 "register_operand" "v")
(match_operand:SI 2 "const1248_operand" "n")]
   UNSPEC_VSIBADDR)])
@@ -13650,7 +13650,7 @@
  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
   (match_operator: 7 "vsib_mem_operator"
 [(unspec:P
-   [(match_operand:P 3 "vsib_address_operand" "p")
+   [(match_operand:P 3 "vsib_address_operand" "Tv")
 (match_operand: 4 "register_operand" "x")
 (match_operand:SI 6 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
@@ -13670,7 +13670,7 @@
  [(pc)
   (match_operator: 6 "vsib_mem_operator"
 [(unspec:P
-   [(match_operand:P 2 "vsib_address_operand" "p")
+   [(match_operand:P 2 "vsib_address_operand" "Tv")

[wide-int] Make shifted_mask more forgiving

2013-11-02 Thread Richard Sandiford
r201806 added some extra checks to the handling of CONCAT when expanding
assignments.  This was to fix an ICE on gcc.dg/pr48335-2.c for ppc.

I tripped over this because it also causes the assembly output for
the test to change.  I tried backing out the patch and the ICE was
from the assert in wide-int.cc:shifted_mask:

  gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT);

However, in this case, the mask was being generated by mask_rtx,
which specifically allows inputs that are out of range of the mode:

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

So I don't think these inputs should trigger an ICE, even if in this
particular case they're conceptually wrong for some reason.

Since wide-int.cc:shifted_mask already handles zero-width masks
and cases where the end bit is out of range, I think we should
handle the start being out of range too.

It might be that r201806 is a good thing anyway (no idea either way),
but I think it should go directly on mainline first if so.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes the
pr48335-2.c assembly differences.  OK to install?

Thanks,
Richard


Index: gcc/expr.c
===
--- gcc/expr.c  2013-11-02 11:15:32.739837369 +
+++ gcc/expr.c  2013-11-02 11:15:42.562911693 +
@@ -4765,14 +4765,12 @@ expand_assignment (tree to, tree from, b
   && (bitpos == 0 || bitpos == mode_bitsize / 2))
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
 nontemporal);
- else if (bitpos + bitsize <= mode_bitsize / 2
-  && bitpos+bitsize <= mode_bitsize)
+ else if (bitpos + bitsize <= mode_bitsize / 2)
result = store_field (XEXP (to_rtx, 0), bitsize, bitpos,
  bitregion_start, bitregion_end,
  mode1, from,
  get_alias_set (to), nontemporal);
- else if (bitpos >= mode_bitsize / 2
-  && bitpos+bitsize <= mode_bitsize)
+ else if (bitpos >= mode_bitsize / 2)
result = store_field (XEXP (to_rtx, 1), bitsize,
  bitpos - mode_bitsize / 2,
  bitregion_start, bitregion_end,
@@ -4791,12 +4789,8 @@ expand_assignment (tree to, tree from, b
}
  else
{
- HOST_WIDE_INT extra = 0;
-  if (bitpos+bitsize > mode_bitsize)
-extra = bitpos+bitsize - mode_bitsize;
  rtx temp = assign_stack_temp (GET_MODE (to_rtx),
-   GET_MODE_SIZE (GET_MODE (to_rtx))
-   + extra);
+   GET_MODE_SIZE (GET_MODE (to_rtx)));
  write_complex_part (temp, XEXP (to_rtx, 0), false);
  write_complex_part (temp, XEXP (to_rtx, 1), true);
  result = store_field (temp, bitsize, bitpos,
Index: gcc/wide-int.cc
===
--- gcc/wide-int.cc 2013-11-02 11:15:32.740837376 +
+++ gcc/wide-int.cc 2013-11-02 11:15:42.562911693 +
@@ -748,18 +748,16 @@ unsigned int
 wi::shifted_mask (HOST_WIDE_INT *val, unsigned int start, unsigned int width,
  bool negate, unsigned int prec)
 {
-  gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT);
-
-  if (start + width > prec)
-width = prec - start;
-  unsigned int end = start + width;
-
-  if (width == 0)
+  if (start >= prec || width == 0)
 {
   val[0] = negate ? -1 : 0;
   return 1;
 }
 
+  if (width > prec - start)
+width = prec - start;
+  unsigned int end = start + width;
+
   unsigned int i = 0;
   while (i < start / HOST_BITS_PER_WIDE_INT)
 val[i++] = negate ? -1 : 0;


[gomp4] fix simd clones with no return value

2013-11-02 Thread Aldy Hernandez

The second testcase currently ICEs I guess during simd cloning, just wanted
to make it clear that while simd clones without any arguments probably don't
make any sense (other than const, but those really should be hoisted out of
the loop much earlier), simd clones with no return value make sense.


The problem here is that we are currently bailing out when there is no 
return value, thus keeping the return in the basic block which will be 
redirected elsewhere.


Fixed with the attached patch and committed to branch.

Note: I am purposely inhibiting warnings (-w) in the test to avoid 
failing with the aforementioned:


a.c:2:6: warning: AVX vector argument without AVX enabled changes the 
ABI [enabled by default]


We already have a test that fails because of this.  No sense having 
another one.  After we fix the ABI problem we can remove the -w.


Aldy
gcc/ChangeLog.gomp

* omp-low.c (ipa_simd_modify_function_body): Handle empty returns.

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 7852723..d30fb17 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11098,17 +11098,23 @@ ipa_simd_modify_function_body (struct cgraph_node 
*node,
{
case GIMPLE_RETURN:
  {
-   /* Replace `return foo' by `retval_array[iter] = foo'.  */
tree old_retval = gimple_return_retval (stmt);
-   if (!old_retval)
- break;
-   stmt = gimple_build_assign (build4 (ARRAY_REF,
-   TREE_TYPE (old_retval),
-   retval_array, iter,
-   NULL, NULL),
-   old_retval);
-   gsi_replace (&gsi, stmt, true);
-   modified = true;
+   if (old_retval)
+ {
+   /* Replace `return foo' by `retval_array[iter] = foo'.  */
+   stmt = gimple_build_assign (build4 (ARRAY_REF,
+   TREE_TYPE (old_retval),
+   retval_array, iter,
+   NULL, NULL),
+   old_retval);
+   gsi_replace (&gsi, stmt, true);
+   modified = true;
+ }
+   else
+ {
+   gsi_remove (&gsi, true);
+   continue;
+ }
break;
  }
 
diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c 
b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c
new file mode 100644
index 000..801c24f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fopenmp -w" } */
+
+/* ?? The -w above is to inhibit the following warning for now:
+   a.c:2:6: warning: AVX vector argument without AVX enabled changes
+   the ABI [enabled by default].  */
+
+#pragma omp declare simd notinbranch simdlen(4)
+void foo (int *a)
+{
+  *a = 555;
+}


Re: [PATCH][ubsan] Add VLA bound instrumentation

2013-11-02 Thread Marek Polacek
On Fri, Nov 01, 2013 at 04:39:09PM -0400, Jason Merrill wrote:
> On 11/01/2013 03:10 PM, Marek Polacek wrote:
> >+  /* We need to stabilize side-effects in VLA sizes for regular array
> >+ declarations too, not just pointers to arrays.  */
> 
> This comment isn't really relevant to its new location.  :)
> 
> OK with that removed.

Sure, thanks.  Passed various testing, will install this soon.

Marek


[gomp4] mangle linear step of 1 with just 'l'

2013-11-02 Thread Aldy Hernandez

Hi Jakub.

Your patch mangling negative linear steps caused a regression in 
simd-clones-2.c.  Well, it already had a failure, but now it has two :).


The problem is that AFAIU, a linear step of 1, is mangled with just 'l', 
not 'l1'.


I am not completely sure of this, and was hoping Balaji could clear this 
up, but on page 7 of the Intel vector ABI document, the example for:


__declspec(vector(uniform(a), aligned(a:32), linear(k:1)))
extern float setArray(float *a, float x, int k)

...is mangled as _ZGVxN4ua32vl_setArray, and in the subsequent 
explanatory paragraph, the document specifically says:


“l” indicates linear(k:1) – k is a linear variable whose stride is 1.

However, since the spec itself says nothing about a default linear 
stride 1, I don't know whether this is an oversight in the BNF grammar 
or a typo in the example.  Balaji?


If a linear step of 1 is mangled as 'l' as the example suggests, then 
I'd like to commit the following patch.


Aldy
gcc/ChangeLog.gomp
* omp-low.c (simd_clone_mangle): Linear step of 1 is mangled as
'l'.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d30fb17..e895cca 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -10825,9 +10825,9 @@ simd_clone_mangle (struct cgraph_node *old_node, struct 
cgraph_node *new_node)
{
  gcc_assert (arg.linear_step != 0);
  pp_character (&pp, 'l');
- if (arg.linear_step > 0)
+ if (arg.linear_step > 1)
pp_unsigned_wide_integer (&pp, arg.linear_step);
- else
+ else if (arg.linear_step < 0)
{
  pp_character (&pp, 'n');
  pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)


Re: [wide-int] Fix aarch{32,64} builds

2013-11-02 Thread Kenneth Zadeck

I always like patches that change host dependencies into more general code.

ok from my perspective.

kenny

On 11/02/2013 06:13 AM, Richard Sandiford wrote:

I decided to lump these together since the problems were the same.
There were some typos in the real_to_integer invocation, while changing:

/* There must be no padding.  */
if (!host_integerp (TYPE_SIZE (type), 1)
|| (tree_low_cst (TYPE_SIZE (type), 1)
!= count * GET_MODE_BITSIZE (*modep)))
  return -1;

to:

if (!tree_fits_uhwi_p (TYPE_SIZE (type))
|| (tree_to_uhwi (TYPE_SIZE (type))
!= count * GET_MODE_BITSIZE (*modep)))
  return -1;

introduced a signed/unsigned warning.

Tested on aarch64-linux-gnueabi & arm-linux-gnueabi and applied as
obvious.

Thanks,
Richard


Index: gcc/config/aarch64/aarch64.c
===
--- gcc/config/aarch64/aarch64.c(revision 204311)
+++ gcc/config/aarch64/aarch64.c(working copy)
@@ -6030,9 +6030,7 @@
  - tree_to_uhwi (TYPE_MIN_VALUE (index)));
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -6060,9 +6058,7 @@
  }
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -6092,9 +6088,7 @@
  }
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -7433,7 +7427,7 @@
int exponent;
unsigned HOST_WIDE_INT mantissa, mask;
REAL_VALUE_TYPE r, m;
-  bool &fail
+  bool fail;
  
if (!CONST_DOUBLE_P (x))

  return false;
@@ -7457,7 +7451,7 @@
   WARNING: If we ever have a representation using more than 2 * H_W_I - 1
   bits for the mantissa, this can fail (low bits will be lost).  */
real_ldexp (&m, &r, point_pos - exponent);
-  w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  
/* If the low part of the mantissa has bits set we cannot represent

   the value.  */
Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c(revision 204311)
+++ gcc/config/arm/arm.c(working copy)
@@ -4678,9 +4678,7 @@
  - tree_to_uhwi (TYPE_MIN_VALUE (index)));
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -4708,9 +4706,7 @@
  }
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -4740,9 +4736,7 @@
  }
  
  	/* There must be no padding.  */

-   if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-   || (tree_to_uhwi (TYPE_SIZE (type))
-   != count * GET_MODE_BITSIZE (*modep)))
+   if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
  return -1;
  
  	return count;

@@ -11299,7 +11293,7 @@
   WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
   bits for the mantissa, this may fail (low bits would be lost).  */
real_ldexp (&m, &r, point_pos - exponent);
-  wide_int w = real_to_integer (m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
mantissa = w.elt (0);
mant_hi = w.elt (1);
  




Re: [wide-int] Fix exact_log2 zext test

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 06:19 AM, Richard Sandiford wrote:

...which I'd fluffed when doing the wi:: conversion.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixed a spurious
gcc.dg difference on alpha-linux-gnu.  Applied as obvious.

Thanks,
Richard


Index: gcc/wide-int.cc
===
--- gcc/wide-int.cc (revision 204311)
+++ gcc/wide-int.cc (working copy)
@@ -2044,7 +2044,7 @@
  
/* Get a zero-extended form of block CRUX.  */

unsigned HOST_WIDE_INT hwi = x.val[crux];
-  if (crux * HOST_BITS_PER_WIDE_INT > x.precision)
+  if ((crux + 1) * HOST_BITS_PER_WIDE_INT > x.precision)
  hwi = zext_hwi (hwi, x.precision % HOST_BITS_PER_WIDE_INT);
  
/* Now it's down to whether HWI is a power of 2.  */

ok by me.

kenny


Re: [wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 06:30 AM, Richard Sandiford wrote:

Bah.  After all that effort, it turns out that -- by design --
there is one special case where CONST_INTs are not sign-extended.
Nonzero/true BImode integers are stored as STORE_FLAG_VALUE,
which can be 1 rather than -1.  So (const_int 1) can be a valid
BImode integer -- and consequently (const_int -1) can be wrong --
even though BImode only has 1 bit.

It might be nice to change that, but for wide-int I think we should
just treat rtxes like trees for now.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some ICEs
seen on bfin-elf.  OK to install?
do we have to throw away the baby with the bath water here?  I guess 
what you are saying is that it is worse to have is_sign_extended be a 
variable that is almost always true than to be a hard false.


also we could preserve the test and make it not apply to bimode.

kenny



Thanks,
Richard


Index: gcc/rtl.h
===
--- gcc/rtl.h   (revision 204311)
+++ gcc/rtl.h   (working copy)
@@ -1408,7 +1408,9 @@
{
  static const enum precision_type precision_type = VAR_PRECISION;
  static const bool host_dependent_precision = false;
-static const bool is_sign_extended = true;
+/* This ought to be true, except for the special case that BImode
+   is canonicalized to STORE_FLAG_VALUE, which might be 1.  */
+static const bool is_sign_extended = false;
  static unsigned int get_precision (const rtx_mode_t &);
  static wi::storage_ref decompose (HOST_WIDE_INT *, unsigned int,
  const rtx_mode_t &);
@@ -1430,10 +1432,6 @@
switch (GET_CODE (x.first))
  {
  case CONST_INT:
-  if (precision < HOST_BITS_PER_WIDE_INT)
-   gcc_checking_assert (INTVAL (x.first)
-== sext_hwi (INTVAL (x.first), precision));
-
return wi::storage_ref (&INTVAL (x.first), 1, precision);

  case CONST_WIDE_INT:




Re: [wide-int] Another tweak to convert_modes

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 06:48 AM, Richard Sandiford wrote:

It turns out that when doing a vector shift by 2, the optab routine
passes (const_int 2) to convert_modes with oldmode set to the mode
of the shift (e.g. something like V8HI).  When the target mode is a
real integer mode like SImode, mainline just ignores that oldmode
and returns a (const_int 2) regardless, but wide-int doesn't.

Saying that (const_int 2) has a vector mode is almost certainly a bug
that ought to be trapped by an assert, but we're not trying to fight
that battle here.  The current code:

   if (CONST_SCALAR_INT_P (x)
   && GET_MODE_CLASS (mode) == MODE_INT
   && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))

is already coping with bogus oldmodes, just not in the way that
other routines seem to expect.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  This fixed several
testsuite changes on the ARM targets.  OK to install?

Thanks,
Richard
or we can fix it on trunk and pull the patch up.  If you as an 
(former) rtl maintainer want to go in this direction, then fine.


Index: gcc/expr.c
===
--- gcc/expr.c  2013-11-02 10:34:44.083635650 +
+++ gcc/expr.c  2013-11-02 10:42:55.179233840 +
@@ -712,13 +712,12 @@ convert_modes (enum machine_mode mode, e
  return x;
  
if (CONST_SCALAR_INT_P (x)

-  && GET_MODE_CLASS (mode) == MODE_INT
-  && (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))
+  && GET_MODE_CLASS (mode) == MODE_INT)
  {
/* If the caller did not tell us the old mode, then there is
 not much to do with respect to canonization.  We have to assume
 that all the bits are significant.  */
-  if (oldmode == VOIDmode)
+  if (GET_MODE_CLASS (oldmode) != MODE_INT)
oldmode = MAX_MODE_INT;
wide_int w = wide_int::from (std::make_pair (x, oldmode),
   GET_MODE_PRECISION (mode),




Re: [wide-int] doloop fixes

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 07:06 AM, Richard Sandiford wrote:

The first part of this is a simple type mismatch -- get_max_loop_iterations
returns a widest_int aka max_wide_int -- and I'd have installed it as
obvious.  The second part isn't as obvious though.  The old code stored
the maximum iterations as:

   if (!get_max_loop_iterations (loop, &iter)
   || !iter.fits_shwi ())
 iterations_max = const0_rtx;
   else
 iterations_max = GEN_INT (iter.to_shwi ());

and the new code uses:

   if (!get_max_loop_iterations (loop, &iter)
   || !wi::fits_shwi_p (iter))
 iterations_max = const0_rtx;
   else
 iterations_max = immed_wide_int_const (iter, mode);

which includes an extra canonicalisation.  I agree it would be good to do
that in principle, but I'm not sure it copes correctly with the case where
the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
real fix would be to avoid the host dependence altogether, i.e. get rid
of the fits_shwi_p too.

As it stands, this breaks bfin-elf's pattern, which has:

   /* Due to limitations in the hardware (an initial loop count of 0
  does not loop 2^32 times) we must avoid to generate a hardware
  loops when we cannot rule out this case.  */
   if (!flag_unsafe_loop_optimizations
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
 FAIL;

With the sign-extending conversion, this now triggers more often than
it was supposed to.

Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
and since we still use that form in the doloop_begin code, I think we should
just back the immed_wide_int_const out.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite changes in bfin-elf.  OK to install?
I dislike this a lot. I think that this is a badly written pattern 
in the bfin port if it depends on the host size.   As machines get 
bigger, (they may not be getting faster but they are still getting 
bigger) having traps like this at the portable level will bite us.


in truth this code should really be iterations_max = 
immed_wide_int_const (iter, mode) with no tests at all.




Thanks,
Richard


Index: gcc/loop-doloop.c
===
--- gcc/loop-doloop.c   2013-11-02 10:49:37.463178153 +
+++ gcc/loop-doloop.c   2013-11-02 10:49:55.927314661 +
@@ -549,7 +549,7 @@ doloop_modify (struct loop *loop, struct
{
  rtx init;
  unsigned level = get_loop_level (loop) + 1;
-wide_int iter;
+widest_int iter;
  rtx iter_rtx;
  
  if (!get_max_loop_iterations (loop, &iter)

@@ -673,7 +673,7 @@ doloop_optimize (struct loop *loop)
|| !wi::fits_shwi_p (iter))
  iterations_max = const0_rtx;
else
-iterations_max = immed_wide_int_const (iter, mode);
+iterations_max = GEN_INT (iter.to_shwi ());
level = get_loop_level (loop) + 1;
  
/* Generate looping insn.  If the pattern FAILs then give up trying




Re: [wide-int] Make shifted_mask more forgiving

2013-11-02 Thread Kenneth Zadeck

this is fine with me.

kenny

On 11/02/2013 07:52 AM, Richard Sandiford wrote:

r201806 added some extra checks to the handling of CONCAT when expanding
assignments.  This was to fix an ICE on gcc.dg/pr48335-2.c for ppc.

I tripped over this because it also causes the assembly output for
the test to change.  I tried backing out the patch and the ICE was
from the assert in wide-int.cc:shifted_mask:

   gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT);

However, in this case, the mask was being generated by mask_rtx,
which specifically allows inputs that are out of range of the mode:

/* Return a constant integer mask value of mode MODE with BITSIZE ones
followed by BITPOS zeros, or the complement of that if COMPLEMENT.
The mask is truncated if necessary to the width of mode MODE.  The
mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

So I don't think these inputs should trigger an ICE, even if in this
particular case they're conceptually wrong for some reason.

Since wide-int.cc:shifted_mask already handles zero-width masks
and cases where the end bit is out of range, I think we should
handle the start being out of range too.

It might be that r201806 is a good thing anyway (no idea either way),
but I think it should go directly on mainline first if so.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes the
pr48335-2.c assembly differences.  OK to install?

Thanks,
Richard


Index: gcc/expr.c
===
--- gcc/expr.c  2013-11-02 11:15:32.739837369 +
+++ gcc/expr.c  2013-11-02 11:15:42.562911693 +
@@ -4765,14 +4765,12 @@ expand_assignment (tree to, tree from, b
   && (bitpos == 0 || bitpos == mode_bitsize / 2))
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
 nontemporal);
- else if (bitpos + bitsize <= mode_bitsize / 2
-  && bitpos+bitsize <= mode_bitsize)
+ else if (bitpos + bitsize <= mode_bitsize / 2)
result = store_field (XEXP (to_rtx, 0), bitsize, bitpos,
  bitregion_start, bitregion_end,
  mode1, from,
  get_alias_set (to), nontemporal);
- else if (bitpos >= mode_bitsize / 2
-  && bitpos+bitsize <= mode_bitsize)
+ else if (bitpos >= mode_bitsize / 2)
result = store_field (XEXP (to_rtx, 1), bitsize,
  bitpos - mode_bitsize / 2,
  bitregion_start, bitregion_end,
@@ -4791,12 +4789,8 @@ expand_assignment (tree to, tree from, b
}
  else
{
- HOST_WIDE_INT extra = 0;
-  if (bitpos+bitsize > mode_bitsize)
-extra = bitpos+bitsize - mode_bitsize;
  rtx temp = assign_stack_temp (GET_MODE (to_rtx),
-   GET_MODE_SIZE (GET_MODE (to_rtx))
-   + extra);
+   GET_MODE_SIZE (GET_MODE (to_rtx)));
  write_complex_part (temp, XEXP (to_rtx, 0), false);
  write_complex_part (temp, XEXP (to_rtx, 1), true);
  result = store_field (temp, bitsize, bitpos,
Index: gcc/wide-int.cc
===
--- gcc/wide-int.cc 2013-11-02 11:15:32.740837376 +
+++ gcc/wide-int.cc 2013-11-02 11:15:42.562911693 +
@@ -748,18 +748,16 @@ unsigned int
  wi::shifted_mask (HOST_WIDE_INT *val, unsigned int start, unsigned int width,
  bool negate, unsigned int prec)
  {
-  gcc_assert (start < 4 * MAX_BITSIZE_MODE_ANY_INT);
-
-  if (start + width > prec)
-width = prec - start;
-  unsigned int end = start + width;
-
-  if (width == 0)
+  if (start >= prec || width == 0)
  {
val[0] = negate ? -1 : 0;
return 1;
  }
  
+  if (width > prec - start)

+width = prec - start;
+  unsigned int end = start + width;
+
unsigned int i = 0;
while (i < start / HOST_BITS_PER_WIDE_INT)
  val[i++] = negate ? -1 : 0;




Re: [wide-int] Fix fold_ternary VEC_PERM_EXPR handling

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 07:14 AM, Richard Sandiford wrote:

The wide-int conversion for the fold_ternary VEC_PERM_EXPR case
converted a mask of valid element numbers to an element count,
which is one greater.  The old code set need_mask_canon if an index
was greater than the mask, but the new code sets it if an index
is greater than the element count, giving an off-by-one error.

This patch restores the mainline mask handling and uses that in
the gtu_p call instead.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite differences for one of the ARM targets.  OK to install?

Thanks,
Richard

looks ok to me.


Index: gcc/fold-const.c
===
--- gcc/fold-const.c2013-11-02 11:07:33.097149207 +
+++ gcc/fold-const.c2013-11-02 11:07:38.107188425 +
@@ -14360,7 +14360,7 @@ fold_ternary_loc (location_t loc, enum t
  case VEC_PERM_EXPR:
if (TREE_CODE (arg2) == VECTOR_CST)
{
- unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
  bool need_mask_canon = false;
  bool all_in_vec0 = true;
@@ -14368,23 +14368,22 @@ fold_ternary_loc (location_t loc, enum t
  bool maybe_identity = true;
  bool single_arg = (op0 == op1);
  bool changed = false;
- int nelts_cnt = single_arg ? nelts : nelts * 2;
  
+	  mask = single_arg ? (nelts - 1) : (2 * nelts - 1);

  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
  for (i = 0; i < nelts; i++)
{
  tree val = VECTOR_CST_ELT (arg2, i);
-
  if (TREE_CODE (val) != INTEGER_CST)
return NULL_TREE;
  
  	  /* Make sure that the perm value is in an acceptable

 range.  */
  wide_int t = val;
- if (wi::gtu_p (t, nelts_cnt))
+ if (wi::gtu_p (t, mask))
{
  need_mask_canon = true;
- sel[i] = t.to_uhwi () & (nelts_cnt - 1);
+ sel[i] = t.to_uhwi () & mask;
}
  else
sel[i] = t.to_uhwi ();




Re: [wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Richard Sandiford
Kenneth Zadeck  writes:
> On 11/02/2013 06:30 AM, Richard Sandiford wrote:
>> Bah.  After all that effort, it turns out that -- by design --
>> there is one special case where CONST_INTs are not sign-extended.
>> Nonzero/true BImode integers are stored as STORE_FLAG_VALUE,
>> which can be 1 rather than -1.  So (const_int 1) can be a valid
>> BImode integer -- and consequently (const_int -1) can be wrong --
>> even though BImode only has 1 bit.
>>
>> It might be nice to change that, but for wide-int I think we should
>> just treat rtxes like trees for now.
>>
>> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some ICEs
>> seen on bfin-elf.  OK to install?
> do we have to throw away the baby with the bath water here?  I guess 
> what you are saying is that it is worse to have is_sign_extended be a 
> variable that is almost always true than to be a hard false.

Right.  is_sign_extended is only useful if it's a compile-time value.
Making it a run-time value would negate the benefit.

I think in practice STORE_FLAG_VALUE is a compile-time constant too,
so we could set is_sign_extended to STORE_FLAG_VALUE == -1.  But AFAICT
that would only help SPU and m68k.

> also we could preserve the test and make it not apply to bimode.

You mean the one in the assert?  Yeah, OK.  How about this version?

Richard


Index: gcc/rtl.h
===
--- gcc/rtl.h   2013-11-02 11:06:12.738517644 +
+++ gcc/rtl.h   2013-11-02 14:22:05.636007860 +
@@ -1408,7 +1408,9 @@ typedef std::pair ::decompose (
 case CONST_INT:
   if (precision < HOST_BITS_PER_WIDE_INT)
gcc_checking_assert (INTVAL (x.first)
-== sext_hwi (INTVAL (x.first), precision));
+== sext_hwi (INTVAL (x.first), precision)
+|| (precision == 1 && INTVAL (x.first) == 1));
 
   return wi::storage_ref (&INTVAL (x.first), 1, precision);
   


Re: [wide-int] doloop fixes

2013-11-02 Thread Richard Sandiford
Kenneth Zadeck  writes:
> On 11/02/2013 07:06 AM, Richard Sandiford wrote:
>> The first part of this is a simple type mismatch -- get_max_loop_iterations
>> returns a widest_int aka max_wide_int -- and I'd have installed it as
>> obvious.  The second part isn't as obvious though.  The old code stored
>> the maximum iterations as:
>>
>>if (!get_max_loop_iterations (loop, &iter)
>>|| !iter.fits_shwi ())
>>  iterations_max = const0_rtx;
>>else
>>  iterations_max = GEN_INT (iter.to_shwi ());
>>
>> and the new code uses:
>>
>>if (!get_max_loop_iterations (loop, &iter)
>>|| !wi::fits_shwi_p (iter))
>>  iterations_max = const0_rtx;
>>else
>>  iterations_max = immed_wide_int_const (iter, mode);
>>
>> which includes an extra canonicalisation.  I agree it would be good to do
>> that in principle, but I'm not sure it copes correctly with the case where
>> the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
>> real fix would be to avoid the host dependence altogether, i.e. get rid
>> of the fits_shwi_p too.
>>
>> As it stands, this breaks bfin-elf's pattern, which has:
>>
>>/* Due to limitations in the hardware (an initial loop count of 0
>>   does not loop 2^32 times) we must avoid to generate a hardware
>>   loops when we cannot rule out this case.  */
>>if (!flag_unsafe_loop_optimizations
>>&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
>>  FAIL;
>>
>> With the sign-extending conversion, this now triggers more often than
>> it was supposed to.
>>
>> Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
>> and since we still use that form in the doloop_begin code, I think we should
>> just back the immed_wide_int_const out.
>>
>> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
>> unwanted testsuite changes in bfin-elf.  OK to install?
> I dislike this a lot. I think that this is a badly written pattern 
> in the bfin port if it depends on the host size.   As machines get 
> bigger, (they may not be getting faster but they are still getting 
> bigger) having traps like this at the portable level will bite us.

But this isn't at the portable level, it's backend-specific code.
It knows that it's dealing with a 32-bit counter.

> in truth this code should really be iterations_max = 
> immed_wide_int_const (iter, mode) with no tests at all.

But like I say, that won't cope with loops that iterate
1 << GET_MODE_PRECISION (mode) times, even if that fits
in a signed HWI.  (That case could happen if the hardware
supports it and if the counter starts out as 0.)

The mainline code is self-consistent in that the number of
iterations is passed as a positive signed HWI, using 0 if the
value isn't known or is greater than HOST_WIDE_INT_MAX.
In the current interface, the CONST_INT doesn't really have a mode.
It's just used as a convenient way of passing a HWI to an expander.

I'm not saying that it's the ideal interface.  But if we want to change it,
let's do that separately, and test it on targets that have doloops.
Until then, the wide-int branch can provide the current interface
just as easily as mainline.

Thanks,
Richard


Re: [wide-int] Do not treat rtxes as sign-extended

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 10:25 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 06:30 AM, Richard Sandiford wrote:

Bah.  After all that effort, it turns out that -- by design --
there is one special case where CONST_INTs are not sign-extended.
Nonzero/true BImode integers are stored as STORE_FLAG_VALUE,
which can be 1 rather than -1.  So (const_int 1) can be a valid
BImode integer -- and consequently (const_int -1) can be wrong --
even though BImode only has 1 bit.

It might be nice to change that, but for wide-int I think we should
just treat rtxes like trees for now.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some ICEs
seen on bfin-elf.  OK to install?

do we have to throw away the baby with the bath water here?  I guess
what you are saying is that it is worse to have is_sign_extended be a
variable that is almost always true than to be a hard false.

Right.  is_sign_extended is only useful if it's a compile-time value.
Making it a run-time value would negate the benefit.

I think in practice STORE_FLAG_VALUE is a compile-time constant too,
so we could set is_sign_extended to STORE_FLAG_VALUE == -1.  But AFAICT
that would only help SPU and m68k.


also we could preserve the test and make it not apply to bimode.

You mean the one in the assert?  Yeah, OK.  How about this version?

Richard


Index: gcc/rtl.h
===
--- gcc/rtl.h   2013-11-02 11:06:12.738517644 +
+++ gcc/rtl.h   2013-11-02 14:22:05.636007860 +
@@ -1408,7 +1408,9 @@ typedef std::pair ::decompose (
  case CONST_INT:
if (precision < HOST_BITS_PER_WIDE_INT)
gcc_checking_assert (INTVAL (x.first)
-== sext_hwi (INTVAL (x.first), precision));
+== sext_hwi (INTVAL (x.first), precision)
+|| (precision == 1 && INTVAL (x.first) == 1));
  
return wi::storage_ref (&INTVAL (x.first), 1, precision);


yes, this is fine.

kenny


Re: [wide-int] doloop fixes

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 10:47 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 07:06 AM, Richard Sandiford wrote:

The first part of this is a simple type mismatch -- get_max_loop_iterations
returns a widest_int aka max_wide_int -- and I'd have installed it as
obvious.  The second part isn't as obvious though.  The old code stored
the maximum iterations as:

if (!get_max_loop_iterations (loop, &iter)
|| !iter.fits_shwi ())
  iterations_max = const0_rtx;
else
  iterations_max = GEN_INT (iter.to_shwi ());

and the new code uses:

if (!get_max_loop_iterations (loop, &iter)
|| !wi::fits_shwi_p (iter))
  iterations_max = const0_rtx;
else
  iterations_max = immed_wide_int_const (iter, mode);

which includes an extra canonicalisation.  I agree it would be good to do
that in principle, but I'm not sure it copes correctly with the case where
the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
real fix would be to avoid the host dependence altogether, i.e. get rid
of the fits_shwi_p too.

As it stands, this breaks bfin-elf's pattern, which has:

/* Due to limitations in the hardware (an initial loop count of 0
   does not loop 2^32 times) we must avoid to generate a hardware
   loops when we cannot rule out this case.  */
if (!flag_unsafe_loop_optimizations
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
  FAIL;

With the sign-extending conversion, this now triggers more often than
it was supposed to.

Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
and since we still use that form in the doloop_begin code, I think we should
just back the immed_wide_int_const out.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite changes in bfin-elf.  OK to install?

I dislike this a lot. I think that this is a badly written pattern
in the bfin port if it depends on the host size.   As machines get
bigger, (they may not be getting faster but they are still getting
bigger) having traps like this at the portable level will bite us.

But this isn't at the portable level, it's backend-specific code.
It knows that it's dealing with a 32-bit counter.

yes, but the patch is dumbing down the portable part of the compiler.



in truth this code should really be iterations_max =
immed_wide_int_const (iter, mode) with no tests at all.

But like I say, that won't cope with loops that iterate
1 << GET_MODE_PRECISION (mode) times, even if that fits
in a signed HWI.  (That case could happen if the hardware
supports it and if the counter starts out as 0.)

The mainline code is self-consistent in that the number of
iterations is passed as a positive signed HWI, using 0 if the
value isn't known or is greater than HOST_WIDE_INT_MAX.
In the current interface, the CONST_INT doesn't really have a mode.
It's just used as a convenient way of passing a HWI to an expander.

I'm not saying that it's the ideal interface.  But if we want to change it,
let's do that separately, and test it on targets that have doloops.
Until then, the wide-int branch can provide the current interface
just as easily as mainline.
we do not have a predicate at the rtl level like we do at the tree level 
where we can ask if the value can be represented in a mode without 
losing any information.   But the proper thing to do here is to check 
to see if iter fits in the mode, if it does, then generate the 
immed_wide_int_const and if not generate the 0.   In this way it does 
the right thing for loops that iterate 1<<GET_MODE_PRECISION (mode) times,
i.e. it will produce a 0.   And that will be correct even if the mode is
SI or TI.

Re: [PATCH, rs6000] Fix rs6000_expand_vector_set for little endian

2013-11-02 Thread David Edelsohn
On Thu, Oct 31, 2013 at 9:45 PM, Bill Schmidt
 wrote:
> Hi,
>
> Brooks Moses reported a bug with code that sets a single element of a
> vector to a given value and the rest of the vector to zero.  This is
> implemented in rs6000_expand_vector_set, which uses a vperm instruction
> to place the nonzero value.  As usual, we need to adjust the permute
> control vector and swap the order of the input operands.  I added a test
> case based on the bug report.
>
> Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
> regressions.  The new test now passes for both endiannesses.  Is this ok
> for trunk?
>
> Thanks,
> Bill
>
>
> gcc:
>
> 2013-10-31  Bill Schmidt  
>
> * config/rs6000/rs6000.c (rs6000_expand_vector_set): Adjust for
> little endian.

This is okay.

> +  if (!BYTES_BIG_ENDIAN)

But negating the expression and reversing the branches seems superfluous.

Thanks, David


Re: [wide-int] doloop fixes

2013-11-02 Thread Richard Sandiford
Kenneth Zadeck  writes:
> On 11/02/2013 10:47 AM, Richard Sandiford wrote:
>> Kenneth Zadeck  writes:
>>> On 11/02/2013 07:06 AM, Richard Sandiford wrote:
 The first part of this is a simple type mismatch -- get_max_loop_iterations
 returns a widest_int aka max_wide_int -- and I'd have installed it as
 obvious.  The second part isn't as obvious though.  The old code stored
 the maximum iterations as:

 if (!get_max_loop_iterations (loop, &iter)
 || !iter.fits_shwi ())
   iterations_max = const0_rtx;
 else
   iterations_max = GEN_INT (iter.to_shwi ());

 and the new code uses:

 if (!get_max_loop_iterations (loop, &iter)
 || !wi::fits_shwi_p (iter))
   iterations_max = const0_rtx;
 else
   iterations_max = immed_wide_int_const (iter, mode);

 which includes an extra canonicalisation.  I agree it would be good to do
 that in principle, but I'm not sure it copes correctly with the case where
 the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
 real fix would be to avoid the host dependence altogether, i.e. get rid
 of the fits_shwi_p too.

 As it stands, this breaks bfin-elf's pattern, which has:

 /* Due to limitations in the hardware (an initial loop count of 0
does not loop 2^32 times) we must avoid to generate a hardware
loops when we cannot rule out this case.  */
 if (!flag_unsafe_loop_optimizations
 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
   FAIL;

 With the sign-extending conversion, this now triggers more often than
 it was supposed to.

 Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
 and since we still use that form in the doloop_begin code, I think we 
 should
 just back the immed_wide_int_const out.

 Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
 unwanted testsuite changes in bfin-elf.  OK to install?
>>> I dislike this a lot. I think that this is a badly written pattern
>>> in the bfin port if it depends on the host size.   As machines get
>>> bigger, (they may not be getting faster but they are still getting
>>> bigger) having traps like this at the portable level will bite us.
>> But this isn't at the portable level, it's backend-specific code.
>> It knows that it's dealing with a 32-bit counter.
> yes, but the patch is dumbing down the portable part of the compiler.
>>
>>> in truth this code should really be iterations_max =
>>> immed_wide_int_const (iter, mode) with no tests at all.
>> But like I say, that won't cope with loops that iterate
>> 1 << GET_MODE_PRECISION (mode) times, even if that fits
>> in a signed HWI.  (That case could happen if the hardware
>> supports it and if the counter starts out as 0.)
>>
>> The mainline code is self-consistent in that the number of
>> iterations is passed as a positive signed HWI, using 0 if the
>> value isn't known or is greater than HOST_WIDE_INT_MAX.
>> In the current interface, the CONST_INT doesn't really have a mode.
>> It's just used as a convenient way of passing a HWI to an expander.
>>
>> I'm not saying that it's the ideal interface.  But if we want to change it,
>> let's do that separately, and test it on targets that have doloops.
>> Until then, the wide-int branch can provide the current interface
>> just as easily as mainline.
> we do not have a predicate at the rtl level like we do at the tree level 
> where we can ask if the value can be represented in a mode without 
> losing any information.   But the proper thing to do here is to check 
> to see if iter fits in the mode, if it does, then generate the 
> immed_wide_int_const and if not generate the 0.   In this way it does 
> the right thing for loops that iterate 1<<GET_MODE_PRECISION (mode) times,
> i.e. it will produce a 0.   And that will be correct even if the mode is 
> SI or TI.

Well, like I say, I don't see the current rtx as having a mode,
even conceptually.  The CONST_INT is just a convenient way of passing
an iteration count to a function that only accepts rtxes as arguments.
If we could, we'd be passing the widest_int directly as a pointer or
reference.

But since I'm just repeating what I said above, I should probably bow
out of this one.

Note that the current wide-int code doesn't allow any more cases than
the current mainline code, it just changes their values (and so breaks
the ports that relied on the old values).  So I think we both agree that
the current wide-int code needs to change in some way.  And I'd prefer
to change it to follow the existing interface, rather than (a) combining
a subtle interface change with a big patch and (b) potentially putting
the merge back unnecessarily.

If you and Mike want to change the interface on wide-int first,
and if the maintainers agree that's OK, then I won't object.
But you'll need to test it on port

Re: [gomp4] mangle linear step of 1 with just 'l'

2013-11-02 Thread Jakub Jelinek
On Sat, Nov 02, 2013 at 08:25:28AM -0500, Aldy Hernandez wrote:
> Your patch mangling negative linear steps caused a regression in
> simd-clones-2.c.  Well, it already had a failure, but now it has two
> :).
> 
> The problem is that AFAIU, a linear step of 1, is mangled with just
> 'l', not 'l1'.
> 
> I am not completely sure of this, and was hoping Balaji could clear
> this up, but on page 7 of the Intel vector ABI document, the example
> for:
> 
> __declspec(vector(uniform(a), aligned(a:32), linear(k:1)))
> extern float setArray(float *a, float x, int k)
> 
> ...is mangled as _ZGVxN4ua32vl_setArray, and in the subsequent
> explanatory paragraph, the document specifically says:
> 
>   “l” indicates linear(k:1) – k is a linear variable whose stride is 1.
> 
> However, since the spec itself says nothing about a default linear
> stride 1, I don't know whether this is an oversight in the BNF
> grammar or a typo in the example.  Balaji?

Ah, I was reading just the grammar and it didn't look like the number was
optional.

BTW, as I said on IRC yesterday, for say:
#pragma omp declare simd simdlen(8) notinbranch
int foo (int a, int b);
in the 'x' ISA (i.e. SSE2), we are supposed to pass it as
typedef int V __attribute__((vector_size (16)));
foo (V a.1, V a.2, V b.1, V b.2)
which probably isn't that hard for arguments, but are supposed to return
also the return value in two V registers (%xmm0/%xmm1).  We can't pass that
as vector(8) int, so perhaps pretend the return value is ARRAY_TYPE of
2 vector(4) ints and have some special code in the backend to pass that
return value in two xmm registers.  Similarly for 4 and 8.

Jakub


Re: [Patch, fortran] PR57445 - [4.8/4.9 Regression] ICE in gfc_conv_class_to_class - for OPTIONAL polymorphic array

2013-11-02 Thread Steve Kargl
On Fri, Nov 01, 2013 at 04:56:36PM +0100, Paul Richard Thomas wrote:
> 
> This one is trivial.  The ICE was caused by an assert that turns out
> to be spurious and has been removed.
> 
> Bootstrapped and regtested on FC17/x86_64 - OK for trunk and 4.8?
> 

OK.

-- 
Steve


Re: libsanitizer merge from upstream r191666

2013-11-02 Thread Jack Howarth
On Fri, Nov 01, 2013 at 04:13:05PM -0700, Konstantin Serebryany wrote:
> Jakub,
> 
> This works!
> New patch attached.

Konstantin,
   This patch, when applied on top of r204305, bootstraps fine on 
x86_64-apple-darwin12 but
produces a lot of regressions with...

make -k check RUNTESTFLAGS="asan.exp --target_board=unix'{-m64}'"

Native configuration is x86_64-apple-darwin12.5.0

=== g++ tests ===


Running target unix/-m64
FAIL: c-c++-common/asan/global-overflow-1.c  -O0  execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O1  execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O2  execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O3 -fomit-frame-pointer  
execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -Os  execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/global-overflow-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O0  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O1  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O2  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O3 -fomit-frame-pointer  execution 
test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -Os  execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/heap-overflow-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O0  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O1  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O2  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O3 -fomit-frame-pointer  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -Os  execution test
FAIL: c-c++-common/asan/memcmp-1.c  -O2 -flto -flto-partition=none  execution 
test
FAIL: c-c++-common/asan/memcmp-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O0  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O1  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O2  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O3 -fomit-frame-pointer  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -Os  execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/null-deref-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O0  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O1  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O2  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O3 -fomit-frame-pointer  
execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -Os  execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O2 -flto -flto-partition=none 
 execution test
FAIL: c-c++-common/asan/sanity-check-pure-c-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/sleep-before-dying-1.c  -O2  execution test
FAIL: c-c++-common/asan/sleep-before-dying-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/sleep-before-dying-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O0  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O1  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O2  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O3 -fomit-frame-pointer  execution 
test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -Os  execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/stack-overflow-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/strip-path-prefix-1.c  -O2  execution test
FAIL: c-c++-common/asan/strip-path-prefix-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/strip-path-prefix-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O0  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O1  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O2  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O3 -fomit-frame-pointer  
execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O3 -g  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -Os  execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/strncpy-overflow-1.c  -O2 -flto  execution test
FAIL: c-c++-common/asan/use-after-free-1.c  -O0  execution test
FAIL: c-c++-common/asan/use-after-free-1.c  -O1  execution test

Re: [wide-int] doloop fixes

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 11:31 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 10:47 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 07:06 AM, Richard Sandiford wrote:

The first part of this is a simple type mismatch -- get_max_loop_iterations
returns a widest_int aka max_wide_int -- and I'd have installed it as
obvious.  The second part isn't as obvious though.  The old code stored
the maximum iterations as:

 if (!get_max_loop_iterations (loop, &iter)
 || !iter.fits_shwi ())
   iterations_max = const0_rtx;
 else
   iterations_max = GEN_INT (iter.to_shwi ());

and the new code uses:

 if (!get_max_loop_iterations (loop, &iter)
 || !wi::fits_shwi_p (iter))
   iterations_max = const0_rtx;
 else
   iterations_max = immed_wide_int_const (iter, mode);

which includes an extra canonicalisation.  I agree it would be good to do
that in principle, but I'm not sure it copes correctly with the case where
the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
real fix would be to avoid the host dependence altogether, i.e. get rid
of the fits_shwi_p too.

As it stands, this breaks bfin-elf's pattern, which has:

 /* Due to limitations in the hardware (an initial loop count of 0
does not loop 2^32 times) we must avoid to generate a hardware
loops when we cannot rule out this case.  */
 if (!flag_unsafe_loop_optimizations
 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
   FAIL;

With the sign-extending conversion, this now triggers more often than
it was supposed to.

Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
and since we still use that form in the doloop_begin code, I think we should
just back the immed_wide_int_const out.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite changes in bfin-elf.  OK to install?

I dislike this a lot. I think that this is a badly written pattern
in the bfin port if it depends on the host size.   As machines get
bigger, (they may not be getting faster but they are still getting
bigger) having traps like this at the portable level will bite us.

But this isn't at the portable level, it's backend-specific code.
It knows that it's dealing with a 32-bit counter.

yes, but the patch is dumbing down the portable part of the compiler.

in truth this code should really be iterations_max =
immed_wide_int_const (iter, mode) with no tests at all.

But like I say, that won't cope with loops that iterate
1 << GET_MODE_PRECISION (mode) times, even if that fits
in a signed HWI.  (That case could happen if the hardware
supports it and if the counter starts out as 0.)

The mainline code is self-consistent in that the number of
iterations is passed as a positive signed HWI, using 0 if the
value isn't known or is greater than HOST_WIDE_INT_MAX.
In the current interface, the CONST_INT doesn't really have a mode.
It's just used as a convenient way of passing a HWI to an expander.

I'm not saying that it's the ideal interface.  But if we want to change it,
let's do that separately, and test it on targets that have doloops.
Until then, the wide-int branch can provide the current interface
just as easily as mainline.

we do not have a predicate at the rtl level like we do at the tree level
where we can ask if the value can be represented in a mode without
losing any information.   But the proper thing to do here is to check
to see if iter fits in the mode, if it does, then generate the
immed_wide_int_const and if not generate the 0.   In this way it does
the right thing for loops that iterate 1<<GET_MODE_PRECISION (mode) times,
i.e. it will produce a 0.   And that will be correct even if the mode is
SI or TI.
Well, like I say, I don't see the current rtx as having a mode,
even conceptually.  The CONST_INT is just a convenient way of passing
an iteration count to a function that only accepts rtxes as arguments.
If we could, we'd be passing the widest_int directly as a pointer or
reference.

But since I'm just repeating what I said above, I should probably bow
out of this one.
your point seems to be that this is a place where the "assumption" that 
rtxes are signed bites us.
if mode happens to be SImode but the variable was unsigned and large, 
then we will sign extend this and get a pessimistic answer for this 
pattern.on the other hand, if we do what you suggest, then we are 
forever limited by the host precision.


I see your point, but i do not know what to do.

kenny

Note that the current wide-int code doesn't allow any more cases than
the current mainline code, it just changes their values (and so breaks
the ports that relied on the old values).  So I think we both agree that
the current wide-int code needs to change in some way.  And I'd prefer
to change it to follow the existing interface, rather than (a) combining
a subtle interface change with a big patch and (b) potentially putting
the merge back unnecessarily.

If you and Mike want to change the interface on wide-int first,
and if the main

Re: [wide-int] doloop fixes

2013-11-02 Thread Richard Sandiford
Kenneth Zadeck  writes:
> On 11/02/2013 11:31 AM, Richard Sandiford wrote:
>> Kenneth Zadeck  writes:
>>> On 11/02/2013 10:47 AM, Richard Sandiford wrote:
 Kenneth Zadeck  writes:
> On 11/02/2013 07:06 AM, Richard Sandiford wrote:
>> The first part of this is a simple type mismatch --
>> get_max_loop_iterations
>> returns a widest_int aka max_wide_int -- and I'd have installed it as
>> obvious.  The second part isn't as obvious though.  The old code stored
>> the maximum iterations as:
>>
>>  if (!get_max_loop_iterations (loop, &iter)
>>  || !iter.fits_shwi ())
>>iterations_max = const0_rtx;
>>  else
>>iterations_max = GEN_INT (iter.to_shwi ());
>>
>> and the new code uses:
>>
>>  if (!get_max_loop_iterations (loop, &iter)
>>  || !wi::fits_shwi_p (iter))
>>iterations_max = const0_rtx;
>>  else
>>iterations_max = immed_wide_int_const (iter, mode);
>>
>> which includes an extra canonicalisation.  I agree it would be good to do
>> that in principle, but I'm not sure it copes correctly with the case 
>> where
>> the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
>> real fix would be to avoid the host dependence altogether, i.e. get rid
>> of the fits_shwi_p too.
>>
>> As it stands, this breaks bfin-elf's pattern, which has:
>>
>>  /* Due to limitations in the hardware (an initial loop count of 0
>> does not loop 2^32 times) we must avoid to generate a hardware
>> loops when we cannot rule out this case.  */
>>  if (!flag_unsafe_loop_optimizations
>>  && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
>>FAIL;
>>
>> With the sign-extending conversion, this now triggers more often than
>> it was supposed to.
>>
>> Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int 
>> too,
>> and since we still use that form in the doloop_begin code, I think we 
>> should
>> just back the immed_wide_int_const out.
>>
>> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
>> unwanted testsuite changes in bfin-elf.  OK to install?
> I dislike this a lot. I think that this is a badly written pattern
> in the bfin port if it depends on the host size.   As machines get
> bigger, (they may not be getting faster but they are still getting
> bigger) having traps like this at the portable level will bite us.
 But this isn't at the portable level, it's backend-specific code.
 It knows that it's dealing with a 32-bit counter.
>>> yes, but the patch is dumbing down the portable part of the compiler.
> in truth this code should really be iterations_max =
> immed_wide_int_const (iter, mode) with no tests at all.
 But like I say, that won't cope with loops that iterate
 1 << GET_MODE_PRECISION (mode) times, even if that fits
 in a signed HWI.  (That case could happen if the hardware
 supports it and if the counter starts out as 0.)

 The mainline code is self-consistent in that the number of
 iterations is passed as a positive signed HWI, using 0 if the
 value isn't known or is greater than HOST_WIDE_INT_MAX.
 In the current interface, the CONST_INT doesn't really have a mode.
 It's just used as a convenient way of passing a HWI to an expander.

 I'm not saying that it's the ideal interface.  But if we want to change it,
 let's do that separately, and test it on targets that have doloops.
 Until then, the wide-int branch can provide the current interface
 just as easily as mainline.
>>> we do not have a predicate at the rtl level like we do at the tree level
>>> where we can ask if the value can be represented in a mode without
>>> loosing any information.   But the proper thing to do here is to check
>>> to see if iter fits in the mode, if it does, then generate the
>>> immed_wide_int_const and if not generate the 0.   In this way it does
>>> the right thing for loops that iterate 1<<GET_MODE_PRECISION (mode) times,
>>> i.e. it will produce a 0.   And that will be correct even if the mode is
>>> SI or TI.
>> Well, like I say, I don't see the current rtx as having a mode,
>> even conceptually.  The CONST_INT is just a convenient way of passing
>> an iteration count to a function that only accepts rtxes as arguments.
>> If we could, we'd be passing the widest_int directly as a pointer or
>> reference.
>>
>> But since I'm just repeating what I said above, I should probably bow
>> out of this one.
> your point seems to be that this is a place where the "assumption" that 
> rtxes are signed bites us.
> if mode happens to be SImode but the variable was unsigned and large, 
> then we will sign extend this and get a pessimistic answer for this 
> pattern.

Right.

> on the other hand, if we do what you suggest, then we are 
> fo

Re: [Patch] Implementation of n3793

2013-11-02 Thread Luc Danton

On 2013-11-01 21:46, Paolo Carlini wrote:

Hi,


Il giorno 01/nov/2013, alle ore 21:09, Jonathan Wakely  
ha scritto:

Here's the final version of Luc's optional implementation that I'm
committing, tested on x86_64-linux.

Great. Just noticed a minor nit: the fallback __constexpr_addressof appears not 
to be inline.

Paolo


Can you expand? I think it's just as much inline as the other overload 
-- does it need to be different?


Re: [Patch] Implementation of n3793

2013-11-02 Thread Paolo Carlini


>Can you expand? I think it's just as much inline as the other overload
>-- does it need to be different?

The other overload is constexpr thus it's implicitly inline. The fall back is 
very simple too and I think it should be declared inline, unless you analyzed 
the assembly and believe it normally boils down to more than, say, 5 
instructions.

Paolo



Re: [wide-int] doloop fixes

2013-11-02 Thread Kenneth Zadeck

On 11/02/2013 01:51 PM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 11:31 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 10:47 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 11/02/2013 07:06 AM, Richard Sandiford wrote:

The first part of this is a simple type mismatch --
get_max_loop_iterations
returns a widest_int aka max_wide_int -- and I'd have installed it as
obvious.  The second part isn't as obvious though.  The old code stored
the maximum iterations as:

  if (!get_max_loop_iterations (loop, &iter)
  || !iter.fits_shwi ())
iterations_max = const0_rtx;
  else
iterations_max = GEN_INT (iter.to_shwi ());

and the new code uses:

  if (!get_max_loop_iterations (loop, &iter)
  || !wi::fits_shwi_p (iter))
iterations_max = const0_rtx;
  else
iterations_max = immed_wide_int_const (iter, mode);

which includes an extra canonicalisation.  I agree it would be good to do
that in principle, but I'm not sure it copes correctly with the case where
the loop iterates 1 << GET_MODE_PRECISION (mode) times.  Plus I think the
real fix would be to avoid the host dependence altogether, i.e. get rid
of the fits_shwi_p too.

As it stands, this breaks bfin-elf's pattern, which has:

  /* Due to limitations in the hardware (an initial loop count of 0
 does not loop 2^32 times) we must avoid to generate a hardware
 loops when we cannot rule out this case.  */
  if (!flag_unsafe_loop_optimizations
  && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
FAIL;

With the sign-extending conversion, this now triggers more often than
it was supposed to.

Since the old "GEN_INT (iter.to_shwi ());" works verbatim in wide-int too,
and since we still use that form in the doloop_begin code, I think we should
just back the immed_wide_int_const out.

Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  It fixes some
unwanted testsuite changes in bfin-elf.  OK to install?

I dislike this a lot. I think that this is a badly written pattern
in the bfin port if it depends on the host size.   As machines get
bigger, (they may not be getting faster but they are still getting
bigger) having traps like this at the portable level will bite us.

But this isn't at the portable level, it's backend-specific code.
It knows that it's dealing with a 32-bit counter.

yes, but the patch is dumbing down the portable part of the compiler.

in truth this code should really be iterations_max =
immed_wide_int_const (iter, mode) with no tests at all.

But like I say, that won't cope with loops that iterate
1 << GET_MODE_PRECISION (mode) times, even if that fits
in a signed HWI.  (That case could happen if the hardware
supports it and if the counter starts out as 0.)

The mainline code is self-consistent in that the number of
iterations is passed as a positive signed HWI, using 0 if the
value isn't known or is greater than HOST_WIDE_INT_MAX.
In the current interface, the CONST_INT doesn't really have a mode.
It's just used as a convenient way of passing a HWI to an expander.

I'm not saying that it's the ideal interface.  But if we want to change it,
let's do that separately, and test it on targets that have doloops.
Until then, the wide-int branch can provide the current interface
just as easily as mainline.

we do not have a predicate at the rtl level like we do at the tree level
where we can ask if the value can be represented in a mode without
losing any information.   But the proper thing to do here is to check
to see if iter fits in the mode, if it does, then generate the
immed_wide_int_const and if not generate the 0.   In this way it does
the right thing for loops that iterate 1<<GET_MODE_PRECISION (mode) times,
i.e. it will produce a 0.   And that will be correct even if the mode is
SI or TI.
Well, like I say, I don't see the current rtx as having a mode,
even conceptually.  The CONST_INT is just a convenient way of passing
an iteration count to a function that only accepts rtxes as arguments.
If we could, we'd be passing the widest_int directly as a pointer or
reference.

But since I'm just repeating what I said above, I should probably bow
out of this one.

your point seems to be that this is a place where the "assumption" that
rtxes are signed bites us.
if mode happens to be SImode but the variable was unsigned and large,
then we will sign extend this and get a pessimistic answer for this
pattern.

Right.


on the other hand, if we do what you suggest, then we are
forever limited by the host precision.

I see your point, but i do not know what to do.

Note that what I'm suggesting is simply restoring the mainline "else" code,
rather than something new.  wide-int doesn't change which of the if-else
arms are chosen, and if we keep the else arm as-is, existing code will
be happy.

"Forever" is a bit pessimistic (though probably true).  We're only
limited until someone who cares about wide counters comes along and
changes things.  Or if you feel particularly motivated, you could do
it after the merge.

Re: [Patch] Implementation of n3793

2013-11-02 Thread Luc Danton

On 2013-11-02 19:02, Paolo Carlini wrote:



Can you expand? I think it's just as much inline as the other overload
-- does it need to be different?

The other overload is constexpr thus it's implicitly inline. The fallback is 
very simple too and I think it should be declared inline, unless you analyzed 
the assembly and believe it normally boils down to more than, say, 5 
instructions.

Paolo



I see. It didn't occur to me to declare it inline, as I only ever use 
the keyword to satisfy

ODR requirements. E.g. the non-member swap isn't declared inline either.

For future reference, is there a rule of thumb in use?


Re: [Patch] Implementation of n3793

2013-11-02 Thread Paolo Carlini


Hi

>I see. It didn't occur to me to declare it inline, as I only ever use
>the keyword to satisfy
>ODR requirements. E.g. the non-member swap isn't declared inline
>either.
>
>For future reference, is there a rule of thumb in use?

In general we are very careful with code bloat, but free functions which just 
forward to other functions should be definitely inline, otherwise typically at 
widely used optimization levels like -O2 users get suboptimal performance for 
no reason. But please feel free to experiment and report your findings on the 
mailing list!

Paolo





LRA vs reload on powerpc: 2 extra FAILs that are actually improvements?

2013-11-02 Thread Steven Bosscher
Hello,

Today's powerpc64-linux gcc has 2 extra failures with -mlra vs. reload
(i.e. svn unpatched).

(I'm excluding guality failure differences here because there are too
many of them that seem to fail at random after minimal changes
anywhere in the compiler...).

Test results are posted here:
reload: http://gcc.gnu.org/ml/gcc-testresults/2013-11/msg00128.html
lra: http://gcc.gnu.org/ml/gcc-testresults/2013-11/msg00129.html

The new failures and total score is as follows (+=lra, -=reload):
+FAIL: gcc.target/powerpc/pr53199.c scan-assembler-times stwbrx 6
+FAIL: gcc.target/powerpc/pr58330.c scan-assembler-not stwbrx

=== gcc Summary ===

-# of expected passes   97887
-# of unexpected failures   536
+# of expected passes   97903
+# of unexpected failures   538
 # of unexpected successes  38
 # of expected failures 244
-# of unsupported tests 1910
+# of unsupported tests 1892


The failure of pr53199.c is because of different instruction selection
for bswap. Test case is reduced to just one function:

/* { dg-options "-O2 -mcpu=power6 -mavoid-indexed-addresses" } */
long long
reg_reverse (long long x)
{
  return __builtin_bswap64 (x);
}

Reload left vs. LRA right:
reg_reverse:   reg_reverse:
srdi 8,3,32  | addi 8,1,-16
rlwinm 7,3,8,0x  | srdi 10,3,32
rlwinm 9,8,8,0x  | addi 9,8,4
rlwimi 7,3,24,0,7| stwbrx 3,0,8
rlwimi 7,3,24,16,23  | stwbrx 10,0,9
rlwimi 9,8,24,0,7| ld 3,-16(1)
rlwimi 9,8,24,16,23  <
sldi 7,7,32  <
or 7,7,9 <
mr 3,7   <
blrblr

This same difference is responsible for the failure of pr58330.c which
also uses __builtin_bswap64().

The difference in RTL for the test case is this (after reload vs. after LRA):
-   11: {%7:DI=bswap(%3:DI);clobber %8:DI;clobber %9:DI;clobber %10:DI;}
-   20: %3:DI=%7:DI
+   20: %8:DI=%1:DI-0x10
+   21: %8:DI=%8:DI  // stupid no-op move
+   11: {[%8:DI]=bswap(%3:DI);clobber %9:DI;clobber %10:DI;clobber scratch;}
+   19: %3:DI=[%1:DI-0x10]

So LRA believes going through memory is better than using a register,
even though obviously there are plenty registers available.

What LRA does:
  Creating newreg=129
Removing SCRATCH in insn #11 (nop 2)
  Creating newreg=130
Removing SCRATCH in insn #11 (nop 3)
  Creating newreg=131
Removing SCRATCH in insn #11 (nop 4)
// at this point the insn would be a bswapdi2_64bit:
//   11: {%3:DI=bswap(%3:DI);clobber r129;clobber r130;clobber r131;}
// cost calculation for the insn alternatives:
0 Early clobber: reject++
1 Non-pseudo reload: reject+=2
1 Spill pseudo in memory: reject+=3
2 Scratch win: reject+=2
3 Scratch win: reject+=2
4 Scratch win: reject+=2
  alt=0,overall=18,losers=1,rld_nregs=0
0 Non-pseudo reload: reject+=2
0 Spill pseudo in memory: reject+=3
0 Non input pseudo reload: reject++
2 Scratch win: reject+=2
3 Scratch win: reject+=2
  alt=1,overall=16,losers=1,rld_nregs=0
Staticly defined alt reject+=12
0 Early clobber: reject++
2 Scratch win: reject+=2
3 Scratch win: reject+=2
4 Scratch win: reject+=2
0 Conflict early clobber reload: reject--
  alt=2,overall=24,losers=1,rld_nregs=0
 Choosing alt 1 in insn 11:  (0) Z  (1) r  (2) &b  (3) &r  (4)
X {*bswapdi2_64bit}
  Change to class BASE_REGS for r129
  Change to class GENERAL_REGS for r130
  Creating newreg=132 from oldreg=3, assigning class NO_REGS to r132
  Change to class NO_REGS for r131
   11: {r132:DI=bswap(%3:DI);clobber r129:DI;clobber r130:DI;clobber r131:DI;}
  REG_UNUSED r131:DI
  REG_UNUSED r130:DI
  REG_UNUSED r129:DI

LRA selects alternative 1 (Z,r,&b,&r,X) which seems to be the right
choice, from looking at the constraints. Reload selects alternative 2
which is slightly^2 discouraged: (??&r,r,&r,&r,&r).

Is this an improvement or a regression? If it's an improvement then
these two test cases should be adjusted :-)

Ciao!
Steven


Re: lto-plugin: mismatch between ld's architecture and GCC's configure --host

2013-11-02 Thread Thomas Schwinge
Hi!

Ping.


On Fri, 25 Oct 2013 09:23:24 +0200, I wrote:
> Ping.  To sum it up, with these patches applied, there are no changes for
> a "regular" build (not using the new configure options).  On the other
> hand, configuring GCC as described, it is possible to use the 32-bit x86
> linker for/with a x86_64 build, and get the very same GCC test results as
> when using a x86_64 linker.
> 
> 
> On Mon, 14 Oct 2013 12:28:11 +0200, I wrote:
> > On Sat, 12 Oct 2013 12:20:19 +0200, I wrote:
> > > This is a bit of a weird scenario -- but it is supposed to work fine in
> > > my opinion (but doesn't).
> > > 
> > > I have a GNU toolchain as 32-bit x86 GNU/Linux executables, configured to
> > > to generate code for 32-bit x86 by default, and using -m64 for x86_64.
> > > 
> > > This toolchain I'm using on a x86_64 system (which can execute 32-bit
> > > executables) to build a *native* GCC, that is I'm using the 32-bit
> > > toolchain to build a x86_64 GCC (configuring with CC='gcc -m64' CXX='g++
> > > -m64').  I intend to continue using the 32-bit toolchain's linker, which
> > > also is a 32-bit executable (GNU ld).  That one also defaults to x86
> > > code, but can handle the x86_64 case fine if passed -m elf_x86_64, which
> > > GCC does.
> > > 
> > > That the linker is a 32-bit executable is an implementation detail that
> > > is not important generally: it's a separate process living in its own
> > > address space.  However it becomes relevant in the case of linker
> > > plugins: the native x86_64 GCC that I'm building also builds a x86_64
> > > lto-plugin, which the 32-bit ld cannot load:
> > > 
> > > $ gcc/xgcc -B[...] [...]/gcc.c-torture/execute/ieee/2320-1.c 
> > > [...] -flto [...]
> > > [...]/ld: [...]/gcc/liblto_plugin.so: error loading plugin: 
> > > [...]/gcc/liblto_plugin.so: wrong ELF class: ELFCLASS64
> > > collect2: error: ld returned 1 exit status
> > > 
> > > So, aside from building a 64-bit ld (which is the "lame" alternative), I
> > > now need to teach GCC's build system that the lto-plugin may need special
> > > configuration: CC='gcc -m32' -- and [...] its own build of libiberty,
> > > too [...]
> > 
> > > Instead of auto-detecting the linker's
> > > architecture (and then, what to do with that information?), I intend to
> > > make this a manual process (so, some new top-level configure
> > > argument(s)).  Adding yet another set of {...,CC,...}_FOR_[something] is
> > > probably overkill -- I'll try to find something simpler.
> > > 
> > > Any comments on this scenario?
> > 
> > Here are the patches.  Unless the new option is exercised, there are no
> > effects on a native x86_64 GNU/Linux bootstrap build (the build trees'
> > *.o files are identical, as are the test results).  OK to commit?
> 
> > Allow overriding the libiberty used for building the LTO plugin.
> > 
> > lto-plugin/
> > * configure.ac (--with-libiberty): New configure option.
> > * configure: Regenerate.
> > * Makefile.am (libiberty, libiberty_pic): New variables.
> > (liblto_plugin_la_LIBADD, liblto_plugin_la_LDFLAGS)
> > (liblto_plugin_la_DEPENDENCIES): Use them.
> > * Makefile.in: Regenerate.
> > ---
> >  lto-plugin/Makefile.am  | 20 +++-
> >  lto-plugin/Makefile.in  | 22 --
> >  lto-plugin/configure| 17 +++--
> >  lto-plugin/configure.ac |  5 +
> >  4 files changed, 43 insertions(+), 21 deletions(-)
> > 
> > diff --git lto-plugin/Makefile.am lto-plugin/Makefile.am
> > index b24015e..8b7bb54 100644
> > --- lto-plugin/Makefile.am
> > +++ lto-plugin/Makefile.am
> > @@ -15,17 +15,19 @@ libexecsub_LTLIBRARIES = liblto_plugin.la
> >  gcc_build_dir = ../$(host_subdir)/gcc
> >  in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), 
> > $(gcc_build_dir)/$(lib))
> >  
> > -# Can be removed when libiberty becomes a normal convenience library
> > -Wc=-Wc,
> > -
> >  liblto_plugin_la_SOURCES = lto-plugin.c
> > +# Note that we intentionally override the bindir supplied by 
> > ACX_LT_HOST_FLAGS.
> > +liblto_plugin_la_LDFLAGS = $(lt_host_flags) -module -bindir 
> > $(libexecsubdir)
> > +# Can be simplified when libiberty becomes a normal convenience library.
> > +libiberty=$(with_libiberty)/libiberty.a
> > +libiberty_pic=$(with_libiberty)/pic/libiberty.a
> > +Wc=-Wc,
> >  liblto_plugin_la_LIBADD = \
> > -   $(if $(wildcard 
> > ../libiberty/pic/libiberty.a),$(Wc)../libiberty/pic/libiberty.a,)
> > -# Note that we intentionally override the bindir supplied by 
> > ACX_LT_HOST_FLAGS
> > -liblto_plugin_la_LDFLAGS = $(lt_host_flags) -module -bindir 
> > $(libexecsubdir) \
> > -   $(if $(wildcard 
> > ../libiberty/pic/libiberty.a),,-Wc,../libiberty/libiberty.a)
> > -liblto_plugin_la_DEPENDENCIES = $(if $(wildcard \
> > -   ../libiberty/pic/libiberty.a),../libiberty/pic/libiberty.a,)
> > +   $(if $(wildcard $(libiberty_pic)),$(Wc)$(libiberty_pic),)
> > +liblto_plugin_la_LDFLAGS += \
> > +   $(if $(wildcard $(libiberty_pic)),,-Wc,$(libiberty))
> > +lib

[PATCH, rs6000] Re-permute source register for postreload splits of VSX LE stores

2013-11-02 Thread Bill Schmidt
Hi,

When I created the patch to split VSX loads and stores to add permutes
so the register image was correctly little endian, I forgot to implement
a known requirement.  When a VSX store is split after reload, we must
reuse the source register for the permute, which leaves it in a swapped
state after the store.  If the register remains live, this is invalid.
After the store, we need to permute the register back to its original
value.

For each of the little endian VSX store patterns, I've replaced the
define_insn_and_split with a define_insn and two define_splits, one for
prior to reload and one for after reload.  The post-reload split has the
extra behavior.

I don't know of a way to set the insn's length attribute conditionally,
so I'm optimistically setting this to 8, though it will be 12 in the
post-reload case.  Is there any concern with that?  Is there a way to
make it fully accurate?

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  This fixes two failing test cases.  Is this ok for trunk?

Thanks!
Bill


2013-11-02  Bill Schmidt  

* config/rs6000/vsx.md (*vsx_le_perm_store_ for VSX_D):
Replace the define_insn_and_split with a define_insn and two
define_splits, with the split after reload re-permuting the source
register to its original value.
(*vsx_le_perm_store_ for VSX_W): Likewise.
(*vsx_le_perm_store_v8hi): Likewise.
(*vsx_le_perm_store_v16qi): Likewise.


Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md(revision 204192)
+++ gcc/config/rs6000/vsx.md(working copy)
@@ -333,12 +333,18 @@
   [(set_attr "type" "vecload")
(set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_store_"
+(define_insn "*vsx_le_perm_store_"
   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:VSX_D 0 "memory_operand" "")
+(match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
 (vec_select:
   (match_dup 1)
@@ -347,21 +353,43 @@
 (vec_select:
   (match_dup 2)
   (parallel [(const_int 1) (const_int 0)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
: operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
-(define_insn_and_split "*vsx_le_perm_store_"
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:VSX_D 0 "memory_operand" "")
+(match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+(vec_select:
+  (match_dup 1)
+  (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 0)
+(vec_select:
+  (match_dup 1)
+  (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 1)
+(vec_select:
+  (match_dup 1)
+  (parallel [(const_int 1) (const_int 0)])))]
+  "")
+
+(define_insn "*vsx_le_perm_store_"
   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:VSX_W 0 "memory_operand" "")
+(match_operand:VSX_W 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
 (vec_select:
   (match_dup 1)
@@ -372,21 +400,46 @@
   (match_dup 2)
   (parallel [(const_int 2) (const_int 3)
 (const_int 0) (const_int 1)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
: operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
-(define_insn_and_split "*vsx_le_perm_store_v8hi"
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:VSX_W 0 "memory_operand" "")
+(match_operand:VSX_W 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+(vec_select:
+  (match_dup 1)
+  (parallel [(const_int 2) (const_int 3)
+(const_int 0) (const_int 1)])))
+   (set (match_dup 0)
+(vec_select:
+  (match_dup 1)
+  (parallel [(const_int 2) (const_int 3)
+   

Re: [Patch] Implementation of n3793

2013-11-02 Thread Luc Danton


On 2013-11-02 23:31, Paolo Carlini wrote:


In general we are very careful with code bloat, but free functions which just 
forward to other functions should be definitely inline, otherwise typically at 
widely used optimization levels like -O2 users get suboptimal performance for 
no reason. But please feel free to experiment and report your findings on the 
mailing list!

Paolo





After review, the free functions that only ever forward to something 
else and are not implicitly inline appear to be that 
__constexpr_addressof overload and non-member swap. Here's a patch 
(against master) that marks them inline, all tests pass. I briefly and 
casually attempted to notice any effect, but to be honest in my smallish 
testcases GCC appeared to already see through everything.
>From 0175e61f3295159fe7ac2fefec05b829ecd6bc4d Mon Sep 17 00:00:00 2001
From: Michael Brune 
Date: Sun, 3 Nov 2013 01:37:16 +0100
Subject: [PATCH] Marked some forwarding free functions as inline that weren't
 already.

---
 libstdc++-v3/include/experimental/optional | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/experimental/optional b/libstdc++-v3/include/experimental/optional
index 5915892..06e0f29 100644
--- a/libstdc++-v3/include/experimental/optional
+++ b/libstdc++-v3/include/experimental/optional
@@ -157,7 +157,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 */
   template::value,
 int>::type...>
-_Tp* __constexpr_addressof(_Tp& __t)
+inline _Tp* __constexpr_addressof(_Tp& __t)
 { return std::__addressof(__t); }
 
   /**
@@ -790,7 +790,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // [X.Y.11]
   template
-void
+inline void
 swap(optional<_Tp>& __lhs, optional<_Tp>& __rhs)
 noexcept(noexcept(__lhs.swap(__rhs)))
 { __lhs.swap(__rhs); }
-- 
1.8.1.2