Re: [PATCH][4/4][v2] RPO-style value-numbering for FRE/PRE

2018-09-05 Thread Richard Biener
On Wed, 5 Sep 2018, Gerald Pfeifer wrote:

> On Fri, 24 Aug 2018, Richard Biener wrote:
> > Comments are still welcome - I've re-bootstrapped and tested the series
> > on x86_64-unknown-linux-gnu for all languages and will talk about
> > this work at the Cauldron in more detail.
> 
> Is there any chance you can test this on i586 as well?

Sounds similar to PR87134?  I do see successful bootstraps on i?86-linux
on gcc-testresults though.

> Since around that commit (August 27th) my i586 builds are failing
> with something like
> 
>   during GIMPLE pass: pre
>   cp-demangle.c: In function ‘d_print_comp’:
>   cp-demangle.c:5711:1: internal compiler error: Segmentation fault
>   5711 | d_print_comp (struct d_print_info *dpi, int options,
>        | ^~~~
> 
> when doing a regular bootstrap on i586-unknown-freebsd10.4 (with
> clang 3.4.1 as system compiler).
> 
> Alas, when I add -save-temps, the internal compiler error goes away,
> and the backtrace using gdb installed on that system I share isn't 
> very useful, either.  (When I replace cp-demangle.c by cp-demangle.i
> in the invocation the error also disappears.)
> 
> On the other hand, this ICE has been consistent across a week of
> daily builds now.

Any help with tracking this down appreciated ...

Richard.

> Gerald
> 
> /scratch/tmp/gerald/OBJ-0904-1640/./gcc/xgcc 
> -B/scratch/tmp/gerald/OBJ-0904-1640/./gcc/ 
> -B/home/gerald/gcc-ref10-i386/i586-unknown-freebsd10.4/bin/ 
> -B/home/gerald/gcc-ref10-i386/i586-unknown-freebsd10.4/lib/ -isystem 
> /home/gerald/gcc-ref10-i386/i586-unknown-freebsd10.4/include -isystem 
> /home/gerald/gcc-ref10-i386/i586-unknown-freebsd10.4/sys-include 
> -fno-checking -DHAVE_CONFIG_H -I.. 
> -I/scratch/tmp/gerald/GCC-HEAD/libstdc++-v3/../libiberty 
> -I/scratch/tmp/gerald/GCC-HEAD/libstdc++-v3/../include -D_GLIBCXX_SHARED 
> -I/scratch/tmp/gerald/OBJ-0904-1640/i586-unknown-freebsd10.4/libstdc++-v3/include/i586-unknown-freebsd10.4
>  
> -I/scratch/tmp/gerald/OBJ-0904-1640/i586-unknown-freebsd10.4/libstdc++-v3/include
>  
> -I/scratch/tmp/gerald/GCC-HEAD/libstdc++-v3/libsupc++ -g -O2 -DIN_GLIBCPP_V3 
> -Wno-error 
> -c cp-demangle.c  -fPIC -DPIC -o cp-demangle.o

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Backports to 8.3

2018-09-05 Thread Jakub Jelinek
Hi!

I've bootstrapped/regtested the following backports on gcc-8-branch
on x86_64-linux and i686-linux and committed.

Jakub
2018-09-05  Jakub Jelinek  

Backported from mainline
2018-08-03  Jason Merrill  

PR c++/86706
* class.c (build_base_path): Use currently_open_class.

* g++.dg/template/pr86706.C: New test.

--- gcc/cp/class.c  (revision 263292)
+++ gcc/cp/class.c  (revision 263293)
@@ -278,6 +278,9 @@ build_base_path (enum tree_code code,
   probe = TYPE_MAIN_VARIANT (TREE_TYPE (expr));
   if (want_pointer)
 probe = TYPE_MAIN_VARIANT (TREE_TYPE (probe));
+  if (dependent_type_p (probe))
+if (tree open = currently_open_class (probe))
+  probe = open;
 
   if (code == PLUS_EXPR
   && !SAME_BINFO_TYPE_P (BINFO_TYPE (d_binfo), probe))
--- gcc/testsuite/g++.dg/template/pr86706.C (nonexistent)
+++ gcc/testsuite/g++.dg/template/pr86706.C (revision 263293)
@@ -0,0 +1,16 @@
+// PR c++/86706
+// { dg-do compile }
+
+class A { int b; };
+
+template 
+class C : A { C (); static C *f; };
+
+template 
+C *C::f;
+
+template 
+C::C ()
+{
+  f->b;
+}
2018-09-05  Jakub Jelinek  

Backported from mainline
2018-08-08  Jakub Jelinek  

PR c++/86738
* constexpr.c (cxx_eval_binary_expression): For arithmetics involving
NULL pointer set *non_constant_p to true.
(cxx_eval_component_reference): For dereferencing of a NULL pointer,
set *non_constant_p to true and return t.

* g++.dg/opt/pr86738.C: New test.

--- gcc/cp/constexpr.c  (revision 263389)
+++ gcc/cp/constexpr.c  (revision 263390)
@@ -2082,6 +2082,7 @@ cxx_eval_binary_expression (const conste
 {
   if (!ctx->quiet)
error ("arithmetic involving a null pointer in %qE", lhs);
+  *non_constant_p = true;
   return t;
 }
   else if (code == POINTER_PLUS_EXPR)
@@ -2522,9 +2523,13 @@ cxx_eval_component_reference (const cons
 lval,
 non_constant_p, overflow_p);
   if (INDIRECT_REF_P (whole)
-  && integer_zerop (TREE_OPERAND (whole, 0))
-  && !ctx->quiet)
-error ("dereferencing a null pointer in %qE", orig_whole);
+  && integer_zerop (TREE_OPERAND (whole, 0)))
+{
+  if (!ctx->quiet)
+   error ("dereferencing a null pointer in %qE", orig_whole);
+  *non_constant_p = true;
+  return t;
+}
 
   if (TREE_CODE (whole) == PTRMEM_CST)
 whole = cplus_expand_constant (whole);
--- gcc/testsuite/g++.dg/opt/pr86738.C  (nonexistent)
+++ gcc/testsuite/g++.dg/opt/pr86738.C  (revision 263390)
@@ -0,0 +1,12 @@
+// PR c++/86738
+// { dg-do compile }
+
+struct S { int s; };
+unsigned char a[20];
+unsigned char *p = &a[(__UINTPTR_TYPE__) &((S *) 0)->s];
+
+void
+foo ()
+{
+  __builtin_memcpy (&a[15], &a[(__UINTPTR_TYPE__) &((S *) 0)->s], 2);
+}
2018-09-05  Jakub Jelinek  

Backported from mainline
2018-08-08  Jakub Jelinek  

PR c++/86836
* pt.c (tsubst_expr): For structured bindings, call tsubst_decomp_names
before tsubst_init, not after it.

* g++.dg/cpp1z/decomp46.C: New test.

--- gcc/cp/pt.c (revision 263390)
+++ gcc/cp/pt.c (revision 263391)
@@ -16740,7 +16740,17 @@ tsubst_expr (tree t, tree args, tsubst_f
else
  {
int const_init = false;
+   unsigned int cnt = 0;
+   tree first = NULL_TREE, ndecl = error_mark_node;
maybe_push_decl (decl);
+
+   if (VAR_P (decl)
+   && DECL_DECOMPOSITION_P (decl)
+   && TREE_TYPE (pattern_decl) != error_mark_node)
+ ndecl = tsubst_decomp_names (decl, pattern_decl, args,
+  complain, in_decl, &first,
+  &cnt);
+
if (VAR_P (decl)
&& DECL_PRETTY_FUNCTION_P (decl))
  {
@@ -16756,23 +16766,14 @@ tsubst_expr (tree t, tree args, tsubst_f
if (VAR_P (decl))
  const_init = (DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P
(pattern_decl));
-   if (VAR_P (decl)
-   && DECL_DECOMPOSITION_P (decl)
-   && TREE_TYPE (pattern_decl) != error_mark_node)
- {
-   unsigned int cnt;
-   tree first;
-   tree ndecl
- = tsubst_decomp_names (decl, pattern_decl, args,
-complain, in_decl, &first, 
&cnt);
-   if (ndecl != error_mark_node)
- cp_maybe_mangle_decomp (ndecl, first, cnt);
-   cp_finish_decl (decl, init, const_init, NULL_TREE, 0);
-   if (n

Re: [PATCH][4/4][v2] RPO-style value-numbering for FRE/PRE

2018-09-05 Thread Gerald Pfeifer
On Tue, 4 Sep 2018, Jeff Law wrote:
>> On the other hand, this ICE has been consistent across a week of
>> daily builds now.
> An FYI, My i686 builds have been running OK.  But given what you've
> described this could well be an uninitialized read, dangling pointer,
> out of bounds write or some similar kind of bug.

I did binary search now, and am afraid it's really that patch, Richard:

Revision 263874 appears just fine; 263875 breaks as per my original 
message.

Gerald


Re: [PATCH][4/4][v2] RPO-style value-numbering for FRE/PRE

2018-09-05 Thread Richard Biener
On Wed, 5 Sep 2018, Gerald Pfeifer wrote:

> On Tue, 4 Sep 2018, Jeff Law wrote:
> >> On the other hand, this ICE has been consistent across a week of
> >> daily builds now.
> > An FYI, My i686 builds have been running OK.  But given what you've
> > described this could well be an uninitialized read, dangling pointer,
> > out of bounds write or some similar kind of bug.
> 
> I did binary search now, and am afraid it's really that patch, Richard:
> 
> Revision 263874 appears just fine; 263875 breaks as per my original 
> message.

Sure - but without a way to reproduce locally investigation is really
hard...  So I'm concentrating on the bugs the rev caused that I can
reproduce and thus fix ;)

Richard.


Re: [PATCH] Ignore properly -mdirect-move (PR target/87164).

2018-09-05 Thread Martin Liška
On 09/04/2018 09:40 PM, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Sep 04, 2018 at 04:02:23PM +0200, Martin Liška wrote:
>> Option mdirect-move should be Deprecated, that means option value is ignored
>> and user can't influence rs6000_isa_flags.
>>
>> Patch can bootstrap on ppc64le-redhat-linux (gcc110 and gcc112) and survives 
>> regression tests.
>>
>> Ready to be installed?
> 
> So the mask is still defined then, and mask for that variable?  Sounds fine
> then, thanks.

Yes, Deprecated is basically Ignore+Warn(...).

> 
> The other issue is still there: options.texi says:
> @item Ignore
> This option is ignored apart from printing any warning specified using
> @code{Warn}.
> 
> So Warn is explicitly allowed with Ignore, not forbidden.  If you want to
> change that, you'll have to change the documentation as well ;-)

Which means yes, the documentation is not correct. I'm suggesting that:

diff --git a/gcc/doc/options.texi b/gcc/doc/options.texi
index f887d16f88f..dd201c3b7bc 100644
--- a/gcc/doc/options.texi
+++ b/gcc/doc/options.texi
@@ -431,9 +431,10 @@ negated.  @code{NegativeAlias} may not be used with the 
forms of
 @code{Alias} taking more than one argument.
 
 @item Ignore
-This option is ignored apart from printing any warning specified using
-@code{Warn}.  The option will not be seen by specs and no @samp{OPT_}
-enumeration value is defined for it.
+This option is ignored and the option will not be seen by specs
+and no @samp{OPT_} enumeration value is defined for it.  The option
+cannot be combined with @code{Warn} and in such case @code{Deprecated}
+should be used.
 
 @item SeparateAlias
 For an option marked with @code{Joined}, @code{Separate} and

Feel free to make language correction. I'm going to install the current fix
for the PR and documentation can be updated independently.

Martin

> 
> I'll test it later, might be tomorrow.  Thanks,
> 
> 
> Segher
> 



[PATCH] Group switch cases in switch lowering (PR tree-optimization/87205).

2018-09-05 Thread Martin Liška
Hi.

It's beneficial to group cases before switch lowering machinery
is making a decision what to do with a switch.

Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Ready to be installed?
Martin


gcc/ChangeLog:

2018-09-04  Martin Liska  

PR tree-optimization/87205
* tree-switch-conversion.c (pass_lower_switch::execute):
Group cases for switch statements.

gcc/testsuite/ChangeLog:

2018-09-04  Martin Liska  

PR tree-optimization/87205
* gcc.dg/tree-ssa/pr87205-2.c: New test.
* gcc.dg/tree-ssa/pr87205.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c | 18 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr87205.c   | 21 +
 gcc/tree-switch-conversion.c  |  8 ++--
 3 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205.c


diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c
new file mode 100644
index 000..fb1879ed71b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(int);
+void h(unsigned i)
+{
+  switch (i) {
+default: __builtin_unreachable();
+case 0: f(42); break;
+case 1: f(42); break;
+case 2: f(42); break;
+case 3: f(42); break;
+case 4: f(42); break;
+case 5: f(42); break;
+  }
+} 
+
+/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr87205.c b/gcc/testsuite/gcc.dg/tree-ssa/pr87205.c
new file mode 100644
index 000..129e60747e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr87205.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f( int x );
+
+void h( unsigned ix )
+{
+  switch( ix )
+  {
+case 0: f(42); break;
+case 1: f(42); break;
+case 2: f(42); break;
+case 3: f(42); break;
+case 4: f(42); break;
+case 5: f(42); break;
+default: __builtin_unreachable();
+  }
+}
+
+
+/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 1f543b2ecc8..85039e23e58 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -2419,8 +2419,12 @@ pass_lower_switch::execute (function *fun)
   FOR_EACH_BB_FN (bb, fun)
 {
   gimple *stmt = last_stmt (bb);
-  if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
-	switch_statements.safe_push (stmt);
+  gswitch *swtch;
+  if (stmt && (swtch = dyn_cast (stmt)))
+	{
+	  group_case_labels_stmt (swtch);
+	  switch_statements.safe_push (swtch);
+	}
 }
 
   for (unsigned i = 0; i < switch_statements.length (); i++)



Re: Do not stream TYPE_STUB_DECL

2018-09-05 Thread Richard Biener
On Thu, Aug 23, 2018 at 2:28 PM Jan Hubicka  wrote:
>
> Hi,
> this patch removes streaming of TYPE_STUB_DECL. The sanity checking part
> depends on the coverage change but I may just drop it (though I think it is
> useful as a sanity check that things are consistent within the middle-end).
>
> lto-bootstrapped/regtested x86_64-linux, OK?

OK.

> Honza
>
> * ipa-utils.h (polymorphic_type_binfo_p,
> type_in_anonymous_namespace_p): Expect free lang data to remove
> TYPE_STUB_DECL after producing mangled names for types with linkage.
> * lto-stramer-out.c (DFS::DFS_write_tree_body, hash_tree):
> Do not walk TYPE_STUB_DECL; sanity check it is NULL.
> * tree-streamer-in.c (lto_input_ts_type_common_tree_pointers):
> Do not stream TYPE_STUB_DECL.
> * tree-streamer-out.c (write_ts_type_common_tree_pointers): Likewise.
> * tree.c (free_lang_data_in_type): Always clear TYPE_STUB_DECL.
> Index: ipa-utils.h
> ===
> --- ipa-utils.h (revision 263696)
> +++ ipa-utils.h (working copy)
> @@ -179,22 +179,24 @@ polymorphic_type_binfo_p (const_tree bin
>  inline bool
>  type_with_linkage_p (const_tree t)
>  {
> -  if (!TYPE_NAME (t) || TREE_CODE (TYPE_NAME (t)) != TYPE_DECL
> -  || !TYPE_STUB_DECL (t))
> +  if (!TYPE_NAME (t) || TREE_CODE (TYPE_NAME (t)) != TYPE_DECL)
> +return false;
> +
> +  /* To support -fno-lto-odr-type-merigng recognize types with vtables
> + to have linkage.  */
> +  if (RECORD_OR_UNION_TYPE_P (t)
> +  && TYPE_BINFO (t) && BINFO_VTABLE (TYPE_BINFO (t)))
> +return true;
> +
> +  /* After free_lang_data was run and -flto-odr-type-merging we can recongize
> + types with linkage by presence of mangled name.  */
> +  if (DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t)))
> +return true;
> +
> +  /* If free lang data was not run check if indeed the type looks like C++
> + type with linkage.  */
> +  if (in_lto_p || !TYPE_STUB_DECL (t))
>  return false;
> -  /* In LTO do not get confused by non-C++ produced types or types built
> - with -fno-lto-odr-type-merigng.  */
> -  if (in_lto_p)
> -{
> -  /* To support -fno-lto-odr-type-merigng recognize types with vtables
> - to have linkage.  */
> -  if (RECORD_OR_UNION_TYPE_P (t)
> - && TYPE_BINFO (t) && BINFO_VTABLE (TYPE_BINFO (t)))
> -return true;
> -  /* With -flto-odr-type-merging C++ FE specify mangled names
> -for all types with the linkage.  */
> -  return DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t));
> -}
>
>if (!RECORD_OR_UNION_TYPE_P (t) && TREE_CODE (t) != ENUMERAL_TYPE)
>  return false;
> @@ -214,18 +216,16 @@ type_in_anonymous_namespace_p (const_tre
>  {
>gcc_checking_assert (type_with_linkage_p (t));
>
> -  if (!TREE_PUBLIC (TYPE_STUB_DECL (t)))
> -{
> -  /* C++ FE uses magic  as assembler names of anonymous types.
> -verify that this match with type_in_anonymous_namespace_p.  */
> -  gcc_checking_assert (!in_lto_p
> -  || !DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t))
> -  || !strcmp ("",
> -  IDENTIFIER_POINTER
> -  (DECL_ASSEMBLER_NAME (TYPE_NAME 
> (t);
> -  return true;
> -}
> -  return false;
> +  /* free_lang_data clears TYPE_STUB_DECL but sets assembler name to
> + ""  */
> +  if (DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t)))
> +return !strcmp ("",
> +   IDENTIFIER_POINTER
> +   (DECL_ASSEMBLER_NAME (TYPE_NAME (t;
> +  else if (!TYPE_STUB_DECL (t))
> +return false;
> +  else
> +return !TREE_PUBLIC (TYPE_STUB_DECL (t));
>  }
>
>  /* Return true of T is type with One Definition Rule info attached.
> Index: lto-streamer-out.c
> ===
> --- lto-streamer-out.c  (revision 263696)
> +++ lto-streamer-out.c  (working copy)
> @@ -857,7 +857,7 @@ DFS::DFS_write_tree_body (struct output_
>DFS_follow_tree_edge (TYPE_CONTEXT (expr));
>/* TYPE_CANONICAL is re-computed during type merging, so no need
>  to follow it here.  */
> -  DFS_follow_tree_edge (TYPE_STUB_DECL (expr));
> +  gcc_checking_assert (TYPE_STUB_DECL (expr) == NULL);
>  }
>
>if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON))
> @@ -1270,7 +1270,6 @@ hash_tree (struct streamer_tree_cache_d
> ;
>else
> visit (TYPE_CONTEXT (t));
> -  visit (TYPE_STUB_DECL (t));
>  }
>
>if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON))
> Index: tree-streamer-in.c
> ===
> --- tree-streamer-in.c  (revision 263696)
> +++ tree-streamer-in.c  (working copy)
> @@ -820,7 +820,6 @@ lto_input_ts_type_common_tree_pointers (
>TYPE_CONTEXT (expr) = stream_read_tree (ib, data_in);
>/* TYPE_CAN

[PATCH] S/390: Fix conditional returns

2018-09-05 Thread Ilya Leoshkevich
S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
the more usual (return) or (simple_return).  This sequence is not
recognized by the conditional return logic in try_optimize_cfg ().

gcc/ChangeLog:

2018-08-28  Ilya Leoshkevich  

PR target/80080
* cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing
RETURNs.
* cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing
conditional jumps.
* config/s390/s390.md: Recognize PARALLELs containing RETURNs.
* jump.c (copy_update_parallel): Create a copy of a PARALLEL
in which one of side effects is replaced.
(redirect_exp_1): Handle jump targets that are PARALLELs
containing RETURNs.
(redirect_jump_2): Likewise.
(return_in_parallel): Recognize PARALLELs containing RETURNs.
* rtl.h (return_in_parallel): Add declaration.

gcc/testsuite/ChangeLog:

2018-08-28  Ilya Leoshkevich  

PR target/80080
* gcc.target/s390/risbg-ll-3.c: Expect conditional returns.
* gcc.target/s390/zvector/vec-cmp-2.c: Likewise.
---
 gcc/cfgcleanup.c  |  2 +-
 gcc/cfgrtl.c  |  3 +-
 gcc/config/s390/s390.md   | 13 +++-
 gcc/jump.c| 69 ++-
 gcc/rtl.h |  1 +
 gcc/testsuite/gcc.target/s390/risbg-ll-3.c|  4 +-
 .../gcc.target/s390/zvector/vec-cmp-2.c   | 48 ++---
 7 files changed, 108 insertions(+), 32 deletions(-)

diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 4a5dc29d14f..7f2545f453f 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, 
rtx_insn **use)
   {
rtx pat = PATTERN (insn);
 
-   if (!*ret && ANY_RETURN_P (pat))
+   if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat)))
  *ret = insn;
else if (!*ret && !*use && GET_CODE (pat) == USE
&& REG_P (XEXP (pat, 0))
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 3b1931daeba..701c6a985b8 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -2987,7 +2987,8 @@ rtl_verify_bb_layout (void)
}
 
   if (JUMP_P (x)
- && returnjump_p (x) && ! condjump_p (x)
+ && returnjump_p (x)
+ && ! (condjump_p (x) || condjump_in_parallel_p (x))
  && ! ((y = next_nonnote_nondebug_insn (x))
&& BARRIER_P (y)))
fatal_insn ("return not followed by barrier", x);
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index db260e41bfd..3c413638038 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8842,8 +8842,19 @@
(set_attr "type"  "branch")
(set_attr "atype" "agen")])
 
+(define_subst "add_use_return_reg_subst"
+  [(set (match_operand 0 "" "")
+   (match_operand 1 "" ""))]
+  ""
+  [(set (match_dup 0)
+   (match_dup 1))
+   (use (reg RETURN_REGNUM))])
+
+(define_subst_attr "add_use_return_reg_name" "add_use_return_reg_subst"
+  "" "_use_return_reg")
+
 ;; A conditional return instruction.
-(define_insn "*c"
+(define_insn "*c"
   [(set (pc)
 (if_then_else
   (match_operator 0 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
diff --git a/gcc/jump.c b/gcc/jump.c
index 06f7255d24d..8057ace74d2 100644
--- a/gcc/jump.c
+++ b/gcc/jump.c
@@ -1423,6 +1423,26 @@ redirect_target (rtx x)
   return x;
 }
 
+/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE.  */
+static rtx
+copy_update_parallel (rtx par, rtx *oside, rtx nside)
+{
+  rtx npar;
+  int i;
+
+  npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0)));
+  for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+{
+  rtx *side_effect = &XVECEXP (par, 0, i);
+
+  if (side_effect == oside)
+   XVECEXP (npar, 0, i) = nside;
+  else
+   XVECEXP (npar, 0, i) = copy_rtx (*side_effect);
+}
+  return npar;
+}
+
 /* Throughout LOC, redirect OLABEL to NLABEL.  Treat null OLABEL or
NLABEL as a return.  Accrue modifications into the change group.  */
 
@@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, 
rtx_insn *insn)
   if ((code == LABEL_REF && label_ref_label (x) == olabel)
   || x == olabel)
 {
-  x = redirect_target (nlabel);
-  if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
-   x = gen_rtx_SET (pc_rtx, x);
+  rtx *nret = return_in_parallel (nlabel);
+
+  if (nret)
+   {
+ rtx npat;
+
+ x = *nret;
+ npat = copy_update_parallel (nlabel, nret, PATTERN (insn));
+ validate_change (insn, &PATTERN (insn), npat, 1);
+   }
+  else
+   {
+ x = redirect_target (nlabel);
+ if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
+   x = gen_rtx_SET (pc_rtx, x);
+   }
   validate_change (insn, loc, x, 1);
   return;
 }
@@ -1551,

[PATCH 1/2] S/390: Register pass_s390_early_mach statically

2018-09-05 Thread Ilya Leoshkevich
The dump file used to come at the end of the sorted dump file list,
because the pass was registered dynamically. This did not reflect the
order in which passes are executed. Static registration fixes this:

* foo4.c.277r.split2
* foo4.c.281r.early_mach
* foo4.c.282r.pro_and_epilogue

gcc/ChangeLog:

2018-08-27  Ilya Leoshkevich  

PR target/80080
* config/s390/s390-passes.def: New file.
* config/s390/s390-protos.h (class rtl_opt_pass): Add forward
declaration.
(make_pass_s390_early_mach): Add declaration.
* config/s390/s390.c (make_pass_s390_early_mach):
(s390_option_override): Remove dynamic registration.
* config/s390/t-s390: Add s390-passes.def.
---
 gcc/config/s390/s390-passes.def | 20 
 gcc/config/s390/s390-protos.h   |  6 ++
 gcc/config/s390/s390.c  | 21 ++---
 gcc/config/s390/t-s390  |  1 +
 4 files changed, 33 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/s390/s390-passes.def

diff --git a/gcc/config/s390/s390-passes.def b/gcc/config/s390/s390-passes.def
new file mode 100644
index 000..035c6e8bc0a
--- /dev/null
+++ b/gcc/config/s390/s390-passes.def
@@ -0,0 +1,20 @@
+/* Description of target passes for S/390.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+INSERT_PASS_BEFORE (pass_thread_prologue_and_epilogue, 1, 
pass_s390_early_mach);
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 46f0743461d..45fce6ce865 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -165,3 +165,9 @@ extern void s390_register_target_pragmas (void);
 
 /* Routines for s390-c.c */
 extern bool s390_const_operand_ok (tree, int, int, tree);
+
+/* Pass management.  */
+namespace gcc { class context; }
+class rtl_opt_pass;
+
+extern rtl_opt_pass *make_pass_s390_early_mach (gcc::context *ctxt);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 5c2a8cb2c6c..e869e2eb506 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10677,6 +10677,12 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+rtl_opt_pass *
+make_pass_s390_early_mach (gcc::context *ctxt)
+{
+  return new pass_s390_early_mach (ctxt);
+}
+
 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
- push too big immediates to the literal pool and annotate the refs
- emit frame related notes for stack pointer changes.  */
@@ -15085,21 +15091,6 @@ s390_option_override (void)
   if (!global_options_set.x_dwarf_version)
dwarf_version = 2;
 }
-
-  /* Register a target-specific optimization-and-lowering pass
- to run immediately before prologue and epilogue generation.
-
- Registering the pass must be done at start up.  It's
- convenient to do it here.  */
-  opt_pass *new_pass = new pass_s390_early_mach (g);
-  struct register_pass_info insert_pass_s390_early_mach =
-{
-  new_pass,/* pass */
-  "pro_and_epilogue",  /* reference_pass_name */
-  1,   /* ref_pass_instance_number */
-  PASS_POS_INSERT_BEFORE   /* po_op */
-};
-  register_pass (&insert_pass_s390_early_mach);
 }
 
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/t-s390 b/gcc/config/s390/t-s390
index cdea373040a..8ca0c7879c9 100644
--- a/gcc/config/s390/t-s390
+++ b/gcc/config/s390/t-s390
@@ -18,6 +18,7 @@
 
 TM_H += $(srcdir)/config/s390/s390-builtins.def
 TM_H += $(srcdir)/config/s390/s390-builtin-types.def
+PASSES_EXTRA += $(srcdir)/config/s390/s390-passes.def
 
 s390-c.o: $(srcdir)/config/s390/s390-c.c \
   $(srcdir)/config/s390/s390-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
-- 
2.18.0



[PATCH 2/2] S/390: Repeat jump threading after combine

2018-09-05 Thread Ilya Leoshkevich
Combine can change basic blocks in a way that they end up containing
a single jump_insn. This creates an opportunity to improve code with
jump threading.

gcc/ChangeLog:

2018-08-28  Ilya Leoshkevich  

PR target/80080
* cfgcleanup.c: Make jump pass clonable.
* config/s390/s390-passes.def (INSERT_PASS_AFTER): Perform jump
threading after combine.

gcc/testsuite/ChangeLog:

2018-08-28  Ilya Leoshkevich  

PR target/80080
* gcc.target/s390/pr80080-4.c: New test.
---
 gcc/cfgcleanup.c  |  1 +
 gcc/config/s390/s390-passes.def   |  1 +
 gcc/testsuite/gcc.target/s390/pr80080-4.c | 16 
 3 files changed, 18 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr80080-4.c

diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 4a5dc29d14f..35aa9f0ac4a 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -3234,6 +3234,7 @@ public:
   {}
 
   /* opt_pass methods: */
+  virtual opt_pass *clone () { return new pass_jump (m_ctxt); }
   virtual unsigned int execute (function *);
 
 }; // class pass_jump
diff --git a/gcc/config/s390/s390-passes.def b/gcc/config/s390/s390-passes.def
index 035c6e8bc0a..2e9b208553e 100644
--- a/gcc/config/s390/s390-passes.def
+++ b/gcc/config/s390/s390-passes.def
@@ -17,4 +17,5 @@ You should have received a copy of the GNU General Public 
License
 along with GCC; see the file COPYING3.  If not see
 .  */
 
+INSERT_PASS_AFTER (pass_combine, 1, pass_jump);
 INSERT_PASS_BEFORE (pass_thread_prologue_and_epilogue, 1, 
pass_s390_early_mach);
diff --git a/gcc/testsuite/gcc.target/s390/pr80080-4.c 
b/gcc/testsuite/gcc.target/s390/pr80080-4.c
new file mode 100644
index 000..91d31ec7845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr80080-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=z196 -O2" } */
+
+extern void bar(int *mem);
+
+void foo4(int *mem)
+{
+  int oldval = 0;
+  if (!__atomic_compare_exchange_n (mem, (void *) &oldval, 1,
+   1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+{
+  bar (mem);
+}
+}
+
+/* { dg-final { scan-assembler 
"\n\tlt\t.*\n\tjne\t(\\.L\\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\\1:\n\tjg\tbar\n"
 } } */
-- 
2.18.0



[PATCH][OBVIOUS] Fix a scan in test for Darwin target (PR testsuite/87216).

2018-09-05 Thread Martin Liška
Hi.

I would like to update a test-case, tested on x86_64-linux-gnu and
on Darwin by Rainer.

I'm going to install that.

Martin

gcc/testsuite/ChangeLog:

2018-09-05  Martin Liska  

PR testsuite/87216
* gcc.dg/tree-prof/pr59521-3.c: Update scanned pattern
to support Dawring names.
---
 gcc/testsuite/gcc.dg/tree-prof/pr59521-3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr59521-3.c b/gcc/testsuite/gcc.dg/tree-prof/pr59521-3.c
index 497643bed3d..00232176695 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/pr59521-3.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/pr59521-3.c
@@ -31,4 +31,4 @@ int main()
   }
 }
 
-/* { dg-final-use-not-autofdo { scan-assembler "\nfoo:\n.*cmp.*1,.*cmp.*10,.*cmp.*100" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final-use-not-autofdo { scan-assembler "\n_?foo:\n.*cmp.*1,.*cmp.*10,.*cmp.*100" { target i?86-*-* x86_64-*-* } } } */



Re: [PATCH][OBVIOUS] Fix a scan in test for Darwin target (PR testsuite/87216).

2018-09-05 Thread Rainer Orth
Hi Martin,

> 2018-09-05  Martin Liska  
>
>   PR testsuite/87216
>   * gcc.dg/tree-prof/pr59521-3.c: Update scanned pattern
>   to support Dawring names.
   ^ Darwin ;-)

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH][OBVIOUS] Fix a scan in test for Darwin target (PR testsuite/87216).

2018-09-05 Thread Martin Liška
On 09/05/2018 11:08 AM, Rainer Orth wrote:
> Hi Martin,
> 
>> 2018-09-05  Martin Liska  
>>
>>  PR testsuite/87216
>>  * gcc.dg/tree-prof/pr59521-3.c: Update scanned pattern
>>  to support Dawring names.
>^ Darwin ;-)
> 
>   Rainer
> 

Thanks, fixed.

Martin


[PING][PATCH] Frame pointer for arm with THUMB2 mode

2018-09-05 Thread Denis Khalikov
Hello everyone,
can someone, please, take a look at this patch
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01656.html

Thanks.


Re: [PATCH] Group switch cases in switch lowering (PR tree-optimization/87205).

2018-09-05 Thread Richard Biener
On Wed, Sep 5, 2018 at 10:31 AM Martin Liška  wrote:
>
> Hi.
>
> It's beneficial to group cases before switch lowering machinery
> is making a decision what to do with a switch.
>
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>
> Ready to be installed?

Hmm, do we want to do this at O0?  There it should be redundant given
CFG build already performs it?  We also perform it at .optimized
(execute_cleanup_cfg_post_optimizing) plus when we redirect some
edges and then call record_case_labels.

Otherwise OK.

Richard.

> Martin
>
>
> gcc/ChangeLog:
>
> 2018-09-04  Martin Liska  
>
> PR tree-optimization/87205
> * tree-switch-conversion.c (pass_lower_switch::execute):
> Group cases for switch statements.
>
> gcc/testsuite/ChangeLog:
>
> 2018-09-04  Martin Liska  
>
> PR tree-optimization/87205
> * gcc.dg/tree-ssa/pr87205-2.c: New test.
> * gcc.dg/tree-ssa/pr87205.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c | 18 ++
>  gcc/testsuite/gcc.dg/tree-ssa/pr87205.c   | 21 +
>  gcc/tree-switch-conversion.c  |  8 ++--
>  3 files changed, 45 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205.c
>
>


Re: [PATCH 2/2] S/390: Repeat jump threading after combine

2018-09-05 Thread Richard Biener
On Wed, Sep 5, 2018 at 10:44 AM Ilya Leoshkevich  wrote:
>
> Combine can change basic blocks in a way that they end up containing
> a single jump_insn. This creates an opportunity to improve code with
> jump threading.

Hmm, I think CFG cleanup performs this as well (if run in the correct mode)
so maybe combine should invoke that?

I don't think you should alter the pass pipeline this way in
architecture specific ways.

Richard.

> gcc/ChangeLog:
>
> 2018-08-28  Ilya Leoshkevich  
>
> PR target/80080
> * cfgcleanup.c: Make jump pass clonable.
> * config/s390/s390-passes.def (INSERT_PASS_AFTER): Perform jump
> threading after combine.
>
> gcc/testsuite/ChangeLog:
>
> 2018-08-28  Ilya Leoshkevich  
>
> PR target/80080
> * gcc.target/s390/pr80080-4.c: New test.
> ---
>  gcc/cfgcleanup.c  |  1 +
>  gcc/config/s390/s390-passes.def   |  1 +
>  gcc/testsuite/gcc.target/s390/pr80080-4.c | 16 
>  3 files changed, 18 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/s390/pr80080-4.c
>
> diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
> index 4a5dc29d14f..35aa9f0ac4a 100644
> --- a/gcc/cfgcleanup.c
> +++ b/gcc/cfgcleanup.c
> @@ -3234,6 +3234,7 @@ public:
>{}
>
>/* opt_pass methods: */
> +  virtual opt_pass *clone () { return new pass_jump (m_ctxt); }
>virtual unsigned int execute (function *);
>
>  }; // class pass_jump
> diff --git a/gcc/config/s390/s390-passes.def b/gcc/config/s390/s390-passes.def
> index 035c6e8bc0a..2e9b208553e 100644
> --- a/gcc/config/s390/s390-passes.def
> +++ b/gcc/config/s390/s390-passes.def
> @@ -17,4 +17,5 @@ You should have received a copy of the GNU General Public 
> License
>  along with GCC; see the file COPYING3.  If not see
>  .  */
>
> +INSERT_PASS_AFTER (pass_combine, 1, pass_jump);
>  INSERT_PASS_BEFORE (pass_thread_prologue_and_epilogue, 1, 
> pass_s390_early_mach);
> diff --git a/gcc/testsuite/gcc.target/s390/pr80080-4.c 
> b/gcc/testsuite/gcc.target/s390/pr80080-4.c
> new file mode 100644
> index 000..91d31ec7845
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/pr80080-4.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=z196 -O2" } */
> +
> +extern void bar(int *mem);
> +
> +void foo4(int *mem)
> +{
> +  int oldval = 0;
> +  if (!__atomic_compare_exchange_n (mem, (void *) &oldval, 1,
> +   1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
> +{
> +  bar (mem);
> +}
> +}
> +
> +/* { dg-final { scan-assembler 
> "\n\tlt\t.*\n\tjne\t(\\.L\\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\\1:\n\tjg\tbar\n"
>  } } */
> --
> 2.18.0
>


Re: [PING 5][PATCH] [v4][aarch64] Avoid tag collisions for loads falkor

2018-09-05 Thread Siddhesh Poyarekar

On Thursday 30 August 2018 01:28 PM, Siddhesh Poyarekar wrote:

On Wednesday 29 August 2018 10:05 PM, James Greenhalgh wrote:
> Sorry that this took me so long to get to.
>
> The code is outstanding quality, a textbook example of writing an
> analysis/optimization pass using modern GCC frameworks and data
> structures! If you ever find the opportunity, I bet you could create
> some useful newcomer training materials from this source.
>
> This is almost OK for trunk, please just clean up the (very few)
> issues detected by check_GNU_style.py and make the obvious rebase on
> trunk. As I know you will ask, you can also backport to GCC 8 if you
> are happy with that risk for the Falkor subtarget.

Thanks for the very generous review :)  I'll fix it up and push it next 
week when I return from my holiday.


Fixed up after rebase, build-tested and committed now.

Thanks,
Siddhesh


[PATCH] Fix PR87225

2018-09-05 Thread Richard Biener


Committed as obvious.

Richard.

2018-09-05  Richard Biener  

PR bootstrap/87225
* tree-vect-stmts.c (vectorizable_simd_clone_call): Fix bogus
return.

Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   (revision 264102)
+++ gcc/tree-vect-stmts.c   (working copy)
@@ -3898,7 +3898,7 @@ vectorizable_simd_clone_call (stmt_vec_i
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 "not considering SIMD clones; not yet supported"
 " for variable-width vectors.\n");
-  return NULL;
+  return false;
 }
 
   unsigned int badness = 0;


Re: [PATCH] fixincludes: vxworks: regs.h: Guard include of vxTypesOld.h by !_ASMLANGUAGE

2018-09-05 Thread Olivier Hainque
Hi Rasmus,

> On 3 Sep 2018, at 15:20, Rasmus Villemoes  wrote:

>> How do we not get in assembly the problems we'd get in C
>> when not including vxTypesOld ?

Answering part of my own question: turns out that some pieces
included via regs.h are already taking care of the _ASMLANGUAGE
case.

> Well, I don't know why types/vxTypesOld.h got shoehorned into the
> fixinclude regs.h [1]. A better fix would be to remove that #include
> completely, making the fixinclude regs.h a pure wrapper for vxworks
> regs.h. However, I was afraid that might break existing C code that has
> come to rely on "#include " pulling in types/vxTypesOld.h (or
> any of the headers included from that), and it's not really possible to
> know if such code exists - other than trying it and waiting for emails
> to arrive.(*)

> As I wrote, including the vxworks regs.h (from assembly or C) requires
> that types/vxCpu.h is already included, because it contains directives
> such as
> 
> #if CPU_FAMILY==I960
> ...
> #if CPU_FAMILY==MC680X0

> Hence my "fix" relies on the fact that any existing assembly source that
> includes regs.h must already have arranged for types/vxCpu.h to be
> included, one way or another. I could add an explicit include of that
> (maybe just in the _ASMLANGUAGE case), but that could easily cause
> problems of its own. Now, if one could rely on all existing C code
> having been through a non-gcc compiler, the same argument could be
> applied and my above worry (*) would be unfounded, but...

I think we should either do a fixinclude that would "work" for
C and ASM (like #include vxCpu for ASM, vxTypesOld otherwise), or
simply remove this hack (just using the fixinclude parlance here).

My inclination would be for the latter.

First, I'm not convinced fixincludes should be in the business
of dealing with that kind of issue, the proper resolution of which
depends on context.

Then if this triggers a failure for some user, it would only show
up for people upgrading the toolchain, which always calls for particular
attention and often goes with adjustments. 

The symptom would be a compilation failure, easy to address if you can
modify sources, with changes that you'd need to do if you were compiling
with the system toolchain anyway, and which can be done with a manual
fixinclude like trick if really needed.

Finally,
- this would only be visible in cases where the headers needed
  by regs.h aren't already #included,
- there are probably not many users of the upstream gcc for VxWorks, and
- I know that at least some of them (us) don't use fixincludes
  to begin with.

So the risk of breaking existing C code seems very low for starters.

What happens on your end if you just remove the hack ?



[PR c++/87185] ICE in prune-lambdas

2018-09-05 Thread Nathan Sidwell
I'm applying this patch of Pádraig's. It's a sufficiently obvious 
NULL-ptr-dereference fix.


tested on x86_64-linux.

nathan
--
Nathan Sidwell
2018-09-05   Pádraig Brady p...@draigbrady.com

	cp/
	PR c++/87185
	* lambda.c (prune_lambda_captures): Protect against const_vars.get
	returning NULL.
	testsuite/
	PR c++/87185
	* g++.dg/pr87185.C: New.

Index: cp/lambda.c
===
--- cp/lambda.c	(revision 264112)
+++ cp/lambda.c	(working copy)
@@ -1519,8 +1519,8 @@ prune_lambda_captures (tree body)
   tree cap = *capp;
   if (tree var = var_to_maybe_prune (cap))
 	{
-	  tree *use = *const_vars.get (var);
-	  if (TREE_CODE (*use) == DECL_EXPR)
+	  tree **use = const_vars.get (var);
+	  if (use && TREE_CODE (**use) == DECL_EXPR)
 	{
 	  /* All uses of this capture were folded away, leaving only the
 		 proxy declaration.  */
@@ -1535,7 +1535,7 @@ prune_lambda_captures (tree body)
 	  *fieldp = DECL_CHAIN (*fieldp);
 
 	  /* And remove the capture proxy declaration.  */
-	  *use = void_node;
+	  **use = void_node;
 	  continue;
 	}
 	}
Index: testsuite/g++.dg/pr87185.C
===
--- testsuite/g++.dg/pr87185.C	(nonexistent)
+++ testsuite/g++.dg/pr87185.C	(working copy)
@@ -0,0 +1,4 @@
+// PR c++/87185
+// { dg-do compile { target c++11 } }
+
+void f() { const int i=0; [&]() noexcept {i;}; }


Re: PR85787: Extend malloc_candidate_p to handle multiple phis.

2018-09-05 Thread Prathamesh Kulkarni
On 28 August 2018 at 16:56, Prathamesh Kulkarni
 wrote:
> Hi,
> The attached patch extends malloc_candidate_p to handle multiple phis.
> There's a lot of noise in the patch because I moved most of
> malloc_candidate_p into
> new function malloc_candidate_p_1. The only real change is following hunk:
>
> +   gimple *arg_def = SSA_NAME_DEF_STMT (arg);
> +   if (is_a (arg_def))
> + {
> +   if (!malloc_candidate_p_1 (fun, arg, phi, ipa))
> +   DUMP_AND_RETURN ("nested phi fail")
> +   continue;
> + }
> +
>
> Which checks recursively that the phi argument is used only within
> comparisons against 0
> and the phi.
>
> Bootstrapped+tested on x86_64-unknown-linux-gnu.
> OK to commit ?
ping https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01757.html

Thanks,
Prathamesh
>
> Thanks,
> Prathamesh


[PATCH] Fix PR87217

2018-09-05 Thread Richard Biener


This re-introduces the region boundary checking for the alias walk.  It's
really also necessary for correctness, not only for compile-time.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2018-09-05  Richard Biener  

PR tree-optimization/87217
* tree-ssa-sccvn.c (vuse_valueize): New.
(vn_reference_lookup_pieces): Use it.
(vn_reference_lookup): Likewise.

* gfortran.dg/pr87217.f: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264102)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -493,6 +493,25 @@ vuse_ssa_val (tree x)
   return x;
 }
 
+/* Similar to the above but used as callback for walk_non_aliased_vuses
+   and thus should stop at unvisited VUSE to not walk across region
+   boundaries.  */
+
+static tree
+vuse_valueize (tree vuse)
+{
+  do
+{
+  bool visited;
+  vuse = SSA_VAL (vuse, &visited);
+  if (!visited)
+   return NULL_TREE;
+  gcc_assert (vuse != VN_TOP);
+}
+  while (SSA_NAME_IN_FREE_LIST (vuse));
+  return vuse;
+}
+
 
 /* Return the vn_kind the expression computed by the stmt should be
associated with.  */
@@ -2573,7 +2592,7 @@ vn_reference_lookup_pieces (tree vuse, a
  (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse,
  vn_reference_lookup_2,
  vn_reference_lookup_3,
- vuse_ssa_val, &vr1);
+ vuse_valueize, &vr1);
   gcc_checking_assert (vr1.operands == shared_lookup_references);
 }
 
@@ -2629,7 +2648,7 @@ vn_reference_lookup (tree op, tree vuse,
(vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse,
vn_reference_lookup_2,
vn_reference_lookup_3,
-   vuse_ssa_val, &vr1);
+   vuse_valueize, &vr1);
   gcc_checking_assert (vr1.operands == shared_lookup_references);
   if (wvnresult)
{
Index: gcc/testsuite/gfortran.dg/pr87217.f
===
--- gcc/testsuite/gfortran.dg/pr87217.f (nonexistent)
+++ gcc/testsuite/gfortran.dg/pr87217.f (working copy)
@@ -0,0 +1,32 @@
+! { dg-do compile }
+! { dg-options "-O3" }
+  implicit real*8 (a-h,o-z)
+  common clop6(3),dps(6),aml6(6,6)
+  dimension y1(3)
+  dimension dclo(3)
+  dimension dx(3),dy(3)
+  save
+  do 80 ii=1,itco
+y1(3)=dps(1)
+do 40 l=1,3
+  dy(l)=clop6(l)-y1(l)
+   40   continue
+dczp=abs(dy(3))
+if(dcx.le.c1m10.and.dcz.le.c1m10.and.dcxp.le.c1m10.and.dczp
+ +  .le.c1m10.and.dcy.le.c1m10.and.dcyp.le.c1m10) goto 90
+   80 continue
+  write(6) itco
+  ii=itco
+   90 continue
+  if(ii.ne.itco) then
+do 65 k=1,3
+  do 55 j=1,3
+jj=2*j
+kk=2*k
+dclo(k)=aml6(kk-1,jj-1)*dx(j)+dclo(k)
+dclo(k)=aml6(kk-1,jj)*dy(j)+dclo(k)
+   55 continue
+   65   continue
+  endif
+  end
+


Re: [patch, fortan] PR87103 - [OOP] ICE in gfc_new_symbol() due to overlong symbol name

2018-09-05 Thread Bernhard Reutner-Fischer
On Wed, 5 Sep 2018 at 03:30, Jerry DeLisle  wrote:
>
> On 09/04/2018 10:43 AM, Bernhard Reutner-Fischer wrote:
> > On Tue, 4 Sep 2018 at 18:43, Andrew Benson  
> > wrote:
> >>
> >> As suggested by Janus, PR87103 is easily fixed by the attached patch which
> >> increases GFC_MAX_SYMBOL_LEN to 76 (sufficient to hold the maximum allowed 
> >> F08
> >> symbol length of 63, plus a null terminator, plus the "__tmp_class_" 
> >> prefix).
> >
> > This is so much wrong.
> > Note that this will be fixed properly by the changes contained in the
> > https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/aldot/fortran-fe-stringpool
> > branch.
> > There we keep the GFC_MAX_SYMBOL_LEN at 63 proper but use an internal
> > buffer double that size which in turn is sufficient to hold all
> > compiler-generated identifiers.
> > See gfc_get_string() even in current TOT.
> >
> > Maybe we should bite the bullet and start to merge the stringpool
> > changes now instead of this hack?
>
> It all makes sense to me, please proceed. (my 2 cents worth)

Ok so i will reread the fortran-fe-stringpool series and submit it
here for review.

Let's return to the issue at hand for a moment, though.
I tested the attached alternate fix on top of the
fortran-fe-stringpool branch where it fixes PR87103.
Maybe somebody has spare cycles to test it on top of current trunk?

thanks,

[PATCH,FORTRAN] PR87103: Remove max symbol length check from gfc_new_symbol

gfc_match_name does check for too long names already. Since
gfc_new_symbol is also called for symbols with internal names containing
compiler-generated prefixes, these internal names can easily exceed the
max_identifier_length mandated by the standard.

gcc/fortran/ChangeLog

2018-09-04  Bernhard Reutner-Fischer  

PR fortran/87103
* expr.c (gfc_check_conformance): Check vsnprintf for truncation.
* iresolve.c (gfc_get_string): Likewise.
* symbol.c (gfc_new_symbol): Remove check for maximum symbol
name length.  Remove redundant 0 setting of new calloc()ed
gfc_symbol.
Remove max symbol length check from gfc_new_symbol

gfc_match_name does check for too long names already. Since
gfc_new_symbol is also called for symbols with internal names containing
compiler-generated prefixes, these internal names can easily exceed the
max_identifier_length mandated by the standard.

gcc/fortran/ChangeLog

2018-09-04  Bernhard Reutner-Fischer  

	PR fortran/87103
	* expr.c (gfc_check_conformance): Check vsnprintf for truncation.
	* iresolve.c (gfc_get_string): Likewise.
	* symbol.c (gfc_new_symbol): Remove check for maximum symbol
	name length.  Remove redundant 0 setting of new calloc()ed
	gfc_symbol.

diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index c5bf822cd24..6b5671390ec 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -3225,8 +3225,10 @@ gfc_check_conformance (gfc_expr *op1, gfc_expr *op2, const char *optype_msgid, .
 return true;
 
   va_start (argp, optype_msgid);
-  vsnprintf (buffer, 240, optype_msgid, argp);
+  d = vsnprintf (buffer, sizeof (buffer), optype_msgid, argp);
   va_end (argp);
+  if (d < 1 || d >= (int) sizeof (buffer)) /* Reject truncation.  */
+gfc_internal_error ("optype_msgid overflow: %d", d);
 
   if (op1->rank != op2->rank)
 {
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 61663fec7e5..d7bd0545173 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -60,9 +60,12 @@ gfc_get_string (const char *format, ...)
 }
   else
 {
+  int ret;
   va_start (ap, format);
-  vsnprintf (temp_name, sizeof (temp_name), format, ap);
+  ret = vsnprintf (temp_name, sizeof (temp_name), format, ap);
   va_end (ap);
+  if (ret < 1 || ret >= (int) sizeof (temp_name)) /* Reject truncation.  */
+	gfc_internal_error ("identifier overflow: %d", ret);
   temp_name[sizeof (temp_name) - 1] = 0;
   str = temp_name;
 }
diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index cde34c67482..fc3354f0457 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -3142,25 +3142,9 @@ gfc_new_symbol (const char *name, gfc_namespace *ns)
   gfc_clear_ts (&p->ts);
   gfc_clear_attr (&p->attr);
   p->ns = ns;
-
   p->declared_at = gfc_current_locus;
-
-  if (strlen (name) > GFC_MAX_SYMBOL_LEN)
-gfc_internal_error ("new_symbol(): Symbol name too long");
-
   p->name = gfc_get_string ("%s", name);
 
-  /* Make sure flags for symbol being C bound are clear initially.  */
-  p->attr.is_bind_c = 0;
-  p->attr.is_iso_c = 0;
-
-  /* Clear the ptrs we may need.  */
-  p->common_block = NULL;
-  p->f2k_derived = NULL;
-  p->assoc = NULL;
-  p->dt_next = NULL;
-  p->fn_result_spec = 0;
-
   return p;
 }
 


Committed: io/async.h: Use __gthread_mutex_t, not pthread_mutex_t.

2018-09-05 Thread Hans-Peter Nilsson
These pthread_mutex_t were obviously meant to be
__gthread_mutex_t.  See other declarations.  Not being that,
broke cris-elf build at r264070, restored with this patch.  Also
regtested on native x86_64-pc-linux-gnu.

I'm not sure why no other bare-iron target saw this, but
perhaps it's because my newlib is a bit dated (...looks... yes,
looks like newlib has grown pthreads since then, but that
doesn't excuse this flaw).  Still, there may be other similar
flaws behind #ifdefs, this is just a build-fixing patch.

Committed as obvious.

libgfortran:
* io/async.h: Use __gthread_mutex_t, not pthread_mutex_t.

Index: libgfortran/io/async.h
===
--- libgfortran/io/async.h  (revision 264070)
+++ libgfortran/io/async.h  (working copy)
@@ -337,8 +337,8 @@ struct adv_cond
 
 typedef struct async_unit
 {
-  pthread_mutex_t io_lock;   /* Lock for doing actual I/O. */
-  pthread_mutex_t lock;  /* Lock for manipulating the queue structure.  */
+  __gthread_mutex_t io_lock;   /* Lock for doing actual I/O. */
+  __gthread_mutex_t lock;  /* Lock for manipulating the queue structure.  
*/
   bool empty;
   struct
   {

brgds, H-P


Re: Committed: io/async.h: Use __gthread_mutex_t, not pthread_mutex_t.

2018-09-05 Thread Bernhard Reutner-Fischer
On Wed, 5 Sep 2018 at 12:35, Hans-Peter Nilsson
 wrote:
>
> These pthread_mutex_t were obviously meant to be
> __gthread_mutex_t.  See other declarations.  Not being that,
> broke cris-elf build at r264070, restored with this patch.  Also
> regtested on native x86_64-pc-linux-gnu.
>
> I'm not sure why no other bare-iron target saw this, but
> perhaps it's because my newlib is a bit dated (...looks... yes,
> looks like newlib has grown pthreads since then, but that
> doesn't excuse this flaw).  Still, there may be other similar
> flaws behind #ifdefs, this is just a build-fixing patch.

Thanks for the fix!
I mentioned the need to fix this in
https://gcc.gnu.org/ml/fortran/2018-09/msg00006.html
and i hope Thomas and Nicolas will take care of the rest of the
comments soonish.

cheers,


[wwwdocs] Document 87137 fix

2018-09-05 Thread Nathan Sidwell

Gerald,
this documents the fix for pr87137.  Discovered as a GCC-8 regression, 
turned out to be an ABI bug.  Decided to fix the entire bug in one go. 
Are these changes.html changes ok?


Patch discussion at https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01900.html

nathan
--
Nathan Sidwell
Index: gcc-8/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
retrieving revision 1.94
diff -r1.94 changes.html
1351a1352,1377
> 
> GCC 8.3
> 
> GCC 8.3 is not yet released.
> 
> Windows
> 
> 
>   A C++ Microsoft ABI bitfield layout
>   bug, <a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87137">PR87137</a>
>   has been fixed.  A non-field declaration could cause the current
>   bitfield allocation unit to be completed, incorrectly placing a
>   following bitfield into a new allocation unit.  Microsoft ABI is
>   selected for:
>   
> Mingw targets
> PowerPC, IA-32 or x86-64 targets
>   when -mms-bitfields option is specified
>   or __attribute__((ms_struct)) is used
> SuperH targets when the -mhitachi option is
>   specified, or __attribute__((renesas)) is used
>   
>   GCC 8 introduced additional cases of this defect, but rather than
>   resolve only those regressions, it was decided to resolve all the
>   cases of this defect at once.
> 
Index: gcc-9/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-9/changes.html,v
retrieving revision 1.19
diff -r1.19 changes.html
212c212
< 
---
> Windows
213a214,228
> 
>   A C++ Microsoft ABI bitfield layout
>   bug, <a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87137">PR87137</a>
>   has been fixed.  A non-field declaration could cause the current
>   bitfield allocation unit to be completed, incorrectly placing a
>   following bitfield into a new allocation unit.  Microsoft ABI is
>   selected for:
>   
> Mingw targets
> PowerPC, IA-32 or x86-64 targets
>   when -mms-bitfields option is specified
>   or __attribute__((ms_struct)) is used
> SuperH targets when the -mhitachi option is
>   specified, or __attribute__((renesas)) is used
>   


For our betterment

2018-09-05 Thread Evan Kachukwu
My pleasure to communicate with you through this medium. I know you will be 
surprise reading my email message. However, it is my sincere pleasure at this 
moment to exhibit my total confidence bestowed on you in accordance of which I 
am fully convinced that you will really be of immense assistance as my future 
friend and business partner. Well, i hope my letter to you will be given proper 
attention despite the fact we have not known each other before. knowing fully 
well it take a minute, an hour or even a day to know somebody and also 
establish an everlasting relationship between your family and my family. 

I want to go into self investment in your country in your field or in your area 
of specialization or any lucrative business of your choice.  My main purpose of 
relating to you is to assist me establish, conduct and manage my investment 
project in your country, since i will not be present in day-to-day running of 
the business, do to my active function in certain Governmental activities.

With my position I urge you to keep the business confidential. The level of 
your confidentiality will determine the success of the project. For the sake of 
the investment you are required to create an avenue to trust one another. I 
want to know if we can convey the trust to each other without sabotage of 
contract and if you are eligible for handling such project so i can send you 
the full message and also let you know full details of the message.

I will be glad to get your positive response.

Best Regards,
Engiineer Evans Kachuukwu



[PATCH] Maybe fix PR87134

2018-09-05 Thread Richard Biener


The following makes sure to call the default CTOR when emplacing a
vec<> in the avail hash-map.  Certainly the intent was to zero-initialize
the m_vec member.

It looks like some versions of clang do not agree that vec<> is POD
so that might also help the reported FreeBSD issue.

Bootstrap running on x86_64-unknown-linux-gnu, will apply shortly.

Richard.

2018-09-05  Richard Biener  

PR bootstrap/87134
* tree-ssa-sccvn.c (rpo_elim::eliminate_push_avail): Make sure
to zero-init the emplaced vec.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264123)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -5798,7 +5798,7 @@ rpo_elim::eliminate_push_avail (basic_bl
   vec > &av = m_rpo_avail.get_or_insert (valnum, &existed);
   if (!existed)
 {
-  new (&av) vec >;
+  new (&av) vec >();
   av.reserve_exact (2);
 }
   av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));


Re: [PATCH] Maybe fix PR87134

2018-09-05 Thread Jakub Jelinek
On Wed, Sep 05, 2018 at 12:55:55PM +0200, Richard Biener wrote:
> 
> The following makes sure to call the default CTOR when emplacing a
> vec<> in the avail hash-map.  Certainly the intent was to zero-initialize
> the m_vec member.

Guess it would be nice to see what older versions of system g++ do with
that.
At least from what I remember last time, value-initialization vs.
zero-initialization vs. no initialization in C++ is heavily dependent on
compiler version and -std=c++NN version, there have been many bugs in the
past and changes between standard versions.

> 2018-09-05  Richard Biener  
> 
>   PR bootstrap/87134
>   * tree-ssa-sccvn.c (rpo_elim::eliminate_push_avail): Make sure
>   to zero-init the emplaced vec.
> 
> Index: gcc/tree-ssa-sccvn.c
> ===
> --- gcc/tree-ssa-sccvn.c  (revision 264123)
> +++ gcc/tree-ssa-sccvn.c  (working copy)
> @@ -5798,7 +5798,7 @@ rpo_elim::eliminate_push_avail (basic_bl
>vec > &av = m_rpo_avail.get_or_insert (valnum, 
> &existed);
>if (!existed)
>  {
> -  new (&av) vec >;
> +  new (&av) vec >();
>av.reserve_exact (2);
>  }
>av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));

Jakub


Re: [PATCH] Group switch cases in switch lowering (PR tree-optimization/87205).

2018-09-05 Thread Martin Liška
On 09/05/2018 11:31 AM, Richard Biener wrote:
> On Wed, Sep 5, 2018 at 10:31 AM Martin Liška  wrote:
>>
>> Hi.
>>
>> It's beneficial to group cases before switch lowering machinery
>> is making a decision what to do with a switch.
>>
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> Ready to be installed?
> 
> Hmm, do we want to do this at O0?  There it should be redundant given
> CFG build already performs it?  We also perform it at .optimized
> (execute_cleanup_cfg_post_optimizing) plus when we redirect some
> edges and then call record_case_labels.

Sure, I'll not do it with O0. I'm going to install the patch.

Martin

> 
> Otherwise OK.
> 
> Richard.
> 
>> Martin
>>
>>
>> gcc/ChangeLog:
>>
>> 2018-09-04  Martin Liska  
>>
>> PR tree-optimization/87205
>> * tree-switch-conversion.c (pass_lower_switch::execute):
>> Group cases for switch statements.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2018-09-04  Martin Liska  
>>
>> PR tree-optimization/87205
>> * gcc.dg/tree-ssa/pr87205-2.c: New test.
>> * gcc.dg/tree-ssa/pr87205.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c | 18 ++
>>  gcc/testsuite/gcc.dg/tree-ssa/pr87205.c   | 21 +
>>  gcc/tree-switch-conversion.c  |  8 ++--
>>  3 files changed, 45 insertions(+), 2 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr87205.c
>>
>>



Re: [PATCH] Maybe fix PR87134

2018-09-05 Thread Richard Biener
On Wed, 5 Sep 2018, Jakub Jelinek wrote:

> On Wed, Sep 05, 2018 at 12:55:55PM +0200, Richard Biener wrote:
> > 
> > The following makes sure to call the default CTOR when emplacing a
> > vec<> in the avail hash-map.  Certainly the intent was to zero-initialize
> > the m_vec member.
> 
> Guess it would be nice to see what older versions of system g++ do with
> that.
> At least from what I remember last time, value-initialization vs.
> zero-initialization vs. no initialization in C++ is heavily dependent on
> compiler version and -std=c++NN version, there have been many bugs in the
> past and changes between standard versions.

It looks like g++ 4.8 puts extra zero-init with () but not without:

-  (void) (struct vec *) operator new (8, (void *) TARGET_EXPR ) >;
+  (void) (TARGET_EXPR >;, TARGET_EXPR )>;;, (struct vec *) D.107095 != 0B ? *(struct vec *) D.107095 = 
{.m_vec=0B};, (struct vec *) D.107095; : (struct vec *) D.107095;) >;

but yes, another option would be to explicitly do the following.
Cheaper av.m_vec = NULL doesn't work because m_vec is poisoned...

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264123)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -5798,7 +5798,8 @@ rpo_elim::eliminate_push_avail (basic_bl
   vec > &av = m_rpo_avail.get_or_insert (valnum, 
&existed);
   if (!existed)
 {
-  new (&av) vec >;
+  new (&av) vec >();
+  memset (&av, 0, sizeof (vec >));
   av.reserve_exact (2);
 }
   av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));


> > 2018-09-05  Richard Biener  
> > 
> > PR bootstrap/87134
> > * tree-ssa-sccvn.c (rpo_elim::eliminate_push_avail): Make sure
> > to zero-init the emplaced vec.
> > 
> > Index: gcc/tree-ssa-sccvn.c
> > ===
> > --- gcc/tree-ssa-sccvn.c(revision 264123)
> > +++ gcc/tree-ssa-sccvn.c(working copy)
> > @@ -5798,7 +5798,7 @@ rpo_elim::eliminate_push_avail (basic_bl
> >vec > &av = m_rpo_avail.get_or_insert (valnum, 
> > &existed);
> >if (!existed)
> >  {
> > -  new (&av) vec >;
> > +  new (&av) vec >();
> >av.reserve_exact (2);
> >  }
> >av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Maybe fix PR87134

2018-09-05 Thread Richard Biener
On Wed, 5 Sep 2018, Richard Biener wrote:

> On Wed, 5 Sep 2018, Jakub Jelinek wrote:
> 
> > On Wed, Sep 05, 2018 at 12:55:55PM +0200, Richard Biener wrote:
> > > 
> > > The following makes sure to call the default CTOR when emplacing a
> > > vec<> in the avail hash-map.  Certainly the intent was to zero-initialize
> > > the m_vec member.
> > 
> > Guess it would be nice to see what older versions of system g++ do with
> > that.
> > At least from what I remember last time, value-initialization vs.
> > zero-initialization vs. no initialization in C++ is heavily dependent on
> > compiler version and -std=c++NN version, there have been many bugs in the
> > past and changes between standard versions.
> 
> It looks like g++ 4.8 puts extra zero-init with () but not without:
> 
> -  (void) (struct vec *) operator new (8, (void *) TARGET_EXPR  (struct vec *) av>) >;
> +  (void) (TARGET_EXPR  vec *) av>>;, TARGET_EXPR  )>;;, (struct vec *) D.107095 != 0B ? *(struct vec *) D.107095 = 
> {.m_vec=0B};, (struct vec *) D.107095; : (struct vec *) D.107095;) >;

So I'm testing the following then, leaving the placement new untouched
(no init is fine) and then assign from vNULL.

2018-09-05  Richard Biener  

PR bootstrap/87134
* tree-ssa-sccvn.c (rpo_elim::eliminate_push_avail): Make sure
to zero-init the emplaced vec.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 264123)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -5799,6 +5799,7 @@ rpo_elim::eliminate_push_avail (basic_bl
   if (!existed)
 {
   new (&av) vec >;
+  av = vNULL;
   av.reserve_exact (2);
 }
   av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));


Re: [PATCH] Optimise sqrt reciprocal multiplications

2018-09-05 Thread Kyrill Tkachov

On 04/09/18 17:52, Kyrill Tkachov wrote:
>
> On 04/09/18 15:31, Richard Biener wrote:
>> On Tue, 4 Sep 2018, Kyrill Tkachov wrote:
>>> Hi Richard,
>>>
>>> On 31/08/18 12:07, Richard Biener wrote:
 On Thu, 30 Aug 2018, Kyrill Tkachov wrote:
> Ping.
>
> https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01496.html
>
> Thanks,
> Kyrill
>
> On 23/08/18 18:09, Kyrill Tkachov wrote:
>> Hi Richard,
>>
>> On 23/08/18 11:13, Richard Sandiford wrote:
>>> Kyrill  Tkachov  writes:
 Hi all,

 This patch aims to optimise sequences involving uses of 1.0 / sqrt (a)
 under -freciprocal-math and -funsafe-math-optimizations.
 In particular consider:

 x = 1.0 / sqrt (a);
 r1 = x * x;  // same as 1.0 / a
 r2 = a * x; // same as sqrt (a)

 If x, r1 and r2 are all used further on in the code, this can be
 transformed into:
 tmp1 = 1.0 / a
 tmp2 = sqrt (a)
 tmp3 = tmp1 * tmp2
 x = tmp3
 r1 = tmp1
 r2 = tmp2
>>> Nice optimisation :-)  Someone who knows the pass better should review,
>>> but:
>> Thanks for the review.
>>> There seems to be an implicit assumption that this is a win even
>>> when the r1 and r2 assignments are only conditionally executed.
>>> That's probably true, but it might be worth saying explicitly.
>> I'll admit I had not considered that case.
>> I think it won't make a difference in practice, as the really expensive
>> operations here
>> are the sqrt and the division and they are on the executed path in either
>> case and them
>> becoming independent should be a benefit of its own.
 +/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
 +
 +static inline bool
 +is_mult_by (gimple *use_stmt, tree def, tree a)
 +{
 +  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 +  && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 +{
 +  tree op0 = gimple_assign_rhs1 (use_stmt);
 +  tree op1 = gimple_assign_rhs2 (use_stmt);
 +
 +  return (op0 == def && op1 == a)
 +  || (op0 == a && op1 == def);
 +}
 +  return 0;
 +}
>>> Seems like is_square_of could now be a light-weight wrapper around this.
>> Indeed, I've done the wrapping now.
 @@ -652,6 +669,180 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator
 *def_gsi, tree def)
  occ_head = NULL;
}
+/* Transform sequences like
 +   x = 1.0 / sqrt (a);
 +   r1 = x * x;
 +   r2 = a * x;
 +   into:
 +   tmp1 = 1.0 / a;
 +   tmp2 = sqrt (a);
 +   tmp3 = tmp1 * tmp2;
 +   x = tmp3;
 +   r1 = tmp1;
 +   r2 = tmp2;
 +   depending on the uses of x, r1, r2.  This removes one 
multiplication
 and
 +   allows the sqrt and division operations to execute in parallel.
 +   DEF_GSI is the gsi of the initial division by sqrt that defines
 +   DEF (x in the example above). */
 +
 +static void
 +optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
 +{
 +  use_operand_p use_p;
 +  imm_use_iterator use_iter;
 +  gimple *stmt = gsi_stmt (*def_gsi);
 +  tree x = def;
 +  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
 +  tree div_rhs1 = gimple_assign_rhs1 (stmt);
 +
 +  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
 +  || TREE_CODE (div_rhs1) != REAL_CST
 +  || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
 +return;
 +
 +  gimple *sqrt_stmt = SSA_NAME_DEF_STMT (orig_sqrt_ssa_name);
 +  if (!is_gimple_call (sqrt_stmt)
 +  || !gimple_call_lhs (sqrt_stmt))
 +return;
 +
 +  gcall *call = as_a  (sqrt_stmt);
>>> Very minor, but:
>>>
>>> gcall *sqrt_stmt
>>>   = dyn_cast  (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
>>> if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
>>>   return;
>>>
>>> would avoid the need for the separate as_a<>, and would mean that
>>> we only call gimple_call_* on gcalls.
>> Ok.
 +  if (has_other_use)
 +{
 +  /* Using the two temporaries tmp1, tmp2 from above
 + the original x is now:
 + x = tmp1 * tmp2.  */
 +  gcc_assert (mult_ssa_name);
 +  gcc_assert (sqr_ssa_name);
 +  gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
 +
 +  tree new_ssa_name
 += make_temp_ssa_name (TREE_TYPE (a), NULL,
 "recip_sqrt_transformed");
 +  gimple *new_stmt
 += gimple_build_assign (new_ssa_name, MULT_EXPR,
>

Re: [PATCH] Optimise sqrt reciprocal multiplications

2018-09-05 Thread Richard Biener
On Wed, 5 Sep 2018, Kyrill Tkachov wrote:

> On 04/09/18 17:52, Kyrill Tkachov wrote:
> >
> > On 04/09/18 15:31, Richard Biener wrote:
> >> On Tue, 4 Sep 2018, Kyrill Tkachov wrote:
> >>> Hi Richard,
> >>>
> >>> On 31/08/18 12:07, Richard Biener wrote:
>  On Thu, 30 Aug 2018, Kyrill Tkachov wrote:
> > Ping.
> >
> > https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01496.html
> >
> > Thanks,
> > Kyrill
> >
> > On 23/08/18 18:09, Kyrill Tkachov wrote:
> >> Hi Richard,
> >>
> >> On 23/08/18 11:13, Richard Sandiford wrote:
> >>> Kyrill  Tkachov  writes:
>  Hi all,
> 
>  This patch aims to optimise sequences involving uses of 1.0 / sqrt
> (a)
>  under -freciprocal-math and -funsafe-math-optimizations.
>  In particular consider:
> 
>  x = 1.0 / sqrt (a);
>  r1 = x * x;  // same as 1.0 / a
>  r2 = a * x; // same as sqrt (a)
> 
>  If x, r1 and r2 are all used further on in the code, this can be
>  transformed into:
>  tmp1 = 1.0 / a
>  tmp2 = sqrt (a)
>  tmp3 = tmp1 * tmp2
>  x = tmp3
>  r1 = tmp1
>  r2 = tmp2
> >>> Nice optimisation :-)  Someone who knows the pass better should
> review,
> >>> but:
> >> Thanks for the review.
> >>> There seems to be an implicit assumption that this is a win even
> >>> when the r1 and r2 assignments are only conditionally executed.
> >>> That's probably true, but it might be worth saying explicitly.
> >> I'll admit I had not considered that case.
> >> I think it won't make a difference in practice, as the really expensive
> >> operations here
> >> are the sqrt and the division and they are on the executed path in
> either
> >> case and them
> >> becoming independent should be a benefit of its own.
>  +/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
>  +
>  +static inline bool
>  +is_mult_by (gimple *use_stmt, tree def, tree a)
>  +{
>  +  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
>  +  && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
>  +{
>  +  tree op0 = gimple_assign_rhs1 (use_stmt);
>  +  tree op1 = gimple_assign_rhs2 (use_stmt);
>  +
>  +  return (op0 == def && op1 == a)
>  +  || (op0 == a && op1 == def);
>  +}
>  +  return 0;
>  +}
> >>> Seems like is_square_of could now be a light-weight wrapper around
> this.
> >> Indeed, I've done the wrapping now.
>  @@ -652,6 +669,180 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator
>  *def_gsi, tree def)
>   occ_head = NULL;
> }
> +/* Transform sequences like
>  +   x = 1.0 / sqrt (a);
>  +   r1 = x * x;
>  +   r2 = a * x;
>  +   into:
>  +   tmp1 = 1.0 / a;
>  +   tmp2 = sqrt (a);
>  +   tmp3 = tmp1 * tmp2;
>  +   x = tmp3;
>  +   r1 = tmp1;
>  +   r2 = tmp2;
>  +   depending on the uses of x, r1, r2.  This removes one
> multiplication
>  and
>  +   allows the sqrt and division operations to execute in parallel.
>  +   DEF_GSI is the gsi of the initial division by sqrt that defines
>  +   DEF (x in the example above). */
>  +
>  +static void
>  +optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
>  +{
>  +  use_operand_p use_p;
>  +  imm_use_iterator use_iter;
>  +  gimple *stmt = gsi_stmt (*def_gsi);
>  +  tree x = def;
>  +  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
>  +  tree div_rhs1 = gimple_assign_rhs1 (stmt);
>  +
>  +  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
>  +  || TREE_CODE (div_rhs1) != REAL_CST
>  +  || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
>  +return;
>  +
>  +  gimple *sqrt_stmt = SSA_NAME_DEF_STMT (orig_sqrt_ssa_name);
>  +  if (!is_gimple_call (sqrt_stmt)
>  +  || !gimple_call_lhs (sqrt_stmt))
>  +return;
>  +
>  +  gcall *call = as_a  (sqrt_stmt);
> >>> Very minor, but:
> >>>
> >>> gcall *sqrt_stmt
> >>>   = dyn_cast  (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
> >>> if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
> >>>   return;
> >>>
> >>> would avoid the need for the separate as_a<>, and would mean that
> >>> we only call gimple_call_* on gcalls.
> >> Ok.
>  +  if (has_other_use)
>  +{
>  +  /* Using the two temporaries tmp1, tmp2 from above
>  + the original x is now:
>  + x = tmp1 * tmp2.  */
>  +  gcc_assert (mult_ssa_name);
>  +  gcc_assert (sqr_ssa_name)

[PATCH 01/25] Handle vectors that don't fit in an integer.

2018-09-05 Thread ams

GCN vector sizes range between 64 and 512 bytes, none of which have
correspondingly sized integer modes.  This breaks a number of assumptions
throughout the compiler, but I don't really want to create modes just for this
purpose.

Instead, this patch fixes up the cases that I've found, so far, such that the
compiler tries something else, or fails to optimize, rather than just ICE.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Jan Hubicka  
Martin Jambor  

gcc/
* combine.c (gen_lowpart_or_truncate): Return clobber if there is
no integer mode of the same size as x.
(gen_lowpart_for_combine): Fail if there is no integer mode of the
same size.
* expr.c (expand_expr_real_1): Force first operand to be in memory
if it is a vector register and the result is in BLKmode.
* tree-vect-stmts.c (vectorizable_store): Don't ICE when
int_mode_for_size fails.
(vectorizable_load): Likewise.
---
 gcc/combine.c | 13 -
 gcc/expr.c|  8 
 gcc/tree-vect-stmts.c |  8 
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/gcc/combine.c b/gcc/combine.c
index a2649b6..cbf9dae 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)
 {
   /* Bit-cast X into an integer mode.  */
   if (!SCALAR_INT_MODE_P (GET_MODE (x)))
-	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+	{
+	  enum machine_mode imode =
+	int_mode_for_mode (GET_MODE (x)).require ();
+	  if (imode == BLKmode)
+	return gen_rtx_CLOBBER (mode, const0_rtx);
+	  x = gen_lowpart (imode, x);
+	}
   x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode).require (),
 			  x, GET_MODE (x));
 }
@@ -11698,6 +11704,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
   if (omode == imode)
 return x;
 
+  /* This can happen when there is no integer mode corresponding
+ to a size of vector mode.  */
+  if (omode == BLKmode)
+goto fail;
+
   /* We can only support MODE being wider than a word if X is a
  constant integer or has a mode the same size.  */
   if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD)
diff --git a/gcc/expr.c b/gcc/expr.c
index cd5cf12..776254a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -10569,6 +10569,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 			  || maybe_gt (bitpos + bitsize,
    GET_MODE_BITSIZE (mode2)));
 
+	/* If the result is in BLKmode and the underlying object is a
+	   vector in a register, and the size of the vector is larger than
+	   the largest integer mode, then we must force OP0 to be in memory
+	   as this is assumed in later code.  */
+	if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode
+	&& maybe_gt (bitsize, MAX_FIXED_MODE_SIZE))
+	  must_force_mem = 1;
+
 	/* Handle CONCAT first.  */
 	if (GET_CODE (op0) == CONCAT && !must_force_mem)
 	  {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8d94fca..607a2bd 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6702,12 +6702,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		 supported.  */
 		  unsigned lsize
 		= group_size * GET_MODE_BITSIZE (elmode);
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		 element extracts from the original vector type and
 		 element size stores.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		  && mode_for_vector (elmode, lnunits).exists (&vmode)
 		  && VECTOR_MODE_P (vmode)
 		  && targetm.vector_mode_supported_p (vmode)
 		  && (convert_optab_handler (vec_extract_optab,
@@ -7839,11 +7839,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		 to a larger load.  */
 		  unsigned lsize
 		= group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		 element loads of the original vector type.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		  && mode_for_vector (elmode, lnunits).exists (&vmode)
 		  && VECTOR_MODE_P (vmode)
 		  && targetm.vector_mode_supported_p (vmode)
 		  && (convert_optab_handler (vec_init_optab, vmode, elmode)


[PATCH 02/25] Propagate address spaces to builtins.

2018-09-05 Thread ams

At present, pointers passed to builtin functions, including atomic operators,
are stripped of their address space properties.  This doesn't seem to be
deliberate; the code simply omits to copy them.

Not only that, but it forces pointer sizes to Pmode, which isn't appropriate
for all address spaces.

This patch attempts to correct both issues.  It works for GCN atomics and
GCN OpenACC gang-private variables.

2018-09-05  Andrew Stubbs  
Julian Brown  

gcc/
* builtins.c (get_builtin_sync_mem): Handle address spaces.
---
 gcc/builtins.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 58ea747..361361c 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5781,14 +5781,21 @@ static rtx
 get_builtin_sync_mem (tree loc, machine_mode mode)
 {
   rtx addr, mem;
+  int addr_space = TYPE_ADDR_SPACE (POINTER_TYPE_P (TREE_TYPE (loc))
+? TREE_TYPE (TREE_TYPE (loc))
+: TREE_TYPE (loc));
+  scalar_int_mode addr_mode = targetm.addr_space.address_mode (addr_space);
 
-  addr = expand_expr (loc, NULL_RTX, ptr_mode, EXPAND_SUM);
-  addr = convert_memory_address (Pmode, addr);
+  addr = expand_expr (loc, NULL_RTX, addr_mode, EXPAND_SUM);
 
   /* Note that we explicitly do not want any alias information for this
  memory, so that we kill all other live memories.  Otherwise we don't
  satisfy the full barrier semantics of the intrinsic.  */
-  mem = validize_mem (gen_rtx_MEM (mode, addr));
+  mem = gen_rtx_MEM (mode, addr);
+
+  set_mem_addr_space (mem, addr_space);
+
+  mem = validize_mem (mem);
 
   /* The alignment needs to be at least according to that of the mode.  */
   set_mem_align (mem, MAX (GET_MODE_ALIGNMENT (mode),


[PATCH 00/25] AMD GCN Port

2018-09-05 Thread ams
Hi All,

This patch series contains the non-OpenACC/OpenMP portions of a port to
AMD GCN3 and GCN5 GPU processors.  It's sufficient to build
single-threaded programs, with vectorization in the usual way.  C and
Fortran are supported, C++ is not supported, and the other front-ends
have not been tested.  The OpenACC/OpenMP/libgomp portion will follow,
once this is committed, eventually.

If the Steering Committee approve the port and the patches are accepted
then I'd like to see the port make it into GCC 9, please.

The patches, as they are, are not perfect; I still want to massage the
test results a little, but I'd like to find out about big review issues
sooner rather than later.

I've posted the middle-end patches first.  Some of these are target
independent issues, but are included in the series because they are
required for GCN to work properly.

I've then split the back-end patches into libgfortran, libgcc, and the
back-end proper.

Finally I have the testsuite tweaks and fix ups.  I don't have any
GCN-specific tests as yet; the existing tests serve to demonstrate
correctness, and I anticipate future GCN tests being largely
optimization issues, such as instruction selection and vectorization
coverage.

I'm aware that I still need to make the necessary documentation
adjustments.

Thanks in advance

-- 
Andrew Stubbs
Mentor Graphics / CodeSourcery


[PATCH 07/25] [pr82089] Don't sign-extend SFV 1 in BImode

2018-09-05 Thread ams

This is an update of the patch posted to PR82089 long ago.  We ran into the
same bug on GCN, so we need this fixed as part of this series.

2018-09-05  Andrew Stubbs  
Tom de Vries  

PR82089

gcc/
* expmed.c (emit_cstore): Fix handling of result_mode == BImode and
STORE_FLAG_VALUE == 1.
---
 gcc/expmed.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 29ce10b..0b87fdc 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -5464,11 +5464,18 @@ emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
  If STORE_FLAG_VALUE does not have the sign bit set when
  interpreted in MODE, we can do this conversion as unsigned, which
  is usually more efficient.  */
-  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
+  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode)
+  || (result_mode == BImode && int_target_mode != BImode))
 {
-  convert_move (target, subtarget,
-		val_signbit_known_clear_p (result_mode,
-	   STORE_FLAG_VALUE));
+  gcc_assert (GET_MODE_SIZE (result_mode) != 1
+		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
+  bool unsignedp
+	= (GET_MODE_SIZE (result_mode) == 1
+	   ? STORE_FLAG_VALUE == 1
+	   : val_signbit_known_clear_p (result_mode, STORE_FLAG_VALUE));
+
+  convert_move (target, subtarget, unsignedp);
+
   op0 = target;
   result_mode = int_target_mode;
 }


[PATCH 04/25] SPECIAL_REGNO_P

2018-09-05 Thread ams

GCN has some registers which are special purpose, but not "fixed" because we
want the register allocator to track their usage and select alternatives that
use different special registers (e.g. scalar cc vs. vector cc).

Sometimes this leads the regrename pass to ICE.  Quite how it gets confused is
not well understood, but considering such registers for renaming is surely not
useful.

This patch creates a new macro SPECIAL_REGNO_P which disables regrename.  In
other words, the register is fixed once allocated.

2018-09-05  Kwok Cheung Yeung  

gcc/
* defaults.h (SPECIAL_REGNO_P): Define to false by default.
* regrename.c (check_new_reg_p): Do not rename to a special register.
(rename_chains): Do not rename special registers.
---
 gcc/defaults.h  | 4 
 gcc/regrename.c | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/gcc/defaults.h b/gcc/defaults.h
index 9035b33..40ecf61 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1198,6 +1198,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define NO_FUNCTION_CSE false
 #endif
 
+#ifndef SPECIAL_REGNO_P
+#define SPECIAL_REGNO_P(REGNO) false
+#endif
+
 #ifndef HARD_REGNO_RENAME_OK
 #define HARD_REGNO_RENAME_OK(FROM, TO) true
 #endif
diff --git a/gcc/regrename.c b/gcc/regrename.c
index 8424093..92e403e 100644
--- a/gcc/regrename.c
+++ b/gcc/regrename.c
@@ -320,6 +320,7 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
 if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
 	|| fixed_regs[new_reg + i]
 	|| global_regs[new_reg + i]
+	|| SPECIAL_REGNO_P (new_reg + i)
 	/* Can't use regs which aren't saved by the prologue.  */
 	|| (! df_regs_ever_live_p (new_reg + i)
 	&& ! call_used_regs[new_reg + i])
@@ -480,6 +481,7 @@ rename_chains (void)
 	continue;
 
   if (fixed_regs[reg] || global_regs[reg]
+	  || SPECIAL_REGNO_P (reg)
 	  || (!HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed
 	  && reg == HARD_FRAME_POINTER_REGNUM)
 	  || (HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed


[PATCH 03/25] Improve TARGET_MANGLE_DECL_ASSEMBLER_NAME.

2018-09-05 Thread ams

The HSA GPU drivers can't cope with binaries that have the same symbol defined
multiple times, even though the names are not exported.  This happens whenever
there are file-scope static variables with matching names.  I believe it's also
an issue with switch tables.

This is a bug, but outside our control, so we must work around it when multiple
translation units have the same symbol defined.

Therefore, we've implemented name mangling via
TARGET_MANGLE_DECL_ASSEMBLER_NAME, but found some places where the middle-end
assumes that the decl name matches the name in the source.

This patch fixes up those cases by falling back to comparing the unmangled
name, when a lookup fails.

2018-09-05  Julian Brown  

gcc/
* cgraphunit.c (handle_alias_pairs): Scan for aliases by DECL_NAME if
decl assembler name doesn't match.

gcc/c-family/
* c-pragma.c (maybe_apply_pending_pragma_weaks): Scan for aliases with
DECL_NAME if decl assembler name doesn't match.
---
 gcc/c-family/c-pragma.c | 14 ++
 gcc/cgraphunit.c| 15 +++
 2 files changed, 29 insertions(+)

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 84e4341..1c0be0c 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -323,6 +323,20 @@ maybe_apply_pending_pragma_weaks (void)
 	continue;
 
   target = symtab_node::get_for_asmname (id);
+
+  /* Try again if ID didn't match an assembler name by looking through
+	 decl names.  */
+  if (!target)
+	{
+	  symtab_node *node;
+	  FOR_EACH_SYMBOL (node)
+	if (strcmp (IDENTIFIER_POINTER (id), node->name ()) == 0)
+	  {
+	target = node;
+		break;
+	  }
+	}
+
   decl = build_decl (UNKNOWN_LOCATION,
 			 target ? TREE_CODE (target->decl) : FUNCTION_DECL,
 			 alias_id, default_function_type);
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index ec490d7..fc3f34e 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1393,6 +1393,21 @@ handle_alias_pairs (void)
 {
   symtab_node *target_node = symtab_node::get_for_asmname (p->target);
 
+  /* If the alias target didn't match a symbol's assembler name (e.g.
+	 because it has been mangled by TARGET_MANGLE_DECL_ASSEMBLER_NAME),
+	 try again with the unmangled decl name.  */
+  if (!target_node)
+	{
+	  symtab_node *node;
+	  FOR_EACH_SYMBOL (node)
+	if (strcmp (IDENTIFIER_POINTER (p->target),
+			node->name ()) == 0)
+	  {
+		target_node = node;
+		break;
+	  }
+	}
+
   /* Weakrefs with target not defined in current unit are easy to handle:
 	 they behave just as external variables except we need to note the
 	 alias flag to later output the weakref pseudo op into asm file.  */


[PATCH 05/25] Add sorry_at diagnostic function.

2018-09-05 Thread ams

The plain "sorry" diagnostic only gives the "current" location, which is
typically the last line of the function or translation unit by the time we get to
the back end.

GCN uses "sorry" to report unsupported language features, such as static
constructors, so it's useful to have a "sorry_at" variant.

This patch implements "sorry_at" according to the pattern of the other "at"
variants.

2018-09-05  Andrew Stubbs  

gcc/
* diagnostic-core.h (sorry_at): New prototype.
* diagnostic.c (sorry_at): New function.
---
 gcc/diagnostic-core.h |  1 +
 gcc/diagnostic.c  | 11 +++
 2 files changed, 12 insertions(+)

diff --git a/gcc/diagnostic-core.h b/gcc/diagnostic-core.h
index e4ebe00..80ff395 100644
--- a/gcc/diagnostic-core.h
+++ b/gcc/diagnostic-core.h
@@ -96,6 +96,7 @@ extern bool permerror (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern bool permerror (rich_location *, const char *,
    ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void sorry (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2);
+extern void sorry_at (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform (rich_location *, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform_n (location_t, unsigned HOST_WIDE_INT, const char *,
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index aae0934..56a1140 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -1443,6 +1443,17 @@ sorry (const char *gmsgid, ...)
   va_end (ap);
 }
 
+/* Same as above, but use location LOC instead of input_location.  */
+void
+sorry_at (location_t loc, const char *gmsgid, ...)
+{
+  va_list ap;
+  va_start (ap, gmsgid);
+  rich_location richloc (line_table, loc);
+  diagnostic_impl (&richloc, -1, gmsgid, &ap, DK_SORRY);
+  va_end (ap);
+}
+
 /* Return true if an error or a "sorry" has been seen.  Various
processing is disabled after errors.  */
 bool


[PATCH 06/25] Remove constant vec_select restriction.

2018-09-05 Thread ams

The vec_select operator is documented to require a const_int for the lane
selector operand, but GCN has an instruction that can select the lane at
runtime, so it seems reasonable to remove this restriction.

This patch simply replaces assertions that the operand is constant with early
exits from the optimizers.  I think it's reasonable that vec_select with a
non-constant operand cannot be optimized, yet.

Also included is the necessary documentation tweak.

2018-09-05  Andrew Stubbs  

gcc/
* doc/rtl.texi: Adjust vec_select description.
* simplify-rtx.c (simplify_binary_operation_1): Allow VEC_SELECT to use
non-constant selectors.
---
 gcc/doc/rtl.texi   | 11 ++-
 gcc/simplify-rtx.c |  9 +++--
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index 5b1e695..0695ad2 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -2939,11 +2939,12 @@ a set bit indicates it is taken from @var{vec1}.
 @item (vec_select:@var{m} @var{vec1} @var{selection})
 This describes an operation that selects parts of a vector.  @var{vec1} is
 the source vector, and @var{selection} is a @code{parallel} that contains a
-@code{const_int} for each of the subparts of the result vector, giving the
-number of the source subpart that should be stored into it.
-The result mode @var{m} is either the submode for a single element of
-@var{vec1} (if only one subpart is selected), or another vector mode
-with that element submode (if multiple subparts are selected).
+@code{const_int} (or another expression, if the selection can be made at
+runtime) for each of the subparts of the result vector, giving the number of
+the source subpart that should be stored into it.  The result mode @var{m} is
+either the submode for a single element of @var{vec1} (if only one subpart is
+selected), or another vector mode with that element submode (if multiple
+subparts are selected).
 
 @findex vec_concat
 @item (vec_concat:@var{m} @var{x1} @var{x2})
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a9f2586..b4c6883 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3604,7 +3604,10 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 	  gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0)));
 	  gcc_assert (GET_CODE (trueop1) == PARALLEL);
 	  gcc_assert (XVECLEN (trueop1, 0) == 1);
-	  gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0)));
+
+	  /* We can't reason about selections made at runtime.  */
+	  if (!CONST_INT_P (XVECEXP (trueop1, 0, 0)))
+	return 0;
 
 	  if (vec_duplicate_p (trueop0, &elt0))
 	return elt0;
@@ -3703,7 +3706,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		{
 		  rtx x = XVECEXP (trueop1, 0, i);
 
-		  gcc_assert (CONST_INT_P (x));
+		  if (!CONST_INT_P (x))
+		return 0;
+
 		  RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0,
 		   INTVAL (x));
 		}


[PATCH 09/25] Elide repeated RTL elements.

2018-09-05 Thread ams

GCN's 64-lane vectors tend to make RTL dumps very long.  This patch makes them
far more bearable by eliding long sequences of the same element into "repeated"
messages.

2018-09-05  Andrew Stubbs  
Jan Hubicka  
Martin Jambor  

* print-rtl.c (print_rtx_operand_codes_E_and_V): Print how many times
the same elements are repeated rather than printing all of them.
---
 gcc/print-rtl.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 5dd2e31..8a04264 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -370,7 +370,20 @@ rtx_writer::print_rtx_operand_codes_E_and_V (const_rtx in_rtx, int idx)
 	m_sawclose = 1;
 
   for (int j = 0; j < XVECLEN (in_rtx, idx); j++)
-	print_rtx (XVECEXP (in_rtx, idx, j));
+	{
+	  int j1;
+
+	  print_rtx (XVECEXP (in_rtx, idx, j));
+	  for (j1 = j + 1; j1 < XVECLEN (in_rtx, idx); j1++)
+	if (XVECEXP (in_rtx, idx, j) != XVECEXP (in_rtx, idx, j1))
+	  break;
+
+	  if (j1 != j + 1)
+	{
+	  fprintf (m_outfile, " repeated %ix", j1 - j);
+	  j = j1 - 1;
+	}
+	}
 
   m_indent -= 2;
 }


[PATCH 08/25] Fix co-array allocation

2018-09-05 Thread ams

The Fortran front-end has a bug in which it uses "int" values for "size_t"
parameters.  I don't know why this isn't a problem for all 64-bit architectures,
but GCN ends up with the data in the wrong argument register and/or stack slot,
and bad things happen.

This patch corrects the issue by setting the correct type.

2018-09-05  Kwok Cheung Yeung  

gcc/fortran/
* trans-expr.c (gfc_trans_structure_assign): Ensure that
integer_zero_node is of sizetype when used as the first
argument of a call to _gfortran_caf_register.
* trans-intrinsic.c (conv_intrinsic_event_query): Convert computed
index to a size_t type.
* trans-stmt.c (gfc_trans_event_post_wait): Likewise.
---
 gcc/fortran/trans-expr.c  | 2 +-
 gcc/fortran/trans-intrinsic.c | 3 ++-
 gcc/fortran/trans-stmt.c  | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 56ce98c..91be3fb 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -7729,7 +7729,7 @@ gfc_trans_structure_assign (tree dest, gfc_expr * expr, bool init, bool coarray)
 		 suffices to recognize the data as array.  */
 	  if (rank < 0)
 		rank = 1;
-	  size = integer_zero_node;
+	  size = fold_convert (sizetype, integer_zero_node);
 	  desc = field;
 	  gfc_add_modify (&block, gfc_conv_descriptor_rank (desc),
 			  build_int_cst (signed_char_type_node, rank));
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index b2cea93..23c13da 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -10732,7 +10732,8 @@ conv_intrinsic_event_query (gfc_code *code)
 	  tmp = fold_build2_loc (input_location, MULT_EXPR,
  integer_type_node, extent, tmp);
 	  index = fold_build2_loc (input_location, PLUS_EXPR,
-   integer_type_node, index, tmp);
+   size_type_node, index,
+   fold_convert (size_type_node, tmp));
 	  if (i < ar->dimen - 1)
 		{
 		  ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 795d3cc..2c59675 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -1096,7 +1096,8 @@ gfc_trans_event_post_wait (gfc_code *code, gfc_exec_op op)
 	  tmp = fold_build2_loc (input_location, MULT_EXPR,
  integer_type_node, extent, tmp);
 	  index = fold_build2_loc (input_location, PLUS_EXPR,
-   integer_type_node, index, tmp);
+   size_type_node, index,
+   fold_convert (size_type_node, tmp));
 	  if (i < ar->dimen - 1)
 	{
 	  ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);


[PATCH 11/25] Simplify vec_merge according to the mask.

2018-09-05 Thread ams

This patch was part of the original patch we acquired from Honza and Martin.

It simplifies vector elements that are inactive, according to the mask.

2018-09-05  Jan Hubicka  
Martin Jambor  

* simplify-rtx.c (simplify_merge_mask): New function.
(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
same masks are used in op1 or op2.
---
 gcc/simplify-rtx.c | 81 ++
 1 file changed, 81 insertions(+)

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 89487f2..6f27bda 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5578,6 +5578,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
   return NULL_RTX;
 }
 
+/* X is an operand number OP of VEC_MERGE operation with MASK.
+   Try to simplify using knowledge that values outside of MASK
+   will not be used.  */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+{
+  if (!side_effects_p (XEXP (x, 1 - op)))
+	return XEXP (x, op);
+}
+  if (side_effects_p (x))
+return NULL_RTX;
+  if (UNARY_P (x)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  if (top0)
+	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+   GET_MODE (XEXP (x, 0)));
+}
+  if (BINARY_P (x)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+  if (top0 || top1)
+	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+top0 ? top0 : XEXP (x, 0),
+top1 ? top1 : XEXP (x, 1));
+}
+  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+  rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+  if (top0 || top1)
+	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+ GET_MODE (XEXP (x, 0)),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1),
+ top2 ? top2 : XEXP (x, 2));
+}
+  return NULL_RTX;
+}
+
 
 /* Simplify CODE, an operation with result mode MODE and three operands,
OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
@@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2) && !side_effects_p (op1))
 	return op0;
 
+  if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	return simplify_gen_ternary (code, mode, mode,
+	 top0 ? top0 : op0,
+	 top1 ? top1 : op1, op2);
+	}
+
+  if (GET_CODE (op0) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op0, 2))
+	  && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+ XEXP (op0, 0), op1, op2);
+
+  if (GET_CODE (op1) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op1, 2))
+	  && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+ XEXP (op0, 1), op1, op2);
+
   break;
 
 default:


[PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread ams

GCN uses V64BImode to represent vector masks in the middle-end, and DImode
bit-masks to represent them in the back-end.  These must be converted at expand
time and the most convenient way is to simply use a SUBREG.

This works fine except that simplify_subreg needs to be able to convert
immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know how
to convert vectors to integers where there is more than one element per byte.

This patch implements such conversions for the cases that we need.

I don't know why this is not a problem for other targets that use BImode
vectors, such as ARM SVE, so it's possible I missed some magic somewhere?

2018-09-05  Andrew Stubbs  

gcc/
* simplify-rtx.c (convert_packed_vector): New function.
(simplify_immed_subreg): Recognise Boolean vectors and call
convert_packed_vector.
---
 gcc/simplify-rtx.c | 76 ++
 1 file changed, 76 insertions(+)

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index b4c6883..89487f2 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5976,6 +5976,73 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* Convert a CONST_INT to a CONST_VECTOR, or vice versa.
+
+   This should only occur for VECTOR_BOOL_MODE types, so the semantics
+   specified by that are assumed.  In particular, the lowest value is
+   in the first byte.  */
+
+static rtx
+convert_packed_vector (fixed_size_mode to_mode, rtx op,
+		   machine_mode from_mode, unsigned int byte,
+		   unsigned int first_elem, unsigned int inner_bytes)
+{
+  /* Sizes greater than HOST_WIDE_INT would need a better implementation.  */
+  gcc_assert (GET_MODE_SIZE (to_mode) <= sizeof (HOST_WIDE_INT));
+
+  if (GET_CODE (op) == CONST_VECTOR)
+{
+  gcc_assert (!VECTOR_MODE_P (to_mode));
+
+  int num_elem = GET_MODE_NUNITS (from_mode).to_constant();
+  int elem_bitsize = (GET_MODE_SIZE (from_mode).to_constant()
+			  * BITS_PER_UNIT) / num_elem;
+  int elem_mask = (1 << elem_bitsize) - 1;
+  HOST_WIDE_INT subreg_mask =
+	(sizeof (HOST_WIDE_INT) == GET_MODE_SIZE (to_mode)
+	 ? -1
+	 : (((HOST_WIDE_INT)1 << (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT))
+	- 1));
+
+  HOST_WIDE_INT val = 0;
+  for (int i = 0; i < num_elem; i++)
+	val |= ((INTVAL (CONST_VECTOR_ELT (op, i)) & elem_mask)
+		<< (i * elem_bitsize));
+
+  val >>= byte * BITS_PER_UNIT;
+  val &= subreg_mask;
+
+  return gen_rtx_CONST_INT (VOIDmode, val);
+}
+  else if (GET_CODE (op) == CONST_INT)
+{
+  /* Subregs of a vector not implemented yet.  */
+  gcc_assert (maybe_eq (GET_MODE_SIZE (to_mode),
+			GET_MODE_SIZE (from_mode)));
+
+  gcc_assert (VECTOR_MODE_P (to_mode));
+
+  int num_elem = GET_MODE_NUNITS (to_mode);
+  int elem_bitsize = (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT) / num_elem;
+  int elem_mask = (1 << elem_bitsize) - 1;
+
+  rtvec val = rtvec_alloc (num_elem);
+  rtx *elem = &RTVEC_ELT (val, 0);
+
+  for (int i = 0; i < num_elem; i++)
+	elem[i] = gen_rtx_CONST_INT (VOIDmode,
+ (INTVAL (op) >> (i * elem_bitsize))
+ & elem_mask);
+
+  return gen_rtx_CONST_VECTOR (to_mode, val);
+}
+  else
+{
+  gcc_unreachable ();
+  return op;
+}
+}
+
 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
@@ -6017,6 +6084,15 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx op,
   if (COMPLEX_MODE_P (outermode))
 return NULL_RTX;
 
+  /* Vectors with multiple elements per byte are a special case.  */
+  if ((VECTOR_MODE_P (innermode)
+   && ((GET_MODE_NUNITS (innermode).to_constant()
+	/ GET_MODE_SIZE(innermode).to_constant()) > 1))
+  || (VECTOR_MODE_P (outermode)
+	  && (GET_MODE_NUNITS (outermode) / GET_MODE_SIZE(outermode) > 1)))
+return convert_packed_vector (outermode, op, innermode, byte, first_elem,
+  inner_bytes);
+
   /* We support any size mode.  */
   max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
 		 inner_bytes * BITS_PER_UNIT);


[PATCH 12/25] Make default_static_chain return NULL in non-static functions

2018-09-05 Thread ams

This patch allows default_static_chain to be called from the back-end without
it knowing if the function is static or not.  Or, to put it another way,
without duplicating the check everywhere it's used.

2018-09-05  Tom de Vries  

gcc/
* targhooks.c (default_static_chain): Return NULL in non-static
functions.
---
 gcc/targhooks.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index afd56f3..742cfbf 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1021,8 +1021,14 @@ default_internal_arg_pointer (void)
 }
 
 rtx
-default_static_chain (const_tree ARG_UNUSED (fndecl_or_type), bool incoming_p)
+default_static_chain (const_tree fndecl_or_type, bool incoming_p)
 {
+  /* While this function won't be called by the middle-end when a static
+ chain isn't needed, it's also used throughout the backend so it's
+ easiest to keep this check centralized.  */
+  if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
+return NULL;
+
   if (incoming_p)
 {
 #ifdef STATIC_CHAIN_INCOMING_REGNUM


[PATCH 13/25] Create TARGET_DISABLE_CURRENT_VECTOR_SIZE

2018-09-05 Thread ams

This feature probably ought to be reworked as a proper target hook, but I would
like to know if this is the correct solution to the problem first.

The problem is that GCN vectors have a fixed number of elements (64) and the
vector size varies with element size.  E.g. V64QI is 64 bytes and V64SI is 256
bytes.

This is a problem because GCC has an assumption that a) vector registers are
fixed size, and b) if there are multiple vector sizes you want to pick one size
and stick with it for the whole function.

This is a problem in various places, but mostly it's not fatal. However,
get_vectype_for_scalar_type caches the vector size for the first type it
encounters and then tries to apply that to all subsequent vectors, which
completely destroys vectorization.  The caching feature appears to be an
attempt to cope with AVX having a different vector size to other x86 vector
options.

This patch simply disables the cache so that it must ask the backend for the
preferred mode for every type.

2018-09-05  Andrew Stubbs  

gcc/
* tree-vect-stmts.c (get_vectype_for_scalar_type): Implement
TARGET_DISABLE_CURRENT_VECTOR_SIZE.
---
 gcc/tree-vect-stmts.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 607a2bd..8875201 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9945,9 +9945,12 @@ get_vectype_for_scalar_type (tree scalar_type)
   tree vectype;
   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
 		  current_vector_size);
+/* FIXME: use a proper target hook or macro.  */
+#ifndef TARGET_DISABLE_CURRENT_VECTOR_SIZE
   if (vectype
   && known_eq (current_vector_size, 0U))
 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+#endif
   return vectype;
 }
 


[PATCH 16/25] Fix IRA ICE.

2018-09-05 Thread ams

The IRA pass makes an assumption that any pseudos created after the pass begins
were created explicitly by the pass itself and therefore will have
corresponding entries in its other tables.

The GCN back-end, however, often creates additional pseudos, in expand
patterns, to represent the necessary EXEC value, and these break IRA's
assumption and cause ICEs.

This patch simply has IRA skip unknown pseudos, and the problem goes away.

Presumably, it's not ideal that these registers have not been processed by IRA,
but it does not appear to do any real harm.

2018-09-05  Andrew Stubbs  

gcc/
* ira.c (setup_preferred_alternate_classes_for_new_pseudos): Skip
pseudos not created by this pass.
(move_unallocated_pseudos): Likewise.
---
 gcc/ira.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/ira.c b/gcc/ira.c
index def194a..e0c293c 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -2769,7 +2769,12 @@ setup_preferred_alternate_classes_for_new_pseudos (int start)
   for (i = start; i < max_regno; i++)
 {
   old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]);
-  ira_assert (i != old_regno);
+
+  /* Skip any new pseudos not created directly by this pass.
+	 gen_move_insn can do this on AMD GCN, for example.  */
+  if (i == old_regno)
+	continue;
+
   setup_reg_classes (i, reg_preferred_class (old_regno),
 			 reg_alternate_class (old_regno),
 			 reg_allocno_class (old_regno));
@@ -5054,6 +5059,12 @@ move_unallocated_pseudos (void)
   {
 	int idx = i - first_moveable_pseudo;
 	rtx other_reg = pseudo_replaced_reg[idx];
+
+	/* Skip any new pseudos not created directly by find_moveable_pseudos.
+	   gen_move_insn can do this on AMD GCN, for example.  */
+	if (!other_reg)
+	  continue;
+
 	rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (i));
 	/* The use must follow all definitions of OTHER_REG, so we can
 	   insert the new definition immediately after any of them.  */


[PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-05 Thread ams

If the autovectorizer tries to load a GCN 64-lane vector elementwise then it
blows away the register file and produces horrible code.

This patch simply disallows elementwise loads for such large vectors.  Is there
a better way to disable this in the middle-end?

2018-09-05  Julian Brown  

gcc/
* tree-vect-stmts.c (get_load_store_type): Don't use VMAT_ELEMENTWISE
loads/stores with many-element (>=64) vectors.
---
 gcc/tree-vect-stmts.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8875201..a333991 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2452,6 +2452,26 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 	*memory_access_type = VMAT_CONTIGUOUS;
 }
 
+  /* FIXME: Element-wise accesses can be extremely expensive if we have a
+ large number of elements to deal with (e.g. 64 for AMD GCN) using the
+ current generic code expansion.  Until an efficient code sequence is
+ supported for affected targets instead, don't attempt vectorization for
+ VMAT_ELEMENTWISE at all.  */
+  if (*memory_access_type == VMAT_ELEMENTWISE)
+{
+  poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (maybe_ge (nelements, 64))
+	{
+	  if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+	  "too many elements (%u) for elementwise accesses\n",
+	  (unsigned) nelements.to_constant ());
+
+	  return false;
+	}
+}
+
   if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
   && !nunits.is_constant ())


[PATCH 17/25] Fix Fortran STOP.

2018-09-05 Thread ams

The minimal libgfortran setup was created for NVPTX, but will also be used by
AMD GCN.

This patch simply removes an assumption that NVPTX is the only user.
Specifically, NVPTX exit is broken, but AMD GCN exit works just fine.

2018-09-05  Andrew Stubbs  

libgfortran/
* runtime/minimal.c (exit): Only work around nvptx bugs on nvptx.
---
 libgfortran/runtime/minimal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c
index 0b1efeb..8940f97 100644
--- a/libgfortran/runtime/minimal.c
+++ b/libgfortran/runtime/minimal.c
@@ -197,10 +197,12 @@ sys_abort (void)
 #define st_printf printf
 #undef estr_write
 #define estr_write printf
+#if __nvptx__
 /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region
doesn't terminate process'.  */
 #undef exit
 #define exit(...) do { abort (); } while (0)
+#endif
 #undef exit_error
 #define exit_error(...) do { abort (); } while (0)
 


[PATCH 15/25] Don't double-count early-clobber matches.

2018-09-05 Thread ams

Given a pattern with a number of operands:

(match_operand 0 "" "=&v")
(match_operand 1 "" " v0")
(match_operand 2 "" " v0")
(match_operand 3 "" " v0")

GCC will currently increment "reject" once, for operand 0, and then decrement
it once for each of the other operands, ending with reject == -2 and an
assertion failure.  If there's a conflict then it might try to decrement reject
yet again.

Incidentally, what these patterns are trying to achieve is an allocation in
which operand 0 may match one of the other operands, but may not partially
overlap any of them.  Ideally there'd be a better way to do this.

In any case, it will affect any pattern in which multiple operands may (or
must) match an early-clobber operand.

The patch only allows a reject-- when one has not already occurred, for that
operand.

2018-09-05  Andrew Stubbs  

gcc/
* lra-constraints.c (process_alt_operands): Check
matching_early_clobber before decrementing reject, and set
matching_early_clobber after.
* lra-int.h (struct lra_operand_data): Add matching_early_clobber.
* lra.c (setup_operand_alternative): Initialize matching_early_clobber.
---
 gcc/lra-constraints.c | 22 ++
 gcc/lra-int.h |  3 +++
 gcc/lra.c |  1 +
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 8be4d46..55163f1 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -2202,7 +2202,13 @@ process_alt_operands (int only_alternative)
  "%d Matching earlyclobber alt:"
  " reject--\n",
  nop);
-			reject--;
+			if (!curr_static_id->operand[m]
+		 .matching_early_clobber)
+			  {
+reject--;
+curr_static_id->operand[m]
+		.matching_early_clobber = 1;
+			  }
 			  }
 			/* Otherwise we prefer no matching
 			   alternatives because it gives more freedom
@@ -2948,15 +2954,11 @@ process_alt_operands (int only_alternative)
 	  curr_alt_dont_inherit_ops[curr_alt_dont_inherit_ops_num++]
 		= last_conflict_j;
 	  losers++;
-	  /* Early clobber was already reflected in REJECT. */
-	  lra_assert (reject > 0);
 	  if (lra_dump_file != NULL)
 		fprintf
 		  (lra_dump_file,
 		   "%d Conflict early clobber reload: reject--\n",
 		   i);
-	  reject--;
-	  overall += LRA_LOSER_COST_FACTOR - 1;
 	}
 	  else
 	{
@@ -2980,17 +2982,21 @@ process_alt_operands (int only_alternative)
 		}
 	  curr_alt_win[i] = curr_alt_match_win[i] = false;
 	  losers++;
-	  /* Early clobber was already reflected in REJECT. */
-	  lra_assert (reject > 0);
 	  if (lra_dump_file != NULL)
 		fprintf
 		  (lra_dump_file,
 		   "%d Matched conflict early clobber reloads: "
 		   "reject--\n",
 		   i);
+	}
+	  /* Early clobber was already reflected in REJECT. */
+	  if (!curr_static_id->operand[i].matching_early_clobber)
+	{
+	  lra_assert (reject > 0);
 	  reject--;
-	  overall += LRA_LOSER_COST_FACTOR - 1;
+	  curr_static_id->operand[i].matching_early_clobber = 1;
 	}
+	  overall += LRA_LOSER_COST_FACTOR - 1;
 	}
   if (lra_dump_file != NULL)
 	fprintf (lra_dump_file, "  alt=%d,overall=%d,losers=%d,rld_nregs=%d\n",
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 5267b53..f193e1f 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -147,6 +147,9 @@ struct lra_operand_data
  This field is set up every time when corresponding
  operand_alternative in lra_static_insn_data is set up.  */
   unsigned int early_clobber : 1;
+  /* True if there is an early clobber that has a matching alternative.
+ This field is used to prevent multiple matches being counted.  */
+  unsigned int matching_early_clobber : 1;
   /* True if the operand is an address.  */
   unsigned int is_address : 1;
 };
diff --git a/gcc/lra.c b/gcc/lra.c
index aa768fb..01dd8b8 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -797,6 +797,7 @@ setup_operand_alternative (lra_insn_recog_data_t data,
 {
   static_data->operand[i].early_clobber_alts = 0;
   static_data->operand[i].early_clobber = false;
+  static_data->operand[i].matching_early_clobber = false;
   static_data->operand[i].is_address = false;
   if (static_data->operand[i].constraint[0] == '%')
 	{


[PATCH 19/25] GCN libgfortran.

2018-09-05 Thread ams

This patch contains the GCN port of libgfortran.  We use the minimal
configuration created for NVPTX.  That's all that's required, besides the
target-independent bug fixes posted already.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

libgfortran/
* configure.ac: Use minimal mode for amdgcn.
* configure: Regenerate.
---
 libgfortran/configure| 7 ---
 libgfortran/configure.ac | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index a583b67..fd8b697 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -5994,7 +5994,8 @@ fi
 # * C library support for other features such as signal, environment
 #   variables, time functions
 
- if test "x${target_cpu}" = xnvptx; then
+ if test "x${target_cpu}" = xnvptx \
+ || test "x${target_cpu}" = xamdgcn; then
   LIBGFOR_MINIMAL_TRUE=
   LIBGFOR_MINIMAL_FALSE='#'
 else
@@ -12514,7 +12515,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12517 "configure"
+#line 12518 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12620,7 +12621,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12623 "configure"
+#line 12624 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 05952aa..11b629d 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -206,7 +206,8 @@ AM_CONDITIONAL(LIBGFOR_USE_SYMVER_SUN, [test "x$gfortran_use_symver" = xsun])
 # * C library support for other features such as signal, environment
 #   variables, time functions
 
-AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx])
+AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx \
+ || test "x${target_cpu}" = xamdgcn])
 
 # Figure out whether the compiler supports "-ffunction-sections -fdata-sections",
 # similarly to how libstdc++ does it


[PATCH 18/25] Fix interleaving of Fortran stop messages

2018-09-05 Thread ams

Fortran STOP and ERROR STOP use one function to print the "STOP" string and a
different one to print the message string.  On GCN this results in out-of-order output, such as
"ERROR STOP ".

This patch fixes the problem by making estr_write use the proper Fortran write,
not C printf, so both parts are now output the same way.  This also ensures
that both parts are output to STDERR (not that that means anything on GCN).

2018-09-05  Kwok Cheung Yeung  

libgfortran/
* runtime/minimal.c (estr_write): Define in terms of write.
---
 libgfortran/runtime/minimal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c
index 8940f97..b6d26fd 100644
--- a/libgfortran/runtime/minimal.c
+++ b/libgfortran/runtime/minimal.c
@@ -196,7 +196,7 @@ sys_abort (void)
 #undef st_printf
 #define st_printf printf
 #undef estr_write
-#define estr_write printf
+#define estr_write(X) write(STDERR_FILENO, (X), strlen (X))
 #if __nvptx__
 /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region
doesn't terminate process'.  */


[PATCH 20/25] GCN libgcc.

2018-09-05 Thread ams

This patch contains the GCN port of libgcc.  I've broken it out just to keep
both parts more manageable.

We have the usual stuff, plus a "gomp_print" implementation intended to provide
a means to output text to console without using the full printf.  Originally
this was because we did not have a working Newlib port, but now it provides the
underlying mechanism for printf.  It's also much lighter than printf, and
therefore more suitable for debugging offload kernels (for which there is no
debugger, yet).

In order to work in offload kernels the same function must be present in both
host and GCN toolchains.  Therefore it needs to live in libgomp (hence the
name).  However, having found it also useful in standalone testing, I have
moved the GCN implementation to libgcc.

It was also necessary to provide a means to disable EMUTLS.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

libgcc/
* Makefile.in: Don't add emutls.c when --enable-emutls is "no".
* config.host: Recognize amdgcn*-*-amdhsa.
* config/gcn/crt0.c: New file.
* config/gcn/gomp_print.c: New file.
* config/gcn/lib2-divmod-hi.c: New file.
* config/gcn/lib2-divmod.c: New file.
* config/gcn/lib2-gcn.h: New file.
* config/gcn/reduction.c: New file.
* config/gcn/sfp-machine.h: New file.
* config/gcn/t-amdgcn: New file.
---
 libgcc/Makefile.in |   2 +
 libgcc/config.host |   8 +++
 libgcc/config/gcn/crt0.c   |  23 
 libgcc/config/gcn/gomp_print.c |  99 +++
 libgcc/config/gcn/lib2-divmod-hi.c | 117 +
 libgcc/config/gcn/lib2-divmod.c| 117 +
 libgcc/config/gcn/lib2-gcn.h   |  49 
 libgcc/config/gcn/reduction.c  |  30 ++
 libgcc/config/gcn/sfp-machine.h|  51 
 libgcc/config/gcn/t-amdgcn |  25 
 10 files changed, 521 insertions(+)
 create mode 100644 libgcc/config/gcn/crt0.c
 create mode 100644 libgcc/config/gcn/gomp_print.c
 create mode 100644 libgcc/config/gcn/lib2-divmod-hi.c
 create mode 100644 libgcc/config/gcn/lib2-divmod.c
 create mode 100644 libgcc/config/gcn/lib2-gcn.h
 create mode 100644 libgcc/config/gcn/reduction.c
 create mode 100644 libgcc/config/gcn/sfp-machine.h
 create mode 100644 libgcc/config/gcn/t-amdgcn

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 0c5b264..6f68257 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -429,9 +429,11 @@ LIB2ADD += enable-execute-stack.c
 # While emutls.c has nothing to do with EH, it is in LIB2ADDEH*
 # instead of LIB2ADD because that's the way to be sure on some targets
 # (e.g. *-*-darwin*) only one copy of it is linked.
+ifneq ($(enable_emutls),no)
 LIB2ADDEH += $(srcdir)/emutls.c
 LIB2ADDEHSTATIC += $(srcdir)/emutls.c
 LIB2ADDEHSHARED += $(srcdir)/emutls.c
+endif
 
 # Library members defined in libgcc2.c.
 lib2funcs = _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _cmpdi2 _ucmpdi2	   \
diff --git a/libgcc/config.host b/libgcc/config.host
index 029f656..29178da 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -91,6 +91,10 @@ alpha*-*-*)
 am33_2.0-*-linux*)
 	cpu_type=mn10300
 	;;
+amdgcn*-*-*)
+	cpu_type=gcn
+	tmake_file="${tmake_file} t-softfp-sfdf t-softfp"
+	;;
 arc*-*-*)
 	cpu_type=arc
 	;;
@@ -384,6 +388,10 @@ alpha*-dec-*vms*)
 	extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o"
 	md_unwind_header=alpha/vms-unwind.h
 	;;
+amdgcn*-*-amdhsa)
+	tmake_file="$tmake_file gcn/t-amdgcn"
+	extra_parts="crt0.o"
+	;;
 arc*-*-elf*)
 	tmake_file="arc/t-arc"
 	extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o"
diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c
new file mode 100644
index 0000000..f4f367b
--- /dev/null
+++ b/libgcc/config/gcn/crt0.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Provide an entry point sy

[PATCH 22/25] Add dg-require-effective-target exceptions

2018-09-05 Thread ams

There are a number of tests that fail because they assume that exceptions are
available, but GCN does not support them, yet.

This patch adds "dg-require-effective-target exceptions" in all the affected
tests.  There's probably an automatic way to test for exceptions, but the
current implementation simply says that AMD GCN does not support them.  This
should ensure that no other targets are affected by the change.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

gcc/testsuite/
* c-c++-common/ubsan/pr71512-1.c: Require exceptions.
* c-c++-common/ubsan/pr71512-2.c: Require exceptions.
* gcc.c-torture/compile/pr34648.c: Require exceptions.
* gcc.c-torture/compile/pr41469.c: Require exceptions.
* gcc.dg/20111216-1.c: Require exceptions.
* gcc.dg/cleanup-10.c: Require exceptions.
* gcc.dg/cleanup-11.c: Require exceptions.
* gcc.dg/cleanup-12.c: Require exceptions.
* gcc.dg/cleanup-13.c: Require exceptions.
* gcc.dg/cleanup-5.c: Require exceptions.
* gcc.dg/cleanup-8.c: Require exceptions.
* gcc.dg/cleanup-9.c: Require exceptions.
* gcc.dg/gomp/pr29955.c: Require exceptions.
* gcc.dg/lto/pr52097_0.c: Require exceptions.
* gcc.dg/nested-func-5.c: Require exceptions.
* gcc.dg/pch/except-1.c: Require exceptions.
* gcc.dg/pch/valid-2.c: Require exceptions.
* gcc.dg/pr41470.c: Require exceptions.
* gcc.dg/pr42427.c: Require exceptions.
* gcc.dg/pr44545.c: Require exceptions.
* gcc.dg/pr47086.c: Require exceptions.
* gcc.dg/pr51481.c: Require exceptions.
* gcc.dg/pr51644.c: Require exceptions.
* gcc.dg/pr52046.c: Require exceptions.
* gcc.dg/pr54669.c: Require exceptions.
* gcc.dg/pr56424.c: Require exceptions.
* gcc.dg/pr64465.c: Require exceptions.
* gcc.dg/pr65802.c: Require exceptions.
* gcc.dg/pr67563.c: Require exceptions.
* gcc.dg/tree-ssa/pr41469-1.c: Require exceptions.
* gcc.dg/tree-ssa/ssa-dse-28.c: Require exceptions.
* gcc.dg/vect/pr46663.c: Require exceptions.
* lib/target-supports.exp (check_effective_target_exceptions): New.
---
 gcc/testsuite/c-c++-common/ubsan/pr71512-1.c  |  1 +
 gcc/testsuite/c-c++-common/ubsan/pr71512-2.c  |  1 +
 gcc/testsuite/gcc.c-torture/compile/pr34648.c |  1 +
 gcc/testsuite/gcc.c-torture/compile/pr41469.c |  1 +
 gcc/testsuite/gcc.dg/20111216-1.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-10.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-11.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-12.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-13.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-5.c  |  1 +
 gcc/testsuite/gcc.dg/cleanup-8.c  |  1 +
 gcc/testsuite/gcc.dg/cleanup-9.c  |  1 +
 gcc/testsuite/gcc.dg/gomp/pr29955.c   |  1 +
 gcc/testsuite/gcc.dg/lto/pr52097_0.c  |  1 +
 gcc/testsuite/gcc.dg/nested-func-5.c  |  1 +
 gcc/testsuite/gcc.dg/pch/except-1.c   |  1 +
 gcc/testsuite/gcc.dg/pch/valid-2.c|  2 +-
 gcc/testsuite/gcc.dg/pr41470.c|  1 +
 gcc/testsuite/gcc.dg/pr42427.c|  1 +
 gcc/testsuite/gcc.dg/pr44545.c|  1 +
 gcc/testsuite/gcc.dg/pr47086.c|  1 +
 gcc/testsuite/gcc.dg/pr51481.c|  1 +
 gcc/testsuite/gcc.dg/pr51644.c|  1 +
 gcc/testsuite/gcc.dg/pr52046.c|  1 +
 gcc/testsuite/gcc.dg/pr54669.c|  1 +
 gcc/testsuite/gcc.dg/pr56424.c|  1 +
 gcc/testsuite/gcc.dg/pr64465.c|  1 +
 gcc/testsuite/gcc.dg/pr65802.c|  1 +
 gcc/testsuite/gcc.dg/pr67563.c|  1 +
 gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c |  1 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c|  1 +
 gcc/testsuite/gcc.dg/vect/pr46663.c   |  1 +
 gcc/testsuite/lib/target-supports.exp | 10 ++
 33 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
index 2a90ab1..8af9365 100644
--- a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
@@ -1,5 +1,6 @@
 /* PR c/71512 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -fnon-call-exceptions -ftrapv -fexceptions -fsanitize=undefined" } */
+/* { dg-require-effective-target exceptions } */
 
 #include "../../gcc.dg/pr44545.c"
diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
index 1c95593..0c16934 100644
--- a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
@@ -1,5 +1,6 @@
 /* PR c/71512 */
 /* { dg-do compile } */
 /* { dg-options "-O -fexceptions -fnon-call-exceptions -ftrapv -fsanitize=un

[PATCH 23/25] Testsuite: GCN is always PIE.

2018-09-05 Thread ams

The GCN/HSA loader ignores the load address and uses a random location, so we
build all GCN binaries as PIE, by default.

This patch makes the necessary testsuite adjustments to make this work
correctly.

2018-09-05  Andrew Stubbs  

gcc/testsuite/
* gcc.dg/graphite/scop-19.c: Check pie_enabled.
* gcc.dg/pic-1.c: Disable on amdgcn.
* gcc.dg/pic-2.c: Disable on amdgcn.
* gcc.dg/pic-3.c: Disable on amdgcn.
* gcc.dg/pic-4.c: Disable on amdgcn.
* gcc.dg/pie-3.c: Disable on amdgcn.
* gcc.dg/pie-4.c: Disable on amdgcn.
* gcc.dg/uninit-19.c: Check pie_enabled.
* lib/target-supports.exp (check_effective_target_pie): Add amdgcn.
---
 gcc/testsuite/gcc.dg/graphite/scop-19.c | 4 ++--
 gcc/testsuite/gcc.dg/pic-1.c| 2 +-
 gcc/testsuite/gcc.dg/pic-2.c| 1 +
 gcc/testsuite/gcc.dg/pic-3.c| 2 +-
 gcc/testsuite/gcc.dg/pic-4.c| 2 +-
 gcc/testsuite/gcc.dg/pie-3.c| 2 +-
 gcc/testsuite/gcc.dg/pie-4.c| 2 +-
 gcc/testsuite/gcc.dg/uninit-19.c| 4 ++--
 gcc/testsuite/lib/target-supports.exp   | 3 ++-
 9 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/graphite/scop-19.c b/gcc/testsuite/gcc.dg/graphite/scop-19.c
index c89717b..6028132 100644
--- a/gcc/testsuite/gcc.dg/graphite/scop-19.c
+++ b/gcc/testsuite/gcc.dg/graphite/scop-19.c
@@ -31,6 +31,6 @@ d_growable_string_append_buffer (struct d_growable_string *dgs,
   if (need > dgs->alc)
 d_growable_string_resize (dgs, need);
 }
-/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target nonpic } } } */
-/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! nonpic } } } } */
+/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target { nonpic || pie_enabled } } } } */
+/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! { nonpic || pie_enabled } } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/pic-1.c b/gcc/testsuite/gcc.dg/pic-1.c
index 82ba43d..4bb332e 100644
--- a/gcc/testsuite/gcc.dg/pic-1.c
+++ b/gcc/testsuite/gcc.dg/pic-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*-*-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*-*-* amdgcn*-*-* } } } } */
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fpic" } */
 
diff --git a/gcc/testsuite/gcc.dg/pic-2.c b/gcc/testsuite/gcc.dg/pic-2.c
index bccec13..3846ec4 100644
--- a/gcc/testsuite/gcc.dg/pic-2.c
+++ b/gcc/testsuite/gcc.dg/pic-2.c
@@ -2,6 +2,7 @@
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fPIC" } */
 /* { dg-skip-if "__PIC__ is always 1 for MIPS" { mips*-*-* } } */
+/* { dg-skip-if "__PIE__ is always defined for GCN" { amdgcn*-*-* } } */
 
 #if __PIC__ != 2
 # error __PIC__ is not 2!
diff --git a/gcc/testsuite/gcc.dg/pic-3.c b/gcc/testsuite/gcc.dg/pic-3.c
index c56f06f..1397977 100644
--- a/gcc/testsuite/gcc.dg/pic-3.c
+++ b/gcc/testsuite/gcc.dg/pic-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-pic" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pic-4.c b/gcc/testsuite/gcc.dg/pic-4.c
index 2afdd99..d6d9dc9 100644
--- a/gcc/testsuite/gcc.dg/pic-4.c
+++ b/gcc/testsuite/gcc.dg/pic-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-PIC" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pie-3.c b/gcc/testsuite/gcc.dg/pie-3.c
index 5577437..fd4a48d 100644
--- a/gcc/testsuite/gcc.dg/pie-3.c
+++ b/gcc/testsuite/gcc.dg/pie-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-pie" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pie-4.c b/gcc/testsuite/gcc.dg/pie-4.c
index 4134676..5523602 100644
--- a/gcc/testsuite/gcc.dg/pie-4.c
+++ b/gcc/testsuite/gcc.dg/pie-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-PIE" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/uninit-19.c b/gcc/testsuite/gcc.dg/uninit-19.c
index 094dc0e..3f5f06a 100644
--- a/gcc/testsuite/gcc.dg/uninit-19.c
+++ b/gcc/testsuite/gcc.dg/uninit-19.c
@@ -12,7 +12,7 @@ fn1 (int p1, float *f1, float *f2, float *f3, unsigned char *c1, float *f4,
 {
   if (p1 & 8)
 b[3] = p10[a];
-  /* { dg-warning "may be used uninitialized" "" { target { { nonpic } || { hppa*64*-*-* } } } .-1

[PATCH 24/25] Ignore LLVM's blank lines.

2018-09-05 Thread ams

The GCN toolchain must use the LLVM assembler and linker because there's no
binutils port.  The LLVM tools do not have the same diagnostic style as
binutils, so the "blank line(s) in output" tests are inappropriate (and very
noisy).

The LLVM tools also have different command line options, so it's not possible
to autodetect object formats in the same way.

This patch addresses both issues.

2018-09-05  Andrew Stubbs  

gcc/testsuite/
* lib/file-format.exp (gcc_target_object_format): Handle AMD GCN.
* lib/gcc-dg.exp (gcc-dg-prune): Ignore blank lines from the LLVM
linker.
* lib/target-supports.exp (check_effective_target_llvm_binutils): New.
---
 gcc/testsuite/lib/file-format.exp |  3 +++
 gcc/testsuite/lib/gcc-dg.exp  |  2 +-
 gcc/testsuite/lib/target-supports.exp | 14 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/file-format.exp b/gcc/testsuite/lib/file-format.exp
index 5c47246..c595fe2 100644
--- a/gcc/testsuite/lib/file-format.exp
+++ b/gcc/testsuite/lib/file-format.exp
@@ -41,6 +41,9 @@ proc gcc_target_object_format { } {
 } elseif { [istarget *-*-aix*] } {
 	# AIX doesn't necessarily have objdump, so hand-code it.
 	set gcc_target_object_format_saved coff
+} elseif { [istarget *-*-amdhsa*] } {
+	# AMD GCN uses LLVM objdump which is not CLI-compatible
+	set gcc_target_object_format_saved elf
 } else {
 set objdump_name [find_binutils_prog objdump]
 set open_file [open objfmtst.c w]
diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
index f5e6bef..7df348e 100644
--- a/gcc/testsuite/lib/gcc-dg.exp
+++ b/gcc/testsuite/lib/gcc-dg.exp
@@ -361,7 +361,7 @@ proc gcc-dg-prune { system text } {
 
 # Complain about blank lines in the output (PR other/69006)
 global allow_blank_lines
-if { !$allow_blank_lines } {
+if { !$allow_blank_lines && ![check_effective_target_llvm_binutils]} {
 	set num_blank_lines [llength [regexp -all -inline "\n\n" $text]]
 	if { $num_blank_lines } {
 	global testname_with_flags
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 61442bd..1e627fa 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9129,6 +9129,14 @@ proc check_effective_target_offload_hsa { } {
 } "-foffload=hsa" ]
 }
 
+# Return 1 if the compiler has been configured with AMD GCN offloading.
+
+proc check_effective_target_offload_gcn { } {
+return [check_no_compiler_messages offload_gcn assembly {
+	int main () {return 0;}
+} "-foffload=amdgcn-unknown-amdhsa" ]
+}
+
 # Return 1 if the target support -fprofile-update=atomic
 proc check_effective_target_profile_update_atomic {} {
 return [check_no_compiler_messages profile_update_atomic assembly {
@@ -9427,3 +9435,9 @@ proc check_effective_target_cet { } {
 	}
 } "-O2" ]
 }
+
+# Return 1 if this target uses an LLVM assembler and/or linker
+proc check_effective_target_llvm_binutils { } {
+return [expr { [istarget amdgcn*-*-*]
+		   || [check_effective_target_offload_gcn] } ]
+}


[PATCH 25/25] Port testsuite to GCN

2018-09-05 Thread ams

This collection of miscellaneous patches configures the testsuite to run on AMD
GCN in a standalone (i.e. not offloading) configuration.  It assumes you have
your Dejagnu set up to run binaries via the gcn-run tool.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

gcc/testsuite/
* gcc.dg/20020312-2.c: Add amdgcn support.
* gcc.dg/Wno-frame-address.c: Disable on amdgcn.
* gcc.dg/builtin-apply2.c: Likewise.
* gcc.dg/torture/stackalign/builtin-apply-2.c: Likewise.
* gcc.dg/gimplefe-28.c: Force -ffast-math.
* gcc.dg/intermod-1.c: Add -mlocal-symbol-id on amdgcn.
* gcc.dg/memcmp-1.c: Increase timeout factor.
* gcc.dg/pr59605-2.c: Add -DMAX_COPY=1025 on amdgcn.
* gcc.dg/sibcall-10.c: xfail on amdgcn.
* gcc.dg/sibcall-9.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-11c.c: Likewise.
* gcc.dg/tree-ssa/pr84512.c: Likewise.
* gcc.dg/tree-ssa/loop-1.c: Adjust expectations for amdgcn.
* gfortran.dg/bind_c_array_params_2.f90: Likewise.
* gcc.dg/vect/tree-vect.h: Avoid signal on amdgcn.
* lib/target-supports.exp (check_effective_target_trampolines):
Configure amdgcn.
(check_profiling_available): Likewise.
(check_effective_target_global_constructor): Likewise.
(check_effective_target_return_address): Likewise.
(check_effective_target_fopenacc): Likewise.
(check_effective_target_fopenmp): Likewise.
(check_effective_target_vect_int): Likewise.
(check_effective_target_vect_intfloat_cvt): Likewise.
(check_effective_target_vect_uintfloat_cvt): Likewise.
(check_effective_target_vect_floatint_cvt): Likewise.
(check_effective_target_vect_floatuint_cvt): Likewise.
(check_effective_target_vect_simd_clones): Likewise.
(check_effective_target_vect_shift): Likewise.
(check_effective_target_whole_vector_shift): Likewise.
(check_effective_target_vect_bswap): Likewise.
(check_effective_target_vect_shift_char): Likewise.
(check_effective_target_vect_long): Likewise.
(check_effective_target_vect_float): Likewise.
(check_effective_target_vect_double): Likewise.
(check_effective_target_vect_perm): Likewise.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi_pattern): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si_pattern): Likewise.
(check_effective_target_vect_natural_alignment): Likewise.
(check_effective_target_vect_fully_masked): Likewise.
(check_effective_target_vect_element_align): Likewise.
(check_effective_target_vect_masked_store): Likewise.
(check_effective_target_vect_scatter_store): Likewise.
(check_effective_target_vect_condition): Likewise.
(check_effective_target_vect_cond_mixed): Likewise.
(check_effective_target_vect_char_mult): Likewise.
(check_effective_target_vect_short_mult): Likewise.
(check_effective_target_vect_int_mult): Likewise.
(check_effective_target_sqrt_insn): Likewise.
(check_effective_target_vect_call_sqrtf): Likewise.
(check_effective_target_vect_call_btrunc): Likewise.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_lto): Likewise.
(check_vect_support_and_set_flags): Likewise.
(check_effective_target_vect_stridedN): Enable when fully masked is
available.
---
 gcc/testsuite/gcc.dg/20020312-2.c  |   2 +
 gcc/testsuite/gcc.dg/Wno-frame-address.c   |   2 +-
 gcc/testsuite/gcc.dg/builtin-apply2.c  |   2 +-
 gcc/testsuite/gcc.dg/gimplefe-28.c |   2 +-
 gcc/testsuite/gcc.dg/intermod-1.c  |   1 +
 gcc/testsuite/gcc.dg/memcmp-1.c|   1 +
 gcc/testsuite/gcc.dg/pr59605-2.c   |   2 +-
 gcc/testsuite/gcc.dg/sibcall-10.c  |   2 +-
 gcc/testsuite/gcc.dg/sibcall-9.c   |   2 +-
 .../gcc.dg/torture/stackalign/builtin-apply-2.c|   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-1.c |   6 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c|   2 +-
 gcc/testsuite/gcc.dg/vect/tree-vect.h  |   4 +
 .../gfortran.dg/bind_c_array_params_2.f90  |   3 +-
 gcc/testsuite/lib/target-supports.exp  | 126 +++--
 16 files changed, 113 insertions(+), 48 deletions(-)

diff 

Re: [PATCH 01/25] Handle vectors that don't fit in an integer.

2018-09-05 Thread Jakub Jelinek
On Wed, Sep 05, 2018 at 12:48:49PM +0100, a...@codesourcery.com wrote:
> +++ b/gcc/combine.c
> @@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)
>  {
>/* Bit-cast X into an integer mode.  */
>if (!SCALAR_INT_MODE_P (GET_MODE (x)))
> - x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
> + {
> +   enum machine_mode imode =
> + int_mode_for_mode (GET_MODE (x)).require ();

Just a formatting nit, not a review - = should be on the next line.

Jakub


Re: [PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread Jakub Jelinek
On Wed, Sep 05, 2018 at 12:50:25PM +0100, a...@codesourcery.com wrote:
> 2018-09-05  Andrew Stubbs  
> 
>   gcc/
>   * simplify-rtx.c (convert_packed_vector): New function.
>   (simplify_immed_subreg): Recognised Boolean vectors and call
>   convert_packed_vector.
> ---

> +  int elem_bitsize = (GET_MODE_SIZE (from_mode).to_constant()

Further formatting nits, no space before (.

> +   * BITS_PER_UNIT) / num_elem;
> +  int elem_mask = (1 << elem_bitsize) - 1;
> +  HOST_WIDE_INT subreg_mask =

= at the end of line.

> + (sizeof (HOST_WIDE_INT) == GET_MODE_SIZE (to_mode)
> +  ? -1
> +  : (((HOST_WIDE_INT)1 << (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT))
> + - 1));
> +  /* Vectors with multiple elements per byte are a special case.  */

> +  if ((VECTOR_MODE_P (innermode)
> +   && ((GET_MODE_NUNITS (innermode).to_constant()
> + / GET_MODE_SIZE(innermode).to_constant()) > 1))

Missing spaces before ( several times.

Jakub


[PATCH v2] combine: perform jump threading at the end

2018-09-05 Thread Ilya Leoshkevich
gcc/ChangeLog:

2018-09-05  Ilya Leoshkevich  

PR target/80080
* combine.c (rest_of_handle_combine): Perform jump threading.

gcc/testsuite/ChangeLog:

2018-09-05  Ilya Leoshkevich  

PR target/80080
* gcc.target/s390/pr80080-4.c: New test.
---
 gcc/combine.c | 10 --
 gcc/testsuite/gcc.target/s390/pr80080-4.c | 16 
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr80080-4.c

diff --git a/gcc/combine.c b/gcc/combine.c
index a2649b6d5a1..818b4c5b77d 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -14960,10 +14960,16 @@ rest_of_handle_combine (void)
free_dominance_info (CDI_DOMINATORS);
   timevar_push (TV_JUMP);
   rebuild_jump_labels (get_insns ());
-  cleanup_cfg (0);
-  timevar_pop (TV_JUMP);
 }
 
+  /* Combining insns can change basic blocks in a way that they end up
+ containing a single jump_insn. This creates an opportunity to improve code
+ with jump threading.  */
+  cleanup_cfg (CLEANUP_THREADING);
+
+  if (rebuild_jump_labels_after_combine)
+timevar_pop (TV_JUMP);
+
   regstat_free_n_sets_and_refs ();
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/s390/pr80080-4.c 
b/gcc/testsuite/gcc.target/s390/pr80080-4.c
new file mode 100644
index 000..91d31ec7845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr80080-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=z196 -O2" } */
+
+extern void bar(int *mem);
+
+void foo4(int *mem)
+{
+  int oldval = 0;
+  if (!__atomic_compare_exchange_n (mem, (void *) &oldval, 1,
+   1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+{
+  bar (mem);
+}
+}
+
+/* { dg-final { scan-assembler 
"\n\tlt\t.*\n\tjne\t(\\.L\\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\\1:\n\tjg\tbar\n"
 } } */
-- 
2.18.0



Re: [PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread Richard Biener
On Wed, Sep 5, 2018 at 1:51 PM  wrote:
>
>
> GCN uses V64BImode to represent vector masks in the middle-end, and DImode
> bit-masks to represent them in the back-end.  These must be converted at 
> expand
> time and the most convenient way is to simply use a SUBREG.

x86 with AVX512 uses SImode in the middle-end as well via the get_mask_mode
vectorization target hook.  Maybe you can avoid another special-case
by piggy-backing on
that?

> This works fine except that simplify_subreg needs to be able to convert
> immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know 
> how
> to convert vectors to integers where there is more than one element per byte.
>
> This patch implements such conversions for the cases that we need.
>
> I don't know why this is not a problem for other targets that use BImode
> vectors, such as ARM SVE, so it's possible I missed some magic somewhere?
>
> 2018-09-05  Andrew Stubbs  
>
> gcc/
> * simplify-rtx.c (convert_packed_vector): New function.
> (simplify_immed_subreg): Recognised Boolean vectors and call
> convert_packed_vector.
> ---
>  gcc/simplify-rtx.c | 76 
> ++
>  1 file changed, 76 insertions(+)
>


Re: [ARM/FDPIC v2 04/21] [ARM] FDPIC: Add support for FDPIC for arm architecture

2018-09-05 Thread Christophe Lyon
On Tue, 4 Sep 2018 at 17:29, Richard Earnshaw (lists)
 wrote:
>
> On 29/08/18 11:46, Kyrill Tkachov wrote:
> > Hi Christophe,
> >
> > On 13/07/18 17:10, christophe.l...@st.com wrote:
> >> From: Christophe Lyon 
> >>
> >> The FDPIC register is hard-coded to r9, as defined in the ABI.
> >>
> >> We have to disable tailcall optimizations if we don't know if the
> >> target function is in the same module. If not, we have to set r9 to
> >> the value associated with the target module.
> >>
> >> When generating a symbol address, we have to take into account whether
> >> it is a pointer to data or to a function, because different
> >> relocations are needed.
> >>
> >> 2018-XX-XX  Christophe Lyon  
> >> Mickaël Guêné 
> >>
> >> * config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro
> >> in FDPIC mode.
> >> * config/arm/arm-protos.h (arm_load_function_descriptor): Declare
> >> new function.
> >> * config/arm/arm.c (arm_option_override): Define pic register to
> >> FDPIC_REGNUM.
> >> (arm_function_ok_for_sibcall) Disable sibcall optimization if we
> >> have no decl or go through PLT.
> >> (arm_load_pic_register): Handle TARGET_FDPIC.
> >> (arm_is_segment_info_known): New function.
> >> (arm_pic_static_addr): Add support for FDPIC.
> >> (arm_load_function_descriptor): New function.
> >> (arm_assemble_integer): Add support for FDPIC.
> >> * config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):
> >> Define. (FDPIC_REGNUM): New define.
> >> * config/arm/arm.md (call): Add support for FDPIC.
> >> (call_value): Likewise.
> >> (*restore_pic_register_after_call): New pattern.
> >> (untyped_call): Disable if FDPIC.
> >> (untyped_return): Likewise.
> >> * config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.
> >>
> >
> > In general, you can use SYMBOL_REF_P to check RTXes for SYMBOL_REF code.
> >
> >> Change-Id: Icee8484772f97ac6f3a9574df4aa4f25a8196786
> >>
> >> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
> >> index 4471f79..90733cc 100644
> >> --- a/gcc/config/arm/arm-c.c
> >> +++ b/gcc/config/arm/arm-c.c
> >> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)
> >>builtin_define ("__ARM_EABI__");
> >>  }
> >>
> >> +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);
> >> +
> >>def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);
> >>def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);
> >>
> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> >> index 8537262..edebeb7 100644
> >> --- a/gcc/config/arm/arm-protos.h
> >> +++ b/gcc/config/arm/arm-protos.h
> >> @@ -134,6 +134,7 @@ extern int arm_max_const_double_inline_cost (void);
> >>  extern int arm_const_double_inline_cost (rtx);
> >>  extern bool arm_const_double_by_parts (rtx);
> >>  extern bool arm_const_double_by_immediates (rtx);
> >> +extern rtx arm_load_function_descriptor (rtx funcdesc);
> >>  extern void arm_emit_call_insn (rtx, rtx, bool);
> >>  bool detect_cmse_nonsecure_call (tree);
> >>  extern const char *output_call (rtx *);
> >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> >> index c70be36..44c3b08 100644
> >> --- a/gcc/config/arm/arm.c
> >> +++ b/gcc/config/arm/arm.c
> >> @@ -3466,6 +3466,14 @@ arm_option_override (void)
> >>if (flag_pic && TARGET_VXWORKS_RTP)
> >>  arm_pic_register = 9;
> >>
> >> +  /* If in FDPIC mode then force arm_pic_register to be r9.  */
> >> +  if (TARGET_FDPIC)
> >> +{
> >> +  arm_pic_register = FDPIC_REGNUM;
> >> +  if (TARGET_ARM_ARCH < 7)
> >> +   error ("FDPIC mode is not supported on architectures older
> >> than 7");
> >
> > Armv7 rather than 7 please.
> >
>
> What about R and M profiles, especially armv8-m.base?

The whole toolchain has been used on R and M v7 profiles, but I
haven't tested specifically on v8.
I have noticed an issue with M profile in recent binutils, which I
plan to work on once the GCC patches have been pushed.

My hope was to be able to commit our existing level of support
upstream, and then work on v8 or v6 problems if needed as bugs or
enhancements.

>
> R.
>
> >> +}
> >> +
> >>if (arm_pic_register_string != NULL)
> >>  {
> >>int pic_register = decode_reg_name (arm_pic_register_string);
> >> @@ -7247,6 +7255,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
> >>if (cfun->machine->sibcall_blocked)
> >>  return false;
> >>
> >> +  if (TARGET_FDPIC)
> >> +{
> >> +  /* In FDPIC, never tailcall something for which we have no decl:
> >> +the target function could be in a different module, requiring
> >> +a different FDPIC register value.  */
> >> +  if (decl == NULL)
> >> +   return false;
> >> +
> >> +  /* Don't tailcall if we go through the PLT since the FDPIC
> >> +register is then corrupted and we don't restore it aft

Re: [PATCH v2] combine: perform jump threading at the end

2018-09-05 Thread Richard Biener
On Wed, Sep 5, 2018 at 2:01 PM Ilya Leoshkevich  wrote:
>
> gcc/ChangeLog:
>
> 2018-09-05  Ilya Leoshkevich  
>
> PR target/80080
> * combine.c (rest_of_handle_combine): Perform jump threading.
>
> gcc/testsuite/ChangeLog:
>
> 2018-09-05  Ilya Leoshkevich  
>
> PR target/80080
> * gcc.target/s390/pr80080-4.c: New test.
> ---
>  gcc/combine.c | 10 --
>  gcc/testsuite/gcc.target/s390/pr80080-4.c | 16 
>  2 files changed, 24 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/pr80080-4.c
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index a2649b6d5a1..818b4c5b77d 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -14960,10 +14960,16 @@ rest_of_handle_combine (void)
> free_dominance_info (CDI_DOMINATORS);
>timevar_push (TV_JUMP);
>rebuild_jump_labels (get_insns ());
> -  cleanup_cfg (0);
> -  timevar_pop (TV_JUMP);
>  }
>
> +  /* Combining insns can change basic blocks in a way that they end up
> + containing a single jump_insn. This creates an opportunity to improve 
> code
> + with jump threading.  */
> +  cleanup_cfg (CLEANUP_THREADING);
> +
> +  if (rebuild_jump_labels_after_combine)
> +timevar_pop (TV_JUMP);

cleanup_cfg pushes its own timevar so it doesn't make sense to try covering it
with TV_JUMP.  And rebuild_jump_labels immediately pushes TV_REBUILD_JUMP.

So I suggest to remove the timevar_push/pop of TV_JUMP here.

No comment in general about the change, maybe we can detect transforms that
make jump-threading viable and conditionalize that properly?  Note the only
setter of CLEANUP_THREADING guards it with flag_thread_jumps so maybe better
do it above as well (avoids cost at -O0 for example).

Richard.

> +
>regstat_free_n_sets_and_refs ();
>return 0;
>  }
> diff --git a/gcc/testsuite/gcc.target/s390/pr80080-4.c 
> b/gcc/testsuite/gcc.target/s390/pr80080-4.c
> new file mode 100644
> index 000..91d31ec7845
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/pr80080-4.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=z196 -O2" } */
> +
> +extern void bar(int *mem);
> +
> +void foo4(int *mem)
> +{
> +  int oldval = 0;
> +  if (!__atomic_compare_exchange_n (mem, (void *) &oldval, 1,
> +   1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
> +{
> +  bar (mem);
> +}
> +}
> +
> +/* { dg-final { scan-assembler 
> "\n\tlt\t.*\n\tjne\t(\\.L\\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\\1:\n\tjg\tbar\n"
>  } } */
> --
> 2.18.0
>


Re: [PATCH] Frame pointer for arm with THUMB2 mode

2018-09-05 Thread Wilco Dijkstra
Hi Denis,

> We are working on applying Address/LeakSanitizer for the full Tizen OS
> distribution. It's about ~1000 packages, ASan/LSan runtime is installed 
> to ld.so.preload. As we know ASan/LSan has interceptors for 
> allocators/deallocators such as (malloc/realloc/calloc/free) and so on.
> On every allocation from user space program, ASan calls
> GET_STACK_TRACE_MALLOC;
> which unwinds the stack frame, and by default uses frame based stack
> unwinder. So, it requires to build with "-fno-omit-frame-pointer", 
> switching it to default unwinder really hits the performance in our case.

So this sounds like the first thing to do is reducing the size of the stack 
traces.
The default is 30 which is far larger than useful. Using 1 for example should
always be fast (since you can use __builtin_return_address(0)) and still get
the function that called malloc. Also if the unwinder happens to be too slow,
it should be optimized and caching added etc.

>> Doing real unwinding is also far more accurate than frame pointer based
> > unwinding (the latter doesn't handle leaf functions correctly, 
> entry/exit in
> > non-leaf functions and shrinkwrapped functions - and this breaks 
> callgraph
> > profiling).
>
> I agree, but in our case, all interceptors for allocators are
> leaf functions, so the frame based stack unwinder works well for us.

Yes a frame chain would work for this case. But it's not currently supported.

> By default we build packages with ("-marm" "-fno-omit-frame-pointer"),
> because need frame based stack unwinder for every allocation, as I said
> before. As we know GCC sets fp to lr on the stack with 
> ("-fno-omit-frame-pointer" and "-marm") and I don't really know why.
> But the binary size is bigger than for thumb, so, we cannot use default 
> thumb frame pointer and want to reduce binary size for the full 
> sanitized image.

The issue is that the frame pointer and frame chain always add a large
overhead even when you do not use any sanitizers. This is especially bad
for the proposed patch - you lose much of the benefit of using Thumb-2...

Using normal unwinding means your code runs at full speed and still can be
used by the sanitizer.

> In other case clang works the same way, as I offered at the patch.
> It has the same issue, but it was fixed at the end of 2017
> https://bugs.llvm.org/show_bug.cgi?id=18505 (The topics starts from
> discussion about APCS, but it is not the main point.)
>
> Also, unresolved issue related to this
> https://github.com/google/sanitizers/issues/640

Adding support for a frame chain would require an ABI change. It would have to 
work across GCC, LLVM, Arm, Thumb-1 and Thumb-2 - not a trivial amount of
effort.

Wilco


Re: [PATCH 04/25] SPECIAL_REGNO_P

2018-09-05 Thread Joseph Myers
On Wed, 5 Sep 2018, a...@codesourcery.com wrote:

> This patch creates a new macro SPECIAL_REGNO_P which disables regrename.  In
> other words, the register is fixed once allocated.

Creating new target macros is generally suspect - the presumption is that 
target hooks should be used instead, unless it's clear the macro is part 
of a group of very closely related macros that should all become hooks at 
the same time (e.g. if adding a new one of the set of *_TYPE macros for 
standard typedefs, a macro would probably be appropriate rather than 
making just the new one into a hook).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 20/25] GCN libgcc.

2018-09-05 Thread Joseph Myers
On Wed, 5 Sep 2018, a...@codesourcery.com wrote:

> diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c
> new file mode 100644
> index 000..f4f367b
> --- /dev/null
> +++ b/libgcc/config/gcn/crt0.c
> @@ -0,0 +1,23 @@
> +/* Copyright (C) 2017 Free Software Foundation, Inc.

Copyright ranges on all new files should include 2018.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread Andrew Stubbs

On 05/09/18 13:05, Richard Biener wrote:

On Wed, Sep 5, 2018 at 1:51 PM  wrote:



GCN uses V64BImode to represent vector masks in the middle-end, and DImode
bit-masks to represent them in the back-end.  These must be converted at expand
time and the most convenient way is to simply use a SUBREG.


x86 with AVX512 uses SImode in the middle-end as well via the get_mask_mode
vectorization target hook.  Maybe you can avoid another special-case
by piggy-backing on
that?


That's exactly what I wanted to do, but I found that returning 
non-vector modes ran into trouble further down the road.  I don't recall 
the exact details now, but there were assertion failures and failures to 
vectorize.


That was in a GCC 8 codebase though, so is the AVX thing a recent change?

Andrew


Re: [PATCH] Improve x % c1 == c2 expansion (PR middle-end/82853)

2018-09-05 Thread Richard Biener
On Tue, 4 Sep 2018, Jakub Jelinek wrote:

> Hi!
> 
> Improve expansion of x % c1 == c2 and x % c1 != c2 checks.
> 
> As mentioned in Hacker's Delight book, section 10-{16,17}, we can improve
> generated code for modulo by constant, if we only use the result to equality
> compare against some other constant (0 for all divisors, other constants
> only in certain cases at least so far).
> 
> Right now for modulo we usually emit a highpart multiplication (to get
> quotient) followed by normal multiplication by the quotient and subtraction
> (plus the comparison).
> 
> As the comment in the code tries to explain, if c1 is odd and it is unsigned
> modulo, we can expand it as (x - c2) * c3 <= c4 where c3 is modular
> multiplicative inverse of c1 in the corresponding unsigned type and c4 is
> either -1U / c1 or one less than that, depending on the c2 value.
> If c1 is even, the patch uses r>> ctz (c1), but then supports only
> a subset of c2 values (only if the highest unsigned c2 values % c1
> don't yield 0).
> The patch also supports signed modulo, again both for odd and even c1,
> but only for c2 == 0.
> 
> The optimization is done during expansion using TER, and the patch computes
> cost of emitting normal modulo + cost of the c2 constant vs.
> cost of emitting the new optimized code + cost of the c4 constant.
> 
> The patch doesn't try to optimize if x is used in division or another modulo
> by the same constant in the same basic block, assuming that it is likely
> optimized into division + modulo combined operation or at least the high
> part multiply reused between the two.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> This optimization during those 2 bootstraps + regtests triggered 1239
> times (counted cases where the cost of optimized sequence was smaller
> than original), in particular 545x with unsigned modulo with odd c1, 51x with
> signed modulo with odd c1, 454x with unsigned modulo with even c1 and
> 189x with signed modulo with even c1.
> 
> In the PR there is somewhat bigger test coverage, but I'm not sure if it
> isn't too big to be included in the testsuite.  On a fast box each of the
> 6 tests takes 2-4 seconds to compile and runtime for each test is 2-18
> seconds (if skipping the 128-bit tests 2-6 seconds), the non-32/64-bit tests
> are 36-64KiB long, 128-bit tests 104-184KiB (plus it needs random ()).

IIRC you said we're already doing x % power-of-two == 0 optimized but the 
new
code isn't in that place?  Specifically you're looking at immediate
uses during RTL expansion which makes me a bit nervous.  The code
looks like it performs a GIMPLE transform so I think it should
be implemented as such (and thus also not need TER restrictions).

Our current GIMPLE kitchen-sink for pre-expand insn recog is
pass_optimize_widening_mul.

I'm not sure where regular jump RTL expansion happens but I
think I remember some TER pattern stuff there as well.

So I'm not happy with more intertwining between RTL and GIMPLE
this way - I hoped this would be more-or-less a RTL transform
hooked into jumpif*_* (and store_flag).

Richard.

> 2018-09-04  Jakub Jelinek  
> 
>   PR middle-end/82853
>   * expr.h (maybe_optimize_mod_cmp): Declare.
>   * expr.c (mod_inv): New function.
>   (maybe_optimize_mod_cmp): New function.
>   (do_store_flag): Use it.
>   * cfgexpand.c (expand_gimple_cond): Likewise.
> 
>   * gcc.target/i386/pr82853-1.c: New test.
>   * gcc.target/i386/pr82853-2.c: New test.
> 
> --- gcc/expr.h.jj 2018-08-29 23:36:15.806122967 +0200
> +++ gcc/expr.h2018-09-04 09:38:35.215881588 +0200
> @@ -290,6 +290,8 @@ expand_normal (tree exp)
> a string constant.  */
>  extern tree string_constant (tree, tree *, tree *, tree *);
>  
> +extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *, tree 
> *);
> +
>  /* Two different ways of generating switch statements.  */
>  extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, 
> profile_probability);
>  extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, 
> profile_probability);
> --- gcc/expr.c.jj 2018-08-29 23:36:15.806122967 +0200
> +++ gcc/expr.c2018-09-04 12:31:37.538106639 +0200
> @@ -11491,6 +11491,241 @@ string_constant (tree arg, tree *ptr_off
>return init;
>  }
>  
> +/* Compute the modular multiplicative inverse of A modulo M
> +   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
> +static wide_int
> +mod_inv (const wide_int &a, const wide_int &b)
> +{
> +  /* Verify the assumption.  */
> +  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
> +
> +  unsigned int p = a.get_precision () + 1;
> +  gcc_checking_assert (b.get_precision () + 1 == p);
> +  wide_int c = wide_int::from (a, p, UNSIGNED);
> +  wide_int d = wide_int::from (b, p, UNSIGNED);
> +  wide_int x0 = wide_int::from (0, p, UNSIGNED);
> +  wide_int x1 = wide_int::from (1, p, UNSIGNED);
> +
> +  if (wi::eq_p (b, 1))
> +return wide_int::from (1, p,

Re: [PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread Richard Biener
On Wed, Sep 5, 2018 at 2:40 PM Andrew Stubbs  wrote:
>
> On 05/09/18 13:05, Richard Biener wrote:
> > On Wed, Sep 5, 2018 at 1:51 PM  wrote:
> >>
> >>
> >> GCN uses V64BImode to represent vector masks in the middle-end, and DImode
> >> bit-masks to represent them in the back-end.  These must be converted at 
> >> expand
> >> time and the most convenient way is to simply use a SUBREG.
> >
> > x86 with AVX512 uses SImode in the middle-end as well via the get_mask_mode
> > vectorization target hook.  Maybe you can avoid another special-case
> > by piggy-backing on
> > that?
>
> That's exactly what I wanted to do, but I found that returning
> non-vector modes ran into trouble further down the road.  I don't recall
> the exact details now, but there were assertion failures and failures to
> vectorize.
>
> That was in a GCC 8 codebase though, so is the AVX thing a recent change?

No.  You might want to look into the x86 backend if there's maybe more tweaks
needed when using non-vector mask modes.

Richard.

> Andrew


Re: VRP: abstract out wide int CONVERT_EXPR_P code

2018-09-05 Thread Michael Matz
Hi,

On Tue, 4 Sep 2018, Aldy Hernandez wrote:

> > to make the result ~[0, 5], is it?  At least the original code dropped
> > that to VARYING.  For the same reason truncating [3, 765] from
> > short to unsigned char isn't [3, 253].  But maybe I'm missing something.

Sorry for chiming in, but this caught my eye:

> Correct, but in that case we will realize that in wide_int_range_convert and
> refuse to do the conversion:
> 
>   /* If the conversion is not truncating we can convert the min and
>  max values and canonicalize the resulting range.  Otherwise we
>  can do the conversion if the size of the range is less than what
>  the precision of the target type can represent.  */
>   if (outer_prec >= inner_prec
>   || wi::rshift (wi::sub (vr0_max, vr0_min),
>wi::uhwi (outer_prec, inner_prec),
>inner_sign) == 0)
(followed by this code:)
+{
+  min = widest_int::from (vr0_min, inner_sign);
+  max = widest_int::from (vr0_max, inner_sign);
+  widest_int min_value
+   = widest_int::from (wi::min_value (outer_prec, outer_sign),outer_sign);
+  widest_int max_value
+   = widest_int::from (wi::max_value (outer_prec, outer_sign),outer_sign);
+  return !wi::eq_p (min, min_value) || !wi::eq_p (max, max_value);
+}
+  return false;

How can this test and following code catch all problematic cases?  Assume 
a range of [253..257], truncating to 8 bits unsigned.  The size of the 
range is 5 (not 4 as the code above calculates), well representable in 8 
bits.  Nevertheless, the min of the new range isn't 253, nor is the max of 
the new range 257.  In fact the new range must be [0..255] (if you have no 
multi ranges or anti-ranges).  So whatever this function is supposed to 
do (what btw?), it certainly does not convert a range.


Ciao,
Michael.


Re: [PATCH] Improve x % c1 == c2 expansion (PR middle-end/82853)

2018-09-05 Thread Jakub Jelinek
On Wed, Sep 05, 2018 at 02:42:36PM +0200, Richard Biener wrote:
> IIRC you said we're already doing x % power-of-two == 0 optimized but the 
> new
> code isn't in that place?

For unsigned %, there is no need for anything special, we just
expand that as x & (power-of-two - 1) == 0, as any other % power-of-two.

For signed x % power-of-two == N, Kyrill posted a patch 3 years ago
and it could be handled in the same spot as this too.  x % power-of-two == 0
can be expanded as (unsigned) x % (unsigned) power-of-two == 0, for
x % power-of-two == N where N is in [1 .. power-of-two - 1] we can
expand it (if cheaper) as x & (msb | (power-of-two - 1)) == N and
x % power-of-two == N where N is in [-power-of-two + 1, -1] we could
do x & (msb | (power-of-two - 1)) == (N & (msb | (power-of-two - 1))).

> Specifically you're looking at immediate
> uses during RTL expansion which makes me a bit nervous.  The code

What's wrong about it?  We don't kill the gimple statements until expansion
is done.  Plus it is just a heuristic, not affecting emitted code
functionality, it just picks whether we emit it one way or another
(and only checks uses in the current bb); cfgexpand.c also uses
FOR_EACH_IMM_USE_FAST in multiple spots.

> looks like it performs a GIMPLE transform so I think it should
> be implemented as such (and thus also not need TER restrictions).

It isn't really a GIMPLE transform, just needs to provide the callers
with a tree.  Specifically, if it passes several initial checks, it
expands the X expression unconditionally and then just uses the result of
that.  What I could and probably should do is to just emit the cheaper
of the two sequences and make_tree the result thereof, so the caller
would emit only the actual comparison, rather than the modulo or its
replacement again too as the patch does.

> Our current GIMPLE kitchen-sink for pre-expand insn recog is
> pass_optimize_widening_mul.

We don't do anything close to this there though, use the expander to expand
quite complex tree expressions into RTL.  I'm afraid the expander isn't even
sufficiently initialized there, we'd need to rewrite the expressions into
trees using magic VAR_DECLs that would map to some virtual pseudos, etc.
The only similar case to what we'd need is in ivopts, and that has quite
some code to make that work.

> I'm not sure where regular jump RTL expansion happens but I
> think I remember some TER pattern stuff there as well.

expand_gimple_cond has:
We're sometimes presented with such code:
   D.123_1 = x < y;
   if (D.123_1 != 0)
... TER stuff in it, and this patch is done next to it (with do_store_flag
being another spot).  Sticking the optimization later would be much harder
(I mean e.g. into jumpif*_*).

Jakub


[PATCH][OBVIOUS] Close file on return from verify-intermediate

2018-09-05 Thread Joey Ye
This is a fix to an obvious issue in gcov.exp, where proc verify-intermediate 
returns without closing the open file.

This is a possible fix for PR85871. gcov-8.C differs from the other gcov
testcases in that it invokes verify-intermediate. Not closing an open file
may quietly result in random failures.

It is only a possible fix as I failed to reproduce the PR85871 random failure 
on my local machine despite multiple days of continuous testing. So I cannot 
verify whether this patch fixes the regression either.

To verify, https://gcc.gnu.org/ml/gcc-testresults/ needs to be watched for 
a month after this patch is committed to trunk, to see whether the gcov-8 
regression disappears completely.

Tested with make check with no new regressions.

OK to trunk?

testsuite/ChangeLog:
2018-09-05  Joey Ye  

    * lib/gcov.exp (verify-intermediate): Add missing close.


gcov-20180905.patch
Description: gcov-20180905.patch


Re: [PATCH 05/25] Add sorry_at diagnostic function.

2018-09-05 Thread David Malcolm
On Wed, 2018-09-05 at 12:49 +0100, a...@codesourcery.com wrote:
> The plain "sorry" diagnostic only gives the "current" location, which
> is
> typically the last line of the function or translation unit by the time
> we get to
> the back end.
> 
> GCN uses "sorry" to report unsupported language features, such as
> static
> constructors, so it's useful to have a "sorry_at" variant.
> 
> This patch implements "sorry_at" according to the pattern of the
> other "at"
> variants.
> 
> 2018-09-05  Andrew Stubbs  
> 
>   gcc/
>   * diagnostic-core.h (sorry_at): New prototype.
>   * diagnostic.c (sorry_at): New function.
> ---
>  gcc/diagnostic-core.h |  1 +
>  gcc/diagnostic.c  | 11 +++
>  2 files changed, 12 insertions(+)

OK, thanks.


Re: [PATCH 05/25] Add sorry_at diagnostic function.

2018-09-05 Thread David Malcolm
On Wed, 2018-09-05 at 09:39 -0400, David Malcolm wrote:
> On Wed, 2018-09-05 at 12:49 +0100, a...@codesourcery.com wrote:
> > The plain "sorry" diagnostic only gives the "current" location,
> > which
> > is
> > typically the last line of the function or translation unit by the time
> > we get to
> > the back end.
> > 
> > GCN uses "sorry" to report unsupported language features, such as
> > static
> > constructors, so it's useful to have a "sorry_at" variant.
> > 
> > This patch implements "sorry_at" according to the pattern of the
> > other "at"
> > variants.
> > 
> > 2018-09-05  Andrew Stubbs  
> > 
> > gcc/
> > * diagnostic-core.h (sorry_at): New prototype.
> > * diagnostic.c (sorry_at): New function.
> > ---
> >  gcc/diagnostic-core.h |  1 +
> >  gcc/diagnostic.c  | 11 +++
> >  2 files changed, 12 insertions(+)
> 
> OK, thanks.

Actually, not quite: "sorry" has changed slightly on trunk since you
copied it.

Please add the:

  auto_diagnostic_group d;

line to the top of the function.

OK with that change.


Re: [PATCH] Frame pointer for arm with THUMB2 mode

2018-09-05 Thread Denis Khalikov

Hi Wilco,
thanks for the answer.

> Adding support for a frame chain would require an ABI change. It would
> have to work across GCC, LLVM, Arm, Thumb-1 and Thumb-2 - not a trivial
> amount of effort.

Clang already works that way.
Please look at this commit:
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallingConv.td?r1=269459&r2=269458&pathrev=269459


This is an example of code which clang generates
with -mthumb -fno-omit-frame-pointer -O2.

 @ %bb.0:
 push{r4, r5, r6, r7, lr}
 add r7, sp, #12
 push.w  {r8, r9, r10}
 sub sp, #56
 mov r8, r0
 movwr0, :lower16:_ZTVN6sensor14sensor_handlerE
 movtr0, :upper16:_ZTVN6sensor14sensor_handlerE
 mov r9, r8
 addsr0, #8
 str r0, [r9], #4

The only difference is that clang sets the frame pointer to the r7 slot on
the stack whereas gcc sets it to lr, but that is already handled by the
Sanitizers.


>
> So this sounds like the first thing to do is reducing the size of the
> stack traces.
> The default is 30 which is far larger than useful. Using 1 for example
> should always be fast (since you can use __builtin_return_address(0)) and
> still get the function that called malloc. Also if the unwinder happens
> to be too slow, it should be optimized and caching added etc.
>
If we change the size of the traces to 2, it could be something like this:

0xb42a50a0 is located 0 bytes inside of 88-byte region 
[0xb42a50a0,0xb42a50f8)

freed by thread T0 here:
#0 0xb6a35cc7 in __interceptor_free (/usr/lib/libasan.so+0x127cc7)
#1 0xb5fa64e3 in ipc::message::unref() 
(/lib/libsensord-shared.so+0xe4e3)


previously allocated by thread T0 here:
#0 0xb6a36157 in malloc (/usr/lib/libasan.so+0x128157)
#1 0xb2f8852d in accel_device::get_data(unsigned int, 
sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)


Instead this:

0xb42250a0 is located 0 bytes inside of 88-byte region 
[0xb42250a0,0xb42250f8)

freed by thread T0 here:
#0 0xb6989cff in __interceptor_free (/usr/lib/libasan.so+0x127cff)
#1 0xb5fa64e3 in ipc::message::unref() 
(/lib/libsensord-shared.so+0xe4e3)
#2 0xb6efbf47 in sensor::sensor_handler::notify(char const*, 
sensor_data_t*, int) (/usr/bin/sensord+0x1ef47)
#3 0xb6efad43 in sensor::sensor_event_handler::handle(int, unsigned 
int) (/usr/bin/sensord+0x1dd43)

#4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
#5 0xb62d9a15 in g_main_context_dispatch 
(/lib/libglib-2.0.so.0+0x91a15)

#6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
#7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
#8 0xb5fa4e1b in ipc::event_loop::run(int) 
(/lib/libsensord-shared.so+0xce1b)

#9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
#10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)

previously allocated by thread T0 here:
#0 0xb698a18f in malloc (/usr/lib/libasan.so+0x12818f)
#1 0xb2f8852d in accel_device::get_data(unsigned int, 
sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)
#2 0xb6ef848f in 
sensor::physical_sensor_handler::get_data(sensor_data_t**, int*) 
(/usr/bin/sensord+0x1b48f)
#3 0xb6efaa51 in sensor::sensor_event_handler::handle(int, unsigned 
int) (/usr/bin/sensord+0x1da51)

#4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
#5 0xb62d9a15 in g_main_context_dispatch 
(/lib/libglib-2.0.so.0+0x91a15)

#6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
#7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
#8 0xb5fa4e1b in ipc::event_loop::run(int) 
(/lib/libsensord-shared.so+0xce1b)

#9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
#10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)


In the first example we lose the full context of where the
control/data flow comes from.


>
> The issue is that the frame pointer and frame chain always add a large
> overhead even when you do not use any sanitizers. This is especially bad
> for the proposed patch - you lose much of the benefit of using Thumb-2...
>

A stack layout like this is enabled only by a compile-time flag
(-mthumb-fp, which works only together with -mthumb and
-fno-omit-frame-pointer). It does not affect other codegen.


Thanks.


On 09/05/2018 03:11 PM, Wilco Dijkstra wrote:

Hi Denis,


We are working on applying Address/LeakSanitizer for the full Tizen OS
distribution. It's about ~1000 packages, ASan/LSan runtime is installed
to ld.so.preload. As we know ASan/LSan has interceptors for
allocators/deallocators such as (malloc/realloc/calloc/free) and so on.
On every allocation from user space program, ASan calls
GET_STACK_TRACE_MALLOC;
which unwinds the stack frame, and by default uses frame based stack
unwinder. So, it requires to build with "-fno-omit-frame-pointer",
switching it to default unwinder really hits the performance in our case.


So this sounds like the first thing to do is reducing the size of the stack 
traces.
The defau

Re: [PATCH] Improve x % c1 == c2 expansion (PR middle-end/82853)

2018-09-05 Thread Jakub Jelinek
On Wed, Sep 05, 2018 at 03:14:40PM +0200, Jakub Jelinek wrote:
> It isn't really a GIMPLE transform, just needs to provide the callers
> with a tree.  Specifically, if it passes several initial checks, it
> expands the X expression unconditionally and then just uses the result of
> that.  What I could and probably should do is to just emit the cheaper
> of the two sequences and make_tree the result thereof, so the caller
> would emit only the actual comparison, rather than the modulo or its
> replacement again too as the patch does.

In particular I meant this incremental patch, where the effect is really
that the first argument of the modulo is expanded unconditionally once,
the two possible variants are then each expanded into a sequence, then based
on cost we decide which one to use, emit it (the other one is GCed) and
let the callers emit the comparison.

--- gcc/expr.c.jj   2018-09-05 15:34:10.395601918 +0200
+++ gcc/expr.c  2018-09-05 15:47:00.609834755 +0200
@@ -11716,12 +11716,13 @@ maybe_optimize_mod_cmp (enum tree_code c
 
   if (mocost <= mucost)
 {
-  *arg0 = build2_loc (loc, TRUNC_MOD_EXPR, TREE_TYPE (treeop0),
- treeop0, treeop1);
+  emit_insn (moinsns);
+  *arg0 = make_tree (TREE_TYPE (*arg0), mor);
   return code;
 }
 
-  *arg0 = t;
+  emit_insn (muinsns);
+  *arg0 = make_tree (type, mur);
   *arg1 = c4;
   return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
 }

Jakub


Re: [PATCH] Frame pointer for arm with THUMB2 mode

2018-09-05 Thread Denis Khalikov

Hi Wilco,
thanks for the answer.

> Adding support for a frame chain would require an ABI change. It would
> have to work across GCC, LLVM, Arm, Thumb-1 and Thumb-2 - not a trivial
> amount of effort.

Clang already works that way.
Please look at this commit:
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallingConv.td?r1=269459&r2=269458&pathrev=269459

This is an example of code which clang generates
with -mthumb -fno-omit-frame-pointer -O2.

 @ %bb.0:
 push{r4, r5, r6, r7, lr}
 add r7, sp, #12
 push.w  {r8, r9, r10}
 sub sp, #56
 mov r8, r0
 movwr0, :lower16:_ZTVN6sensor14sensor_handlerE
 movtr0, :upper16:_ZTVN6sensor14sensor_handlerE
 mov r9, r8
 addsr0, #8
 str r0, [r9], #4

The only difference is that clang sets the frame pointer to the r7 slot on
the stack whereas gcc sets it to lr, but that is already handled by the
Sanitizers.

>
> So this sounds like the first thing to do is reducing the size of the
> stack traces.
> The default is 30 which is far larger than useful. Using 1 for example
> should always be fast (since you can use __builtin_return_address(0)) and
> still get the function that called malloc. Also if the unwinder happens
> to be too slow, it should be optimized and caching added etc.
>

If we change the size of the traces to 2, it could be something like this:

0xb42a50a0 is located 0 bytes inside of 88-byte region 
[0xb42a50a0,0xb42a50f8)

freed by thread T0 here:
#0 0xb6a35cc7 in __interceptor_free (/usr/lib/libasan.so+0x127cc7)
#1 0xb5fa64e3 in ipc::message::unref() 
(/lib/libsensord-shared.so+0xe4e3)


previously allocated by thread T0 here:
#0 0xb6a36157 in malloc (/usr/lib/libasan.so+0x128157)
#1 0xb2f8852d in accel_device::get_data(unsigned int, 
sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)


Instead this:

0xb42250a0 is located 0 bytes inside of 88-byte region 
[0xb42250a0,0xb42250f8)

freed by thread T0 here:
#0 0xb6989cff in __interceptor_free (/usr/lib/libasan.so+0x127cff)
#1 0xb5fa64e3 in ipc::message::unref() 
(/lib/libsensord-shared.so+0xe4e3)
#2 0xb6efbf47 in sensor::sensor_handler::notify(char const*, 
sensor_data_t*, int) (/usr/bin/sensord+0x1ef47)
#3 0xb6efad43 in sensor::sensor_event_handler::handle(int, unsigned 
int) (/usr/bin/sensord+0x1dd43)

#4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
#5 0xb62d9a15 in g_main_context_dispatch 
(/lib/libglib-2.0.so.0+0x91a15)

#6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
#7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
#8 0xb5fa4e1b in ipc::event_loop::run(int) 
(/lib/libsensord-shared.so+0xce1b)

#9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
#10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)

previously allocated by thread T0 here:
#0 0xb698a18f in malloc (/usr/lib/libasan.so+0x12818f)
#1 0xb2f8852d in accel_device::get_data(unsigned int, 
sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)
#2 0xb6ef848f in 
sensor::physical_sensor_handler::get_data(sensor_data_t**, int*) 
(/usr/bin/sensord+0x1b48f)
#3 0xb6efaa51 in sensor::sensor_event_handler::handle(int, unsigned 
int) (/usr/bin/sensord+0x1da51)

#4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
#5 0xb62d9a15 in g_main_context_dispatch 
(/lib/libglib-2.0.so.0+0x91a15)

#6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
#7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
#8 0xb5fa4e1b in ipc::event_loop::run(int) 
(/lib/libsensord-shared.so+0xce1b)

#9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
#10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)


In the first example we lose the full context of where the
control/data flow comes from.


>
> The issue is that the frame pointer and frame chain always add a large
> overhead even when you do not use any sanitizers. This is especially bad
> for the proposed patch - you lose much of the benefit of using Thumb-2...
>

A stack layout like this is enabled only by a compile-time flag
(-mthumb-fp, which works only together with -mthumb and
-fno-omit-frame-pointer). It does not affect other codegen.

Thanks.

On 09/05/2018 03:11 PM, Wilco Dijkstra wrote:

Hi Denis,


We are working on applying Address/LeakSanitizer for the full Tizen OS
distribution. It's about ~1000 packages, ASan/LSan runtime is installed
to ld.so.preload. As we know ASan/LSan has interceptors for
allocators/deallocators such as (malloc/realloc/calloc/free) and so on.
On every allocation from user space program, ASan calls
GET_STACK_TRACE_MALLOC;
which unwinds the stack frame, and by default uses frame based stack
unwinder. So, it requires to build with "-fno-omit-frame-pointer",
switching it to default unwinder really hits the performance in our case.


So this sounds like the first thing to do is reducing the size of the stack 
traces.
The default

Re: VRP: abstract out wide int CONVERT_EXPR_P code

2018-09-05 Thread Aldy Hernandez




On 09/05/2018 08:58 AM, Michael Matz wrote:

Hi,

On Tue, 4 Sep 2018, Aldy Hernandez wrote:


to make the result ~[0, 5], is it?  At least the original code dropped
that to VARYING.  For the same reason truncating [3, 765] from
short to unsigned char isn't [3, 253].  But maybe I'm missing something.


Sorry for chiming in, but this caught my eye:


No apologies needed.  I welcome all masochists to join me in my personal 
hell :).





Correct, but in that case we will realize that in wide_int_range_convert and
refuse to do the conversion:

   /* If the conversion is not truncating we can convert the min and
  max values and canonicalize the resulting range.  Otherwise we
  can do the conversion if the size of the range is less than what
  the precision of the target type can represent.  */
   if (outer_prec >= inner_prec
   || wi::rshift (wi::sub (vr0_max, vr0_min),
 wi::uhwi (outer_prec, inner_prec),
 inner_sign) == 0)

(followed by this code:)
+{
+  min = widest_int::from (vr0_min, inner_sign);
+  max = widest_int::from (vr0_max, inner_sign);
+  widest_int min_value
+   = widest_int::from (wi::min_value (outer_prec, outer_sign),outer_sign);
+  widest_int max_value
+   = widest_int::from (wi::max_value (outer_prec, outer_sign),outer_sign);
+  return !wi::eq_p (min, min_value) || !wi::eq_p (max, max_value);
+}
+  return false;

How can this test and following code catch all problematic cases?  Assume
a range of [253..257], truncating to 8 bits unsigned.  The size of the
range is 5 (not 4 as the code above calculates), well representable in 8
bits.  Nevertheless, the min of the new range isn't 253, nor is the max of
the new range 257.  In fact the new range must be [0..255] (if you have no
multi ranges or anti-ranges).  So whatever this function is supposed to
do (what btw?), it certainly does not convert a range.


Welcome to the wonderful world of anti ranges, where nothing is as it seems.

First you're not looking at what's currently in mainline.  Richard 
approved the first incantation of this code.  But even so, I believe the 
above is correct as well.


What you're missing is the call to set_and_canonicalize_value_range in 
the caller which will transform ranges into anti-ranges when the 
returned range has min/max swapped:


 if (wide_int_range_convert (wmin, wmax,
  inner_sign, inner_prec,
  outer_sign, outer_prec,
  vr0_min, vr0_max))
{
  tree min = wide_int_to_tree (outer_type, wmin);
  tree max = wide_int_to_tree (outer_type, wmax);
  set_and_canonicalize_value_range (vr, vr_type, min, max, NULL);
}


Take your truncating example of [253, 257].  Wide_int_range_convert will 
return [253,1] and set_and_canonicalize_value_range will transform this 
into ~[2, 252].


See:
  /* Wrong order for min and max, to swap them and the VR type we need
 to adjust them.  */
  if (tree_int_cst_lt (max, min))

Also, I took care of documenting this need for canonizing in my code:

...
   Caller is responsible for canonicalizing the resulting range.  */

If desired, I could further document that the range returned may have 
its contents swapped and that is why it needs canonizing.  But hey, that 
shit was already broken when I got here ;-).


If you follow the code in tree-vrp.c before my patch you will see that 
the path is the same... we calculate a nonsensical range of [253, 1] and 
then massage it into ~[2, 252] in set_and_canonicalize_value_range.


I think the fact that it's so hard to get all this right, is a strong 
argument for getting rid of anti ranges.  Almost all the bugs or 
oversights I've fixed over the past months in VRP have been related to 
anti ranges.


Aldy


Re: [PATCH] Frame pointer for arm with THUMB2 mode

2018-09-05 Thread Richard Earnshaw (lists)
On 05/09/18 14:55, Denis Khalikov wrote:
> Hi Wilco,
> thanks for the answer.
> 
>> Adding support for a frame chain would require an ABI change. It would
> have to
>> work across GCC, LLVM, Arm, Thumb-1 and Thumb-2 - not a trivial amount of
>> effort.
> 
> Clang already works that way.


> Please look at this commit:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallingConv.td?r1=269459&r2=269458&pathrev=269459
> 
> 
> This is an example of code which clang generates
> with -mthumb -fno-omit-frame-pointer -O2.
> 
>  @ %bb.0:
>  push    {r4, r5, r6, r7, lr}
>  add r7, sp, #12
>  push.w  {r8, r9, r10}
>  sub sp, #56
>  mov r8, r0
>  movw    r0, :lower16:_ZTVN6sensor14sensor_handlerE
>  movt    r0, :upper16:_ZTVN6sensor14sensor_handlerE
>  mov r9, r8
>  adds    r0, #8
>  str r0, [r9], #4
> 
> The only difference is that clang sets the frame pointer to the r7 slot
> on the stack whereas gcc sets it to lr,
> but that is already handled by the Sanitizers.

Then Clang is broken.  You can't have different frame pointers in Arm
and Thumb code.

I object to another hack going in for another ill-specified frame
pointer variant until such time as the ABI is updated to sort this out
properly.  The frame handling code is just too critical to overall
performance and the prologue and epilogue code itself is also quite
fragile in this respect.  Adding more mess on top of that is going to
make sorting this out even more tricky: and once a patch like this goes
on it's not easy to remove it again.

So until the ABI sanctions a proper inter-function frame chain record,
GCC will only support local use of the frame pointer and no chaining.

R.

> 
>>
>> So this sounds like the first thing to do is reducing the size of the
> stack traces.
>> The default is 30 which is far larger than useful. Using 1 for example
> should
>> always be fast (since you can use __builtin_return_address(0)) and
> still get
>> the function that called malloc. Also if the unwinder happens to be
> too slow,
>> it should be optimized and caching added etc.
>>
> 
> If we change the size of the traces to 2, it could be something like this:
> 
> 0xb42a50a0 is located 0 bytes inside of 88-byte region
> [0xb42a50a0,0xb42a50f8)
> freed by thread T0 here:
>     #0 0xb6a35cc7 in __interceptor_free (/usr/lib/libasan.so+0x127cc7)
>     #1 0xb5fa64e3 in ipc::message::unref()
> (/lib/libsensord-shared.so+0xe4e3)
> 
> previously allocated by thread T0 here:
>     #0 0xb6a36157 in malloc (/usr/lib/libasan.so+0x128157)
>     #1 0xb2f8852d in accel_device::get_data(unsigned int,
> sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)
> 
> Instead this:
> 
> 0xb42250a0 is located 0 bytes inside of 88-byte region
> [0xb42250a0,0xb42250f8)
> freed by thread T0 here:
>     #0 0xb6989cff in __interceptor_free (/usr/lib/libasan.so+0x127cff)
>     #1 0xb5fa64e3 in ipc::message::unref()
> (/lib/libsensord-shared.so+0xe4e3)
>     #2 0xb6efbf47 in sensor::sensor_handler::notify(char const*,
> sensor_data_t*, int) (/usr/bin/sensord+0x1ef47)
>     #3 0xb6efad43 in sensor::sensor_event_handler::handle(int, unsigned
> int) (/usr/bin/sensord+0x1dd43)
>     #4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
>     #5 0xb62d9a15 in g_main_context_dispatch
> (/lib/libglib-2.0.so.0+0x91a15)
>     #6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
>     #7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
>     #8 0xb5fa4e1b in ipc::event_loop::run(int)
> (/lib/libsensord-shared.so+0xce1b)
>     #9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
>     #10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)
> 
> previously allocated by thread T0 here:
>     #0 0xb698a18f in malloc (/usr/lib/libasan.so+0x12818f)
>     #1 0xb2f8852d in accel_device::get_data(unsigned int,
> sensor_data_t**, int*) (/usr/lib/sensor/libsensor-hal-tm1.so+0x1052d)
>     #2 0xb6ef848f in
> sensor::physical_sensor_handler::get_data(sensor_data_t**, int*)
> (/usr/bin/sensord+0x1b48f)
>     #3 0xb6efaa51 in sensor::sensor_event_handler::handle(int, unsigned
> int) (/usr/bin/sensord+0x1da51)
>     #4 0xb5fa3bbb  (/lib/libsensord-shared.so+0x)
>     #5 0xb62d9a15 in g_main_context_dispatch
> (/lib/libglib-2.0.so.0+0x91a15)
>     #6 0xb62da2d9  (/lib/libglib-2.0.so.0+0x922d9)
>     #7 0xb62da9a9 in g_main_loop_run (/lib/libglib-2.0.so.0+0x929a9)
>     #8 0xb5fa4e1b in ipc::event_loop::run(int)
> (/lib/libsensord-shared.so+0xce1b)
>     #9 0xb6eec9a5 in main (/usr/bin/sensord+0xf9a5)
>     #10 0xb5cb663b in __libc_start_main (/lib/libc.so.6+0x1663b)
> 
> 
> In the first example we lose the full context of where the
> control/data flow comes from.
> 
>>
>> The issue is that the frame pointer and frame chain always add a large
>> overhead even when you do not use any sanitizers. This is especially bad
>> for the proposed patch - you lose much of the benefit of using Thumb-2...
>>
> 
> The stack la

[PATCH] C++: special-case single non-viable candidate (more PR c++/85110)

2018-09-05 Thread David Malcolm
On Thu, 2018-08-30 at 18:18 -0400, Jason Merrill wrote:
> On Thu, Aug 23, 2018 at 2:08 PM, David Malcolm 
> wrote:
> > This is a followup to:
> >
> >   "[PATCH] C++: underline param in print_conversion_rejection (more
> > PR c++/85110)"
> >  https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01480.html
> >
> > to highlight the pertinent argument in an unmatched function call
> > for which there is one candidate.
> >
> > It updates the output from:
> >
> > demo.cc: In function 'int test_4(int, const char*, float)':
> > demo.cc:5:44: error: no matching function for call to
> > 's4::member_1(int&, const char*&, float&)'
> > 5 |   return s4::member_1 (first, second, third);
> >   |^
> > demo.cc:1:24: note: candidate: 'static int s4::member_1(int, const
> > char**, float)'
> > 1 | struct s4 { static int member_1 (int one, const char **two,
> > float three); };
> >   |^~~~
> > demo.cc:1:56: note:   no known conversion for argument 2 from
> > 'const char*' to 'const char**'
> > 1 | struct s4 { static int member_1 (int one, const char **two,
> > float three); };
> >   |   ~^~~
> >
> > to:
> >
> > demo.cc: In function 'int test_4(int, const char*, float)':
> > demo.cc:5:31: error: no matching function for call to
> > 's4::member_1(int&, const char*&, float&)'
> > 5 |   return s4::member_1 (first, second, third);
> >   |   ^~
>
> Hmm, it seems pretty subtle to just change the highlighting when the
> message talks about the call as a whole.  I think if we're going to
> focus in this way we might change the diagnostic to something like
>
> error: no known conversion for argument 2 from 'const char*' to
> 'const char**'
> note: in call to 'static int s4::member_1(int, const char**, float)'
>
> or whatever the messages are from
> convert_arguments/convert_for_initialization which already deal with
> the single-candidate case for non-member functions.
>
> Jason

Thanks.

Here's an updated version of the patch.

I broke out the "no viable candidates" case in build_new_method_call_1
into a subroutine, and added special-case handling for when there's
a single non-viable candidate where there's an argument conversion
error.  I turned the error-handling from convert_for_assignment into
a subroutine, calling it from this new special-case.

This converts:

demo.cc: In function 'int test_4(int, const char*, float)':
demo.cc:5:44: error: no matching function for call to 's4::member_1(int&, const 
char*&, float&)'
5 |   return s4::member_1 (first, second, third);
  |^
demo.cc:1:24: note: candidate: 'static int s4::member_1(int, const char**, 
float)'
1 | struct s4 { static int member_1 (int one, const char **two, float three); };
  |^~~~
demo.cc:1:56: note:   no known conversion for argument 2 from 'const char*' to 
'const char**'
1 | struct s4 { static int member_1 (int one, const char **two, float three); };
  |   ~^~~

to:

demo.cc: In function 'int test_4(int, const char*, float)':
demo.cc:5:31: error: cannot convert 'const char*' to 'const char**'
5 |   return s4::member_1 (first, second, third);
  |   ^~
  |   |
  |   const char*
demo.cc:1:56: note:   initializing argument 2 of 'static int s4::member_1(int, 
const char**, float)'
1 | struct s4 { static int member_1 (int one, const char **two, float three); };
  |   ~^~~

thus highlighting the problematic argument at the callsite (and its type).

BTW, updating the test cases shows that some of our messages are worded:
  "could not convert"
and sometimes:
  "cannot convert"
(see e.g. g++.old-deja/g++.jason/conversion11.C which would now
have two of them side-by-side).  I'm not sure that this is a problem
(maybe save for a followup?  If it is an issue, it's presumably a
pre-existing one)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/cp/ChangeLog:
PR c++/85110
* call.c (struct conversion_info): Add "loc" field.
(arg_conversion_rejection): Add "loc" param, using it to
initialize the new field.
(bad_arg_conversion_rejection): Likewise.
(explicit_conversion_rejection): Initialize the new field to
UNKNOWN_LOCATION.
(template_conversion_rejection): Likewise.
(add_function_candidate): Pass on the argument location to the new
param of arg_conversion_rejection.
(add_conv_candidate): Likewise.
(build_builtin_candidate): Likewise.
(build_user_type_conversion_1): Likewise.
(single_z_candidate): New function.
(maybe_get_bad_conversion_for_unmatched_call): New function.
(complain_about_bad_argument): New function, base

Re: [PATCH 21/25] GCN Back-end (part 2/2).

2018-09-05 Thread Joseph Myers
On Wed, 5 Sep 2018, Andrew Stubbs wrote:

> +   warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
> +   OPT_Wopenacc_dims,
> +   (dims[GOMP_DIM_VECTOR]
> +? "using vector_length (64), ignoring %d"
> +: "using vector_length (64), ignoring runtime setting"),

In cases like this with alternative diagnostic messages using ?:, you need 
to mark up each message with G_() so they both get extracted for 
translation by exgettext.

> +fatal_error (input_location, "COLLECT_GCC must be set.");

No '.' at end of diagnostic.

> +#define STR "-foffload-abi="

> +   fatal_error (input_location,
> +"unrecognizable argument of option " STR);

This concatenation with a macro won't work with exgettext extracting 
messages for translation.

> +fatal_error (input_location, "cannot open '%s'", gcn_s2_name);

> +fatal_error (input_location, "cannot open '%s'", gcn_cfile_name);

Use %qs (presuming this code is using the generic diagnostic machinery 
that supports it).

+gcn-run$(exeext): gcn-run.o
+   +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl

I'd expect this to fail on non-Unix configurations that don't have -ldl, 
and thus to need appropriate conditionals / configure tests to avoid that 
build failure.

A new port should add an appropriate entry to contrib/config-list.mk.  
You should also verify that the port does build using that 
contrib/config-list.mk entry, with the same version of GCC, built 
natively, in the PATH, or equivalently that the port builds with the same 
version of GCC, built natively, in the PATH, when you configure with 
--enable-werror-always and the other options config-list.mk uses - this is 
the cross-compiler equivalent of the native use of -Werror in the later 
stages of bootstrap.  (Preferably verify this building for both 32-bit and 
64-bit hosts, since it's easy to have warnings that only show up for one 
but not the other.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [patch, fortran] PR87103 - [OOP] ICE in gfc_new_symbol() due to overlong symbol name

2018-09-05 Thread Andrew Benson
On Wednesday, September 5, 2018 12:35:04 PM PDT Bernhard Reutner-Fischer 
wrote:
> On Wed, 5 Sep 2018 at 03:30, Jerry DeLisle  wrote:
> > On 09/04/2018 10:43 AM, Bernhard Reutner-Fischer wrote:
> > > On Tue, 4 Sep 2018 at 18:43, Andrew Benson  
wrote:
> > >> As suggested by Janus, PR87103 is easily fixed by the attached patch
> > >> which
> > >> increases GFC_MAX_SYMBOL_LEN to 76 (sufficient to hold the maximum
> > >> allowed F08 symbol length of 63, plus a null terminator, plus the
> > >> "__tmp_class_" prefix).
> > > This is so much wrong.
> > > Note that this will be fixed properly by the changes contained in the
> > > https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/aldot/fortran
> > > -fe-stringpool branch.
> > > There we keep the GFC_MAX_SYMBOL_LEN at 63 proper but use an internal
> > > buffer double that size which in turn is sufficient to hold all
> > > compiler-generated identifiers.
> > > See gfc_get_string() even in current TOT.
> > > 
> > > Maybe we should bite the bullet and start to merge the stringpool
> > > changes now instead of this hack?
> > 
> > It all makes sense to me, please proceed. (my 2 cents worth)
> 
> Ok so i will reread the fortran-fe-stringpool series and submit it
> here for review.
> 
> Let's return to the issue at hand for a moment, though.
> I tested the attached alternate fix on top of the
> fortran-fe-stringpool branch where it fixes PR87103.
> Maybe somebody has spare cycles to test it on top of current trunk?
> 
> thanks,
> 
> [PATCH,FORTRAN] PR87103: Remove max symbol length check from gfc_new_symbol
> 
> gfc_match_name does check for too long names already. Since
> gfc_new_symbol is also called for symbols with internal names containing
> compiler-generated prefixes, these internal names can easily exceed the
> max_identifier_length mandated by the standard.
> 
> gcc/fortran/ChangeLog
> 
> 2018-09-04  Bernhard Reutner-Fischer  
> 
> PR fortran/87103
> * expr.c (gfc_check_conformance): Check vsnprintf for truncation.
> * iresolve.c (gfc_get_string): Likewise.
> * symbol.c (gfc_new_symbol): Remove check for maximum symbol
> name length.  Remove redundant 0 setting of new calloc()ed
> gfc_symbol.

This patch tests successfully on the current trunk for me.

-- 

* Andrew Benson: http://users.obs.carnegiescience.edu/abenson/contact.html

* Galacticus: https://bitbucket.org/abensonca/galacticus



Re: [patch,nvptx] Basic -misa support for nvptx

2018-09-05 Thread Tom de Vries
On 09/05/2018 12:19 AM, Cesar Philippidis wrote:
> On 09/02/2018 07:57 AM, Cesar Philippidis wrote:
>> On 09/01/2018 12:04 PM, Tom de Vries wrote:
>>> On 08/31/2018 04:14 PM, Cesar Philippidis wrote:
>>
 Is this patch OK for trunk?

>>>
>>> Well, how did you test this (
>>> https://gcc.gnu.org/contribute.html#patches : "Bootstrapping and
>>> testing. State the host and target combinations you used to do proper
>>> testing as described above, and the results of your testing.") ?
>>
>> I tested the standalone nvptx compiler. I'll retest with libgomp with
>> -misa=sm_35. Bootstrapping won't help much here, unfortunately.
 +++ b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c
 @@ -0,0 +1,24 @@
 +/* Test the nvptx atomic instructions for __atomic_fetch_OP for SM_35
 +   targets.  */
 +
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -misa=sm_35" } */
 +
 +int
 +main()
 +{
 +  unsigned long long a = ~0;
 +  unsigned b = 0xa;
 +
 +  __atomic_fetch_add (&a, b, 0);
 +  __atomic_fetch_and (&a, b, 0);
 +  __atomic_fetch_or (&a, b, 0);
 +  __atomic_fetch_xor (&a, b, 0);
 +  
 +  return a;
 +}
 +
 +/* { dg-final { scan-assembler "atom.add.u64" } } */
 +/* { dg-final { scan-assembler "atom.b64.and" } } */
 +/* { dg-final { scan-assembler "atom.b64.or" } } */
 +/* { dg-final { scan-assembler "atom.b64.xor" } } */
 -- 2.17.1

>>>
>>> Hmm, the add.u64 vs b64.and looks odd (and the scan-assembler-not
>>> testcase does not use this difference, so that needs to be fixed, or for
>>> bonus points, changed into a scan-assembler testcase).
>>>
>>> The documentation uses "op.type", we should fix the compiler to emit
>>> that consistently. Separate patch that fixes that pre-approved.
>>
>> ACK. I think there are a lot of other cases like that in the BE.
>>
>>> This is ok (with, as I mentioned above, the SI part split off into a
>>> separate patch), on the condition that you test libgomp with
>>> -foffload=-misa=sm_35.
> 
> Adding -foffload=misa=sm_35 didn't work because the host gcc doesn't
> support the -misa flag.

That doesn't make sense to me. For me this works without any problems.
Have you tried a clean build?

> When I forced the nvptx BE to set TARGET_SM35 to
> always be true, I ran into problems with SM_30 code linking against
> SM_35 code.

I also cannot reproduce this, works for me.

> Therefore, I don't think this patch is ready for trunk yet.
>> By the way, is -misa really necessary for atomic_fetch_?
> Looking at the PTX documentation I see
> :
> 
> PTX ISA version 3.1 introduces the following new features:
> 
> * Support for sm_35 target architecture.
> * Extends atomic and reduction instructions to perform 64-bit {and, or,
> xor} operations, and 64-bit integer {min, max} operations.
> 
> Is there a table which lists which GPUs are compatible with which
> instructions?

Yes, every instruction has a table in the ptx manual, and there's a "PTX
ISA Notes" entry.

For the atom instruction in ptx isa 3.1 manual, we have "PTX ISA Notes":
...
atom.global requires sm_11 or higher.
atom.shared requires sm_12 or higher.
64-bit atom.global.{add,cas,exch} require sm_12 or higher.
64-bit atom.shared.{add,cas,exch} require sm_20 or higher.
64-bit atom.{and,or,xor,min,max} require sm_35 or higher.
atom.add.f32 requires sm_20 or higher.
Use of generic addressing requires sm_20 or higher.
...

Thanks,
- Tom


[PATCH] PR libstdc++/87228 Use heap for large buffers instead of alloca

2018-09-05 Thread Jonathan Wakely

Using a huge width in a formatted output operation results in stack
overflow due to no restriction on the size passed to alloca. This patch
causes the formatting functions to use the heap instead of the stack
when a large buffer is needed.

PR libstdc++/87228
* include/bits/locale_facets.tcc (num_put:_M_insert_int): Use heap
for large buffers instead of alloca.
(num_put:_M_insert_float): Likewise.
* testsuite/22_locale/num_put/put/char/87228.cc: New test.
* testsuite/22_locale/num_put/put/wchar_t/87228.cc: New test.

Tested x86_64-linux.

Even with this patch we can still put 3kb on the stack, but that's
much better than trying (and failing) to use alloca for huge values.

An alternative would be to just check for silly values and throw
std::length_error, but I think this is the right fix.

I'll wait a day or two for any comments or better ideas.

commit 5b61805c1ccd106d2271dbf22663b898d2f03ab2
Author: Jonathan Wakely 
Date:   Wed Sep 5 14:30:28 2018 +0100

PR libstdc++/87228 Use heap for large buffers instead of alloca

Using a huge width in a formatted output operation results in stack
overflow due to no restriction on the size passed to alloca. This patch
causes the formatting functions to use the heap instead of the stack
when a large buffer is needed.

PR libstdc++/87228
* include/bits/locale_facets.tcc (num_put:_M_insert_int): Use heap
for large buffers instead of alloca.
(num_put:_M_insert_float): Likewise.
* testsuite/22_locale/num_put/put/char/87228.cc: New test.
* testsuite/22_locale/num_put/put/wchar_t/87228.cc: New test.

diff --git a/libstdc++-v3/include/bits/locale_facets.h 
b/libstdc++-v3/include/bits/locale_facets.h
index f6e0283fec9..b9a87aa38d5 100644
--- a/libstdc++-v3/include/bits/locale_facets.h
+++ b/libstdc++-v3/include/bits/locale_facets.h
@@ -2543,6 +2543,17 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
   do_put(iter_type, ios_base&, char_type, long double) const;
 #endif
   //@}
+
+private:
+  template<typename _Tp>
+   struct _Char_buf {
+ _Char_buf() : _M_p(0) { }
+ _Char_buf(_Char_buf& __b) : _M_p(__b._M_p) { __b._M_p = 0; }
+ ~_Char_buf() { delete[] _M_p; }
+ _Tp* allocate(size_t __n) { return _M_p = new _Tp[__n]; }
+   private:
+ _Tp* _M_p;
+   };
 };
 
   template 
diff --git a/libstdc++-v3/include/bits/locale_facets.tcc 
b/libstdc++-v3/include/bits/locale_facets.tcc
index 39da5766075..86c43d78d76 100644
--- a/libstdc++-v3/include/bits/locale_facets.tcc
+++ b/libstdc++-v3/include/bits/locale_facets.tcc
@@ -917,12 +917,17 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
  }
  }
 
+   _Char_buf<_CharT> __wbuf;
// Pad.
const streamsize __w = __io.width();
if (__w > static_cast(__len))
  {
-   _CharT* __cs3 = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT)
- * __w));
+   _CharT* __cs3;
+   const size_t __wlen = __w * sizeof(_CharT);
+   if (__wlen > 1024)
+ __cs3 = __wbuf.allocate(__w);
+   else
+ __cs3 = static_cast<_CharT*>(__builtin_alloca(__wlen));
_M_pad(__fill, __w, __io, __cs3, __cs, __len);
__cs = __cs3;
  }
@@ -992,6 +997,10 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
char __fbuf[16];
__num_base::_S_format_float(__io, __fbuf, __mod);
 
+   const size_t __max_alloca = 1024;
+   _Char_buf<char> __cbuf;
+   _Char_buf<_CharT> __wbuf;
+
 #if _GLIBCXX_USE_C99_STDIO
// Precision is always used except for hexfloat format.
const bool __use_prec =
@@ -1012,7 +1021,10 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
if (__len >= __cs_size)
  {
__cs_size = __len + 1;
-   __cs = static_cast(__builtin_alloca(__cs_size));
+   if (__cs_size > __max_alloca)
+ __cs = __cbuf.allocate(__cs_size);
+   else
+ __cs = static_cast(__builtin_alloca(__cs_size));
if (__use_prec)
  __len = std::__convert_from_v(_S_get_c_locale(), __cs, __cs_size,
__fbuf, __prec, __v);
@@ -1034,7 +1046,11 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
// largely sufficient.
const int __cs_size = __fixed ? __max_exp + __prec + 4
  : __max_digits * 2 + __prec;
-   char* __cs = static_cast(__builtin_alloca(__cs_size));
+   char* __cs;
+   if (__cs_size > __max_alloca)
+ __cs = __cbuf.allocate(__cs_size);
+   else
+ __cs = static_cast(__builtin_alloca(__cs_size));
__len = std::__convert_from_v(_S_get_c_locale(), __cs, 0, __fbuf, 
  __prec, __v);
 #endif
@@ -1043,8 +1059,12 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL
// numpunct.decimal_point() values for '.' and addi

Re: [PATCH 21/25] GCN Back-end (part 2/2).

2018-09-05 Thread Andrew Stubbs

On 05/09/18 15:22, Joseph Myers wrote:

+gcn-run$(exeext): gcn-run.o
+   +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl

I'd expect this to fail on non-Unix configurations that don't have -ldl,
and thus to need appropriate conditionals / configure tests to avoid that
build failure.


We don't support any host system other than x86_64 linux. There are no 
drivers for any other system, and the offloaded datatypes need to be 
binary compatible, so even 32-bit x86 doesn't work.


I suppose someone might choose to compile things on an alternative 
system for running on a compatible system, in which case we'd want to 
simply skip this binary.


How does one normally do this?


A new port should add an appropriate entry to contrib/config-list.mk.
You should also verify that the port does build using that
contrib/config-list.mk entry, with the same version of GCC, built
natively, in the PATH, or equivalently that the port builds with the same
version of GCC, built natively, in the PATH, when you configure with
--enable-werror-always and the other options config-list.mk uses - this is
the cross-compiler equivalent of the native use of -Werror in the later
stages of bootstrap.  (Preferably verify this building for both 32-bit and
64-bit hosts, since it's easy to have warnings that only show up for one
but not the other.)


I didn't know about that one.

I see it uses "--enable-languages=all", but GCN is known to fail to 
build libstdc++ (exceptions and static constructors are not 
implemented), so I wouldn't expect the build to succeed.


Andrew


Re: [PATCH 21/25] GCN Back-end (part 2/2).

2018-09-05 Thread Joseph Myers
On Wed, 5 Sep 2018, Andrew Stubbs wrote:

> I suppose someone might choose to compile things on an alternative system for
> running on a compatible system, in which case we'd want to simply skip this
> binary.
> 
> How does one normally do this?

I'd expect a configure test plus makefile conditionals in the makefile 
fragment.

> > A new port should add an appropriate entry to contrib/config-list.mk.
> > You should also verify that the port does build using that
> > contrib/config-list.mk entry, with the same version of GCC, built
> > natively, in the PATH, or equivalently that the port builds with the same
> > version of GCC, built natively, in the PATH, when you configure with
> > --enable-werror-always and the other options config-list.mk uses - this is
> > the cross-compiler equivalent of the native use of -Werror in the later
> > stages of bootstrap.  (Preferably verify this building for both 32-bit and
> > 64-bit hosts, since it's easy to have warnings that only show up for one
> > but not the other.)
> 
> I didn't know about that one.

See sourcebuild.texi, "Back End", for lists of places to update for a new 
port, which includes config-list.mk in the list of places to update for a 
port being contributed upstream.

> I see it uses "--enable-languages=all", but GCN is known to fail to build
> libstdc++ (exceptions and static constructors are not implemented), so I
> wouldn't expect the build to succeed.

It also uses "make all-gcc", so only the host-side tools need to build 
(without warnings when building with the same version of GCC, except for 
the files that specifically use -Wno-), not any libraries.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] PR libstdc++/87228 Use heap for large buffers instead of alloca

2018-09-05 Thread Jonathan Wakely

On 05/09/18 15:32 +0100, Jonathan Wakely wrote:

Using a huge width in a formatted output operation results in stack
overflow due to no restriction on the size passed to alloca. This patch
causes the formatting functions to use the heap instead of the stack
when a large buffer is needed.

PR libstdc++/87228
* include/bits/locale_facets.tcc (num_put:_M_insert_int): Use heap
for large buffers instead of alloca.
(num_put:_M_insert_float): Likewise.
* testsuite/22_locale/num_put/put/char/87228.cc: New test.
* testsuite/22_locale/num_put/put/wchar_t/87228.cc: New test.

Tested x86_64-linux.

Even with this patch we can still put 3kb on the stack, but that's
much better than trying (and failing) to use alloca for huge values.

An alternative would be to just check for silly values and throw
std::length_error, but I think this is the right fix.

I'll wait a day or two for any comments or better ideas.


Unlimited alloca use is the subject of PR 28277. In the comments
Andrew Pinski said "If there are checks then it will be slow." I think
in the specific case of num_put::do_put we're already dealing with
iostreams and locales, so a few extra branches aren't going to hurt
much. I think it's certainly better than a segfault.




Re: VRP: abstract out wide int CONVERT_EXPR_P code

2018-09-05 Thread Michael Matz
Hi,

On Wed, 5 Sep 2018, Aldy Hernandez wrote:

> No apologies needed.  I welcome all masochists to join me in my personal hell
> :).

;-)

> > How can this test and following code catch all problematic cases?  Assume
> > a range of [253..257], truncating to 8 bits unsigned.  The size of the
> > range is 5 (not 4 as the code above calculates), well representable in 8
> > bits.  Nevertheless, the min of the new range isn't 253, nor is the max of
> > the new range 257.  In fact the new range must be [0..255] (if you have no
> > multi ranges or anti-ranges).  So whatever this function is supposed to
> > do (what btw?), it certainly does not convert a range.
> 
> Welcome to the wonderful world of anti ranges, where nothing is as it 
> seems.

Yes, as I said, to precisely capture the result of '[253..257] & 255' you 
either need multi-ranges or an anti-range.  

> First you're not looking at what's currently in mainline.  Richard 
> approved the first incantation of this code.  But even so, I believe the 
> above is correct as well.

Well, maybe as part of a larger sequence of code, but the above code 
doesn't even return [253..1], there's actually no conversion/change of the 
inputs done at all.  Yes, the current code actually does the truncation.  
I guess I was triggered by the name range_convert that didn't actually do 
any conversion.  I now have no issue with the code in mainline anymore 
(well, maybe except disliking functions with eight parameters).

> If desired, I could further document that the range returned may have 
> its contents swapped and that is why it needs canonizing.  But hey, that 
> shit was already broken when I got here ;-).
> 
> If you follow the code in tree-vrp.c before my patch you will see that 
> the path is the same... we calculate a nonsensical range of [253, 1] and 
> then massage it into ~[2, 252] in set_and_canonicalize_value_range.

Yes, I saw that, I think I was sceptical about splitting off a part of the 
code that wasn't self-sufficient.

> I think the fact that it's so hard to get all this right, is a strong 
> argument for getting rid of anti ranges.  Almost all the bugs or 
> oversights I've fixed over the past months in VRP have been related to 
> anti ranges.

Well, without anti-ranges you need multi-ranges (or at least two-ranges); 
it's a tradeoff.  If I had to choose a high-level interface and the 
choice is between supporting two-ranges directly (as two parameters) or a 
single range/anti-range I'd always take the latter (internally of course 
it would be represented as a two-range to make arguing and implementing 
stuff easier).


Ciao,
Michael.


[PATCH,FORTRAN 01/29] gdbinit: break on gfc_internal_error

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

Aids debugging the fortran FE.

gcc/ChangeLog:

2017-11-12  Bernhard Reutner-Fischer  

* gdbinit.in: Break on gfc_internal_error.
---
 gcc/gdbinit.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in
index 4db977f0bab..ac4d7c42e21 100644
--- a/gcc/gdbinit.in
+++ b/gcc/gdbinit.in
@@ -227,6 +227,7 @@ b fancy_abort
 
 # Put a breakpoint on internal_error to help with debugging ICEs.
 b internal_error
+b gfc_internal_error
 
 set complaints 0
 # Don't let abort actually run, as it will make
-- 
2.19.0.rc1



[PATCH,FORTRAN 00/29] Move towards stringpool, part 1

2018-09-05 Thread Bernhard Reutner-Fischer
Hi,

The fortran frontend still uses stack-based handling of (symbol) names
with fixed-sized buffers. Furthermore these buffers often are too small
when dealing with F2003 identifiers which can be up to and including 63
bytes long.

Other frontends have used the stringpool for many years.
This janitorial series is a first step towards using the stringpool in
the frontend.
Consequently this allows us to use pointer-comparison to see if two
given "names" are identical instead of doing lots and lots of string
comparisons.


Part 1 switches most of the fortran FE. An eventual part 2 would
continue to switch the few remaining stack-based identifier
manipulations to use the stringpool. My initial plan was to also see if
switching gfc_symtree from treap to a hash_map would bring us any
measurable benefit, but that, too, is left for an eventual part 2.

Bootstrapped and regtested on x86_64-foo-linux.

I'd appreciate it if someone could double check for regressions on other
setups. Git branch:
https://gcc.gnu.org/git/?p=gcc.git;a=log;h=refs/heads/aldot/fortran-fe-stringpool

Ok for trunk?

Bernhard Reutner-Fischer (29):
  gdbinit: break on gfc_internal_error
  Use stringpool for gfc_match_defined_op_name()
  Use stringpool for gfc_get_name
  Use stringpool for gfc_match_generic_spec
  Use stringpool for gfc_match("%n")
  Use stringpool for association_list
  Use stringpool for some gfc_code2string return values
  Add uop/name helpers
  Use stringpool for modules
  Do not copy name for check_function_name
  Do pointer comparison instead of strcmp
  Use stringpool for remaining names
  Use stringpool for intrinsics and common
  Fix write_omp_udr for user-operator REDUCTIONs
  Use stringpool for iso_c_binding module names
  Do pointer comparison in iso_c_binding_module
  Use stringpool for iso_fortran_env
  Use stringpool for charkind
  Use stringpool and unified uppercase handling for types
  Use stringpool in class et al
  Use stringpool for module tbp
  Use stringpool in class and procedure-pointer result
  Use stringpool for module binding_label
  Use stringpool for intrinsic functions
  Use stringpool on loading module symbols
  Use stringpool for mangled common names
  Use stringpool for OMP clause reduction code
  Free type-bound procedure structs
  PR87103: Remove max symbol length check from gfc_new_symbol

 gcc/fortran/check.c   |   2 +-
 gcc/fortran/class.c   |  96 +--
 gcc/fortran/decl.c| 265 ++---
 gcc/fortran/expr.c|   4 +-
 gcc/fortran/frontend-passes.c |  16 +-
 gcc/fortran/gfortran.h|  18 +-
 gcc/fortran/interface.c   | 109 ++--
 gcc/fortran/intrinsic.c   |  11 +-
 gcc/fortran/io.c  |  10 +-
 gcc/fortran/iresolve.c|  35 ++--
 gcc/fortran/match.c   | 143 
 gcc/fortran/match.h   |   9 +-
 gcc/fortran/matchexp.c|  22 ++-
 gcc/fortran/misc.c|   2 +-
 gcc/fortran/module.c  | 311 ++
 gcc/fortran/openmp.c  | 120 +++--
 gcc/fortran/parse.c   |  23 ++-
 gcc/fortran/primary.c |  58 ---
 gcc/fortran/resolve.c |  81 +
 gcc/fortran/symbol.c  |  58 ---
 gcc/fortran/trans-array.c |   4 +-
 gcc/fortran/trans-common.c|  10 +-
 gcc/fortran/trans-decl.c  |  38 ++---
 gcc/fortran/trans-expr.c  |  11 +-
 gcc/fortran/trans-openmp.c|   1 +
 gcc/fortran/trans-types.c |  20 +--
 gcc/fortran/trans.c   |   6 +-
 gcc/gdbinit.in|   1 +
 28 files changed, 719 insertions(+), 765 deletions(-)

-- 
2.19.0.rc1



[PATCH,FORTRAN 02/29] Use stringpool for gfc_match_defined_op_name()

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

The openmp part will be cleaned up later in this series.

gcc/fortran/ChangeLog:

2017-10-22  Bernhard Reutner-Fischer  

* match.h (gfc_match_defined_op_name): Adjust prototype and add
a parameter USER_OPERATOR.
* matchexp.c (gfc_match_defined_op_name): Use gfc_get_string and
return a user operator if USER_OPERATOR is true.
(match_defined_operator): Update calls to gfc_match_defined_op_name.
* interface.c (gfc_match_generic_spec): Likewise.
* openmp.c (gfc_match_omp_clauses): Likewise. Use gfc_get_string
where appropriate.
(gfc_match_omp_declare_reduction): Likewise.
---
 gcc/fortran/interface.c |  5 +++--
 gcc/fortran/match.h |  2 +-
 gcc/fortran/matchexp.c  | 18 --
 gcc/fortran/openmp.c| 31 +--
 4 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index f85c76bad0f..14137cebd6c 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -160,7 +160,8 @@ gfc_match_generic_spec (interface_type *type,
   *op = INTRINSIC_NONE;
   if (gfc_match (" operator ( ") == MATCH_YES)
 {
-  m = gfc_match_defined_op_name (buffer, 1);
+  const char *oper = NULL;
+  m = gfc_match_defined_op_name (oper, 1, 0);
   if (m == MATCH_NO)
goto syntax;
   if (m != MATCH_YES)
@@ -172,7 +173,7 @@ gfc_match_generic_spec (interface_type *type,
   if (m != MATCH_YES)
return MATCH_ERROR;
 
-  strcpy (name, buffer);
+  strcpy (name, oper);
   *type = INTERFACE_USER_OP;
   return MATCH_YES;
 }
diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h
index 418542bd5a6..b3ced3f8454 100644
--- a/gcc/fortran/match.h
+++ b/gcc/fortran/match.h
@@ -315,7 +315,7 @@ match gfc_match_write (void);
 match gfc_match_print (void);
 
 /* matchexp.c.  */
-match gfc_match_defined_op_name (char *, int);
+match gfc_match_defined_op_name (const char *&, int, bool);
 match gfc_match_expr (gfc_expr **);
 
 /* module.c.  */
diff --git a/gcc/fortran/matchexp.c b/gcc/fortran/matchexp.c
index fb81e10a6c2..bb01af9f636 100644
--- a/gcc/fortran/matchexp.c
+++ b/gcc/fortran/matchexp.c
@@ -30,10 +30,14 @@ static const char expression_syntax[] = N_("Syntax error in 
expression at %C");
 
 /* Match a user-defined operator name.  This is a normal name with a
few restrictions.  The error_flag controls whether an error is
-   raised if 'true' or 'false' are used or not.  */
+   raised if 'true' or 'false' are used or not.
+   If USER_OPERATOR is true, a user operator is returned in RESULT
+   upon success.
+ */
 
 match
-gfc_match_defined_op_name (char *result, int error_flag)
+gfc_match_defined_op_name (const char *&result, int error_flag,
+bool user_operator)
 {
   static const char * const badops[] = {
 "and", "or", "not", "eqv", "neqv", "eq", "ne", "ge", "le", "lt", "gt",
@@ -72,8 +76,10 @@ gfc_match_defined_op_name (char *result, int error_flag)
gfc_error ("Bad character %qc in OPERATOR name at %C", name[i]);
return MATCH_ERROR;
   }
-
-  strcpy (result, name);
+  if (user_operator)
+result = gfc_get_string (".%s.", name);
+  else
+result = gfc_get_string ("%s", name);
   return MATCH_YES;
 
 error:
@@ -91,10 +97,10 @@ error:
 static match
 match_defined_operator (gfc_user_op **result)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   match m;
 
-  m = gfc_match_defined_op_name (name, 0);
+  m = gfc_match_defined_op_name (name, 0, 0);
   if (m != MATCH_YES)
 return m;
 
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 94a7f7eaa50..a852fc490db 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -1581,6 +1581,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
{
  gfc_omp_reduction_op rop = OMP_REDUCTION_NONE;
  char buffer[GFC_MAX_SYMBOL_LEN + 3];
+ const char *op = NULL;
  if (gfc_match_char ('+') == MATCH_YES)
rop = OMP_REDUCTION_PLUS;
  else if (gfc_match_char ('*') == MATCH_YES)
@@ -1596,13 +1597,10 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
  else if (gfc_match (".neqv.") == MATCH_YES)
rop = OMP_REDUCTION_NEQV;
  if (rop != OMP_REDUCTION_NONE)
-   snprintf (buffer, sizeof buffer, "operator %s",
+   op = gfc_get_string ("operator %s",
  gfc_op2string ((gfc_intrinsic_op) rop));
- else if (gfc_match_defined_op_name (buffer + 1, 1) == MATCH_YES)
-   {
- buffer[0] = '.';
- strcat (buffer, ".");
-   }
+ else if (gfc_match_defined_op_name (op, 1, 1) == MATCH_YES)
+   ;
  else if (gfc_match_name (buffer) == MATCH_YES)
{
  gfc_symbol *sym;
@@ -1

[PATCH,FORTRAN 08/29] Add uop/name helpers

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

Introduce a helper to construct a user operator from a name and the
reverse operation, i.e. a helper to construct a name from a user
operator.

gcc/fortran/ChangeLog:

2017-10-29  Bernhard Reutner-Fischer  

* gfortran.h (gfc_get_uop_from_name):
(gfc_get_name_from_uop): Declare.
* symbol.c (gfc_get_uop_from_name):
(gfc_get_name_from_uop): Define.
* module.c (load_omp_udrs): Use them.
---
 gcc/fortran/gfortran.h |  2 ++
 gcc/fortran/module.c   | 21 +++--
 gcc/fortran/symbol.c   | 21 +
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index ff42b39b453..6c32b8ac71f 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3019,6 +3019,8 @@ void gfc_delete_symtree (gfc_symtree **, const char *);
 gfc_symtree *gfc_get_unique_symtree (gfc_namespace *);
 gfc_user_op *gfc_get_uop (const char *);
 gfc_user_op *gfc_find_uop (const char *, gfc_namespace *);
+const char *gfc_get_uop_from_name (const char*);
+const char *gfc_get_name_from_uop (const char*);
 void gfc_free_symbol (gfc_symbol *);
 void gfc_release_symbol (gfc_symbol *);
 gfc_symbol *gfc_new_symbol (const char *, gfc_namespace *);
diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
index 8628f3aeda9..b3f68b8803f 100644
--- a/gcc/fortran/module.c
+++ b/gcc/fortran/module.c
@@ -4785,7 +4785,7 @@ load_omp_udrs (void)
   while (peek_atom () != ATOM_RPAREN)
 {
   const char *name = NULL, *newname;
-  char *altname;
+  const char *altname = NULL;
   gfc_typespec ts;
   gfc_symtree *st;
   gfc_omp_reduction_op rop = OMP_REDUCTION_USER;
@@ -4812,15 +4812,8 @@ load_omp_udrs (void)
  else if (strcmp (p, ".neqv.") == 0)
rop = OMP_REDUCTION_NEQV;
}
-  altname = NULL;
   if (rop == OMP_REDUCTION_USER && name[0] == '.')
-   {
- size_t len = strlen (name + 1);
- altname = XALLOCAVEC (char, len);
- gcc_assert (name[len] == '.');
- memcpy (altname, name + 1, len - 1);
- altname[len - 1] = '\0';
-   }
+   altname = gfc_get_name_from_uop (name);
   newname = name;
   if (rop == OMP_REDUCTION_USER)
newname = find_use_name (altname ? altname : name, !!altname);
@@ -4832,15 +4825,7 @@ load_omp_udrs (void)
  continue;
}
   if (altname && newname != altname)
-   {
- size_t len = strlen (newname);
- altname = XALLOCAVEC (char, len + 3);
- altname[0] = '.';
- memcpy (altname + 1, newname, len);
- altname[len + 1] = '.';
- altname[len + 2] = '\0';
- name = gfc_get_string ("%s", altname);
-   }
+   name = altname = gfc_get_uop_from_name (newname);
   st = gfc_find_symtree (gfc_current_ns->omp_udr_root, name);
   gfc_omp_udr *udr = gfc_omp_udr_find (st, &ts);
   if (udr)
diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index 0a4f7c1711b..a8f841185f1 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -3026,6 +3026,27 @@ gfc_find_uop (const char *name, gfc_namespace *ns)
   return (st == NULL) ? NULL : st->n.uop;
 }
 
+/* Given a name return a string usable as user operator name.  */
+const char *
+gfc_get_uop_from_name (const char* name) {
+  gcc_assert (name[0] != '.');
+  return gfc_get_string (".%s.", name);
+}
+
+/* Given a user operator name return a string usable as name.  */
+const char *
+gfc_get_name_from_uop (const char* name) {
+  gcc_assert (name[0] == '.');
+  const size_t len = strlen (name) - 1;
+  gcc_assert (len > 1);
+  gcc_assert (name[len] == '.');
+  char *buffer = XNEWVEC (char, len);
+  memcpy (buffer, name + 1, len - 1);
+  buffer[len - 1] = '\0';
+  const char *ret = gfc_get_string ("%s", buffer);
+  XDELETEVEC (buffer);
+  return ret;
+}
 
 /* Update a symbol's common_block field, and take care of the associated
memory management.  */
-- 
2.19.0.rc1



[PATCH,FORTRAN 07/29] Use stringpool for some gfc_code2string return values

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

Use a stringpool-node for those gfc_code2string values that are used as
names.

gcc/fortran/ChangeLog:

2017-10-26  Bernhard Reutner-Fischer  

* interface.c (gfc_match_generic_spec, gfc_check_dtio_interfaces,
gfc_find_typebound_dtio_proc, gfc_find_specific_dtio_proc): Use
stringpool node for those return values of gfc_code2string that
are used as names.
---
 gcc/fortran/interface.c | 50 -
 1 file changed, 19 insertions(+), 31 deletions(-)

diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index 19a0eb28edd..8716813b7b2 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -182,12 +182,12 @@ gfc_match_generic_spec (interface_type *type,
   *op = dtio_op (name);
   if (*op == INTRINSIC_FORMATTED)
{
- name = gfc_code2string (dtio_procs, DTIO_RF);
+ name = gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_RF));
  *type = INTERFACE_DTIO;
}
   if (*op == INTRINSIC_UNFORMATTED)
{
- name = gfc_code2string (dtio_procs, DTIO_RUF);
+ name = gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_RUF));
  *type = INTERFACE_DTIO;
}
   if (*op != INTRINSIC_NONE)
@@ -199,12 +199,12 @@ gfc_match_generic_spec (interface_type *type,
   *op = dtio_op (name);
   if (*op == INTRINSIC_FORMATTED)
{
- name = gfc_code2string (dtio_procs, DTIO_WF);
+ name = gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_WF));
  *type = INTERFACE_DTIO;
}
   if (*op == INTRINSIC_UNFORMATTED)
{
- name = gfc_code2string (dtio_procs, DTIO_WUF);
+ name = gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_WUF));
  *type = INTERFACE_DTIO;
}
   if (*op != INTRINSIC_NONE)
@@ -4927,8 +4927,8 @@ gfc_check_dtio_interfaces (gfc_symbol *derived)
   || ((dtio_codes)code == DTIO_WF);
 
   tb_io_st = gfc_find_typebound_proc (derived, &t,
- gfc_code2string (dtio_procs, code),
- true, &derived->declared_at);
+ gfc_get_string ("%s", gfc_code2string (dtio_procs, code)),
+ true, &derived->declared_at);
   if (tb_io_st != NULL)
check_dtio_interface1 (derived, tb_io_st, true, formatted, code);
 }
@@ -4940,7 +4940,7 @@ gfc_check_dtio_interfaces (gfc_symbol *derived)
   || ((dtio_codes)code == DTIO_WF);
 
   tb_io_st = gfc_find_symtree (derived->ns->sym_root,
-  gfc_code2string (dtio_procs, code));
+ gfc_get_string ("%s", gfc_code2string (dtio_procs, code)));
   if (tb_io_st != NULL)
check_dtio_interface1 (derived, tb_io_st, false, formatted, code);
 }
@@ -4961,31 +4961,23 @@ gfc_find_typebound_dtio_proc (gfc_symbol *derived, bool 
write, bool formatted)
 {
   if (write == true)
 tb_io_st = gfc_find_typebound_proc (derived, &t,
-   gfc_code2string (dtio_procs,
-DTIO_WF),
-   true,
-   &derived->declared_at);
+   gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_WF)),
+   true, &derived->declared_at);
   else
 tb_io_st = gfc_find_typebound_proc (derived, &t,
-   gfc_code2string (dtio_procs,
-DTIO_RF),
-   true,
-   &derived->declared_at);
+   gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_RF)),
+   true, &derived->declared_at);
 }
   else
 {
   if (write == true)
 tb_io_st = gfc_find_typebound_proc (derived, &t,
-   gfc_code2string (dtio_procs,
-DTIO_WUF),
-   true,
-   &derived->declared_at);
+   gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_WUF)),
+   true, &derived->declared_at);
   else
 tb_io_st = gfc_find_typebound_proc (derived, &t,
-   gfc_code2string (dtio_procs,
-DTIO_RUF),
-   true,
-   &derived->declared_at);
+   gfc_get_string ("%s", gfc_code2string (dtio_procs, DTIO_RUF)),
+   true, &derived->declared_at);
 }
   return tb_io_st;
 }
@@ -5041,23 +5033,19 @@ gfc_find_specific_dtio_proc (gfc_symbol *derived, bool 
write, bool formatted)
{
  if (write

[PATCH,FORTRAN 06/29] Use stringpool for association_list

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

2017-10-26  Bernhard Reutner-Fischer  

* gfortran.h (struct gfc_association_list): Change name to
pointer.
* match.c (gfc_match_associate): Adjust.
---
 gcc/fortran/gfortran.h | 2 +-
 gcc/fortran/match.c| 6 ++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 774a6de6168..ff42b39b453 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2482,7 +2482,7 @@ typedef struct gfc_association_list
   /* True when the rank of the target expression is guessed during parsing.  */
   unsigned rankguessed:1;
 
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name;
   gfc_symtree *st; /* Symtree corresponding to name.  */
   locus where;
 
diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 1b03e7251a5..38827ed4637 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -1891,8 +1891,7 @@ gfc_match_associate (void)
   gfc_association_list* a;
 
   /* Match the next association.  */
-  const char *name_hack = NULL;
-  if (gfc_match (" %n =>", &name_hack) != MATCH_YES)
+  if (gfc_match (" %n =>", &newAssoc->name) != MATCH_YES)
{
  gfc_error ("Expected association at %C");
  goto assocListError;
@@ -1909,12 +1908,11 @@ gfc_match_associate (void)
}
  gfc_matching_procptr_assignment = 0;
}
-  strcpy (newAssoc->name, name_hack);
   newAssoc->where = gfc_current_locus;
 
   /* Check that the current name is not yet in the list.  */
   for (a = new_st.ext.block.assoc; a; a = a->next)
-   if (!strcmp (a->name, newAssoc->name))
+   if (a->name == newAssoc->name)
  {
gfc_error ("Duplicate name %qs in association at %C",
   newAssoc->name);
-- 
2.19.0.rc1



[PATCH,FORTRAN 04/29] Use stringpool for gfc_match_generic_spec

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

Ideally we would populate mstrings structs with strings obtained through
the stringpool. Doing so by means of minit wouldn't work out too well
though; see the comment in gfortran.h. We could replace the initialized
strings in gfc_init_1 but that's for a later patch.

gcc/fortran/ChangeLog:

2017-10-23  Bernhard Reutner-Fischer  

* match.h (gfc_match_generic_spec): Pass argument name by reference.
Adjust all callers.
* decl.c (access_attr_decl): Adjust.
(gfc_match_generic): Adjust.
* interface.c (gfc_match_generic_spec, gfc_match_interface,
gfc_match_end_interface): Adjust.
* module.c (gfc_match_use): Adjust.
---
 gcc/fortran/decl.c  | 11 +--
 gcc/fortran/gfortran.h  |  5 +
 gcc/fortran/interface.c | 20 +---
 gcc/fortran/match.h |  3 ++-
 gcc/fortran/module.c| 16 +---
 5 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index f0ff5138ca1..2f8d2aca695 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -8582,7 +8582,7 @@ gfc_match_target (void)
 static match
 access_attr_decl (gfc_statement st)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   interface_type type;
   gfc_user_op *uop;
   gfc_symbol *sym, *dt_sym;
@@ -10768,7 +10768,7 @@ syntax:
 match
 gfc_match_generic (void)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   char bind_name[GFC_MAX_SYMBOL_LEN + 16]; /* Allow space for OPERATOR(...).  
*/
   gfc_symbol* block;
   gfc_typebound_proc tbattr; /* Used for match_binding_attributes.  */
@@ -10931,9 +10931,8 @@ gfc_match_generic (void)
 {
   gfc_symtree* target_st;
   gfc_tbp_generic* target;
-  const char *name2 = NULL;
 
-  m = gfc_match_name (&name2);
+  m = gfc_match_name (&name);
   if (m == MATCH_ERROR)
goto error;
   if (m == MATCH_NO)
@@ -10942,14 +10941,14 @@ gfc_match_generic (void)
  goto error;
}
 
-  target_st = gfc_get_tbp_symtree (&ns->tb_sym_root, name2);
+  target_st = gfc_get_tbp_symtree (&ns->tb_sym_root, name);
 
   /* See if this is a duplicate specification.  */
   for (target = tb->u.generic; target; target = target->next)
if (target_st == target->specific_st)
  {
gfc_error ("%qs already defined as specific binding for the"
-  " generic %qs at %C", name2, bind_name);
+  " generic %qs at %C", name, bind_name);
goto error;
  }
 
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 04b0024a992..774a6de6168 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -95,6 +95,11 @@ not after.
 
 /* Macro to initialize an mstring structure.  */
 #define minit(s, t) { s, NULL, t }
+/* Ideally we would want that to be
+   { IDENTIFIER_POINTER (get_identifier_with_length (s, sizeof(s)-1)), NULL, t 
}
+   but stringpool's hash table is not allocated yet and we would have to do
+   tricks to have a ctor to initialize it. And even that wouldn't work too
+   well as toplevel would later on wipe ident_hash.  */
 
 /* Structure for storing strings to be matched by gfc_match_string.  */
 typedef struct
diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index de58eed23f0..6a5fe928b93 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -136,11 +136,10 @@ dtio_op (char* mode)
 
 match
 gfc_match_generic_spec (interface_type *type,
-   char *name,
+   const char *&name,
gfc_intrinsic_op *op)
 {
   char buffer[GFC_MAX_SYMBOL_LEN + 1];
-  const char *name2 = NULL;
   match m;
   gfc_intrinsic_op i;
 
@@ -174,7 +173,7 @@ gfc_match_generic_spec (interface_type *type,
   if (m != MATCH_YES)
return MATCH_ERROR;
 
-  strcpy (name, oper);
+  name = oper;
   *type = INTERFACE_USER_OP;
   return MATCH_YES;
 }
@@ -184,12 +183,12 @@ gfc_match_generic_spec (interface_type *type,
   *op = dtio_op (buffer);
   if (*op == INTRINSIC_FORMATTED)
{
- strcpy (name, gfc_code2string (dtio_procs, DTIO_RF));
+ name = gfc_code2string (dtio_procs, DTIO_RF);
  *type = INTERFACE_DTIO;
}
   if (*op == INTRINSIC_UNFORMATTED)
{
- strcpy (name, gfc_code2string (dtio_procs, DTIO_RUF));
+ name = gfc_code2string (dtio_procs, DTIO_RUF);
  *type = INTERFACE_DTIO;
}
   if (*op != INTRINSIC_NONE)
@@ -201,21 +200,20 @@ gfc_match_generic_spec (interface_type *type,
   *op = dtio_op (buffer);
   if (*op == INTRINSIC_FORMATTED)
{
- strcpy (name, gfc_code2string (dtio_procs, DTIO_WF));
+ name = gfc_code2string (dtio_procs, DTIO_WF);
  *type = INTERFACE_DTIO;
}
   if (*op == INTRINSIC_UNFORMATTED)
{
- strcpy (name, gfc_code2string (dtio_pr

[PATCH,FORTRAN 09/29] Use stringpool for modules

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

gcc/fortran/ChangeLog:

2017-10-29  Bernhard Reutner-Fischer  

* gfortran.h (struct gfc_use_rename): Use pointers for
local_name and use_name.
* match.c (gfc_match): Set name to NULL on failed match.
* module.c (gfc_match_use): Use pointer comparison instead of
string comparison.
(find_use_name_n): Likewise.
(mio_internal_string): Delete.
(mio_expr): Simplify INTRINSIC_USER handling.
(load_operator_interfaces): Use pointer for name and module.
(load_generic_interfaces): Likewise.
(load_commons): Use pointer for name.
(load_needed): Use pointer comparison instead of string
comparison.
(read_module): Use pointer for name. Use pointer comparison
instead of string comparison.
(import_iso_c_binding_module): Adjust to struct gfc_use_rename
changes.
(use_iso_fortran_env_module): Likewise.
* symbol.c (generate_isocbinding_symbol): Likewise.
* trans-decl.c (gfc_trans_use_stmts): Likewise.
---
 gcc/fortran/gfortran.h   |   3 +-
 gcc/fortran/match.c  |  11 +++-
 gcc/fortran/module.c | 115 ++-
 gcc/fortran/symbol.c |   2 +-
 gcc/fortran/trans-decl.c |   8 +--
 5 files changed, 56 insertions(+), 83 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 6c32b8ac71f..cb9195d393e 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1673,7 +1673,8 @@ gfc_entry_list;
 
 typedef struct gfc_use_rename
 {
-  char local_name[GFC_MAX_SYMBOL_LEN + 1], use_name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *local_name;
+  const char *use_name;
   struct gfc_use_rename *next;
   int found;
   gfc_intrinsic_op op;
diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 38827ed4637..6596bd87c09 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -1274,15 +1274,22 @@ not_yes:
case '%':
  matches++;
  break;/* Skip.  */
+#if 0
+   /* If everybody is disciplined we do not need to reset this.  */
+   case 'n':
+ vp = va_arg (argp, void **); /* FORNOW: NULL shouldn't be */
+ *vp = NULL;
+ break;
+#else
+   case 'n':
+#endif
 
/* Matches that don't have to be undone */
case 'o':
case 'l':
-   case 'n':
case 's':
  (void) va_arg (argp, void **);
  break;
-
case 'e':
case 'v':
  vp = va_arg (argp, void **);
diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
index b3f68b8803f..3ad47f57930 100644
--- a/gcc/fortran/module.c
+++ b/gcc/fortran/module.c
@@ -646,10 +646,10 @@ gfc_match_use (void)
  if (use_list->only_flag)
{
  if (m != MATCH_YES)
-   strcpy (new_use->use_name, name);
+   new_use->use_name = name;
  else
{
- strcpy (new_use->local_name, name);
+ new_use->local_name = name;
  m = gfc_match_generic_spec (&type2, name, &op);
  if (type != type2)
goto syntax;
@@ -657,15 +657,14 @@ gfc_match_use (void)
goto syntax;
  if (m == MATCH_ERROR)
goto cleanup;
- strcpy (new_use->use_name, name);
+ new_use->use_name = name;
}
}
  else
{
  if (m != MATCH_YES)
goto syntax;
- strcpy (new_use->local_name, name);
-
+ new_use->local_name = name;
  m = gfc_match_generic_spec (&type2, name, &op);
  if (type != type2)
goto syntax;
@@ -673,11 +672,11 @@ gfc_match_use (void)
goto syntax;
  if (m == MATCH_ERROR)
goto cleanup;
- strcpy (new_use->use_name, name);
+ new_use->use_name = name;
}
 
- if (strcmp (new_use->use_name, use_list->module_name) == 0
- || strcmp (new_use->local_name, use_list->module_name) == 0)
+ if (new_use->use_name == use_list->module_name
+ || new_use->local_name == use_list->module_name)
{
  gfc_error ("The name %qs at %C has already been used as "
 "an external module name", use_list->module_name);
@@ -848,8 +847,8 @@ find_use_name_n (const char *name, int *inst, bool 
interface)
   i = 0;
   for (u = gfc_rename_list; u; u = u->next)
 {
-  if ((!low_name && strcmp (u->use_name, name) != 0)
- || (low_name && strcmp (u->use_name, low_name) != 0)
+  if ((!low_name && u->use_name != name)
+ || (low_name && u->use_name != low_name)
  || (u->op == INTRINSIC_USER && !interface)
  || (u->op != INTRINSIC_USER &&  interface))

[PATCH,FORTRAN 13/29] Use stringpool for intrinsics and common

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

gcc/fortran/ChangeLog:

2017-11-15  Bernhard Reutner-Fischer  

* gfortran.h (struct gfc_common_head, struct gfc_intrinsic_arg):
Make name a pointer.
* intrinsic.c (add_sym): Use stringpool for name.
* match.c (gfc_get_common): Likewise.
* symbol.c (set_symbol_common_block): Likewise.
* trans-common.c (gfc_sym_mangled_common_id): Likewise.
(finish_equivalences): Likewise.
(gfc_trans_common): Likewise.
---
 gcc/fortran/gfortran.h |  4 ++--
 gcc/fortran/intrinsic.c| 11 +++
 gcc/fortran/match.c|  2 +-
 gcc/fortran/symbol.c   |  2 +-
 gcc/fortran/trans-common.c | 10 +-
 5 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index cb9195d393e..039719644ea 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1641,7 +1641,7 @@ typedef struct gfc_common_head
   char use_assoc, saved, threadprivate;
   unsigned char omp_declare_target : 1;
   unsigned char omp_declare_target_link : 1;
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name;
   struct gfc_symbol *head;
   const char* binding_label;
   int is_bind_c;
@@ -1978,7 +1978,7 @@ gfc_ref;
 /* Structures representing intrinsic symbols and their arguments lists.  */
 typedef struct gfc_intrinsic_arg
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name;
 
   gfc_typespec ts;
   unsigned optional:1, value:1;
diff --git a/gcc/fortran/intrinsic.c b/gcc/fortran/intrinsic.c
index 609668613a7..3a32a7824bf 100644
--- a/gcc/fortran/intrinsic.c
+++ b/gcc/fortran/intrinsic.c
@@ -317,7 +317,6 @@ add_sym (const char *name, gfc_isym_id id, enum klass cl, 
int actual_ok, bt type
 int standard, gfc_check_f check, gfc_simplify_f simplify,
 gfc_resolve_f resolve, ...)
 {
-  char buf[GFC_MAX_SYMBOL_LEN + 11]; /* 10 for '_gfortran_', 1 for '\0'  */
   int optional, first_flag;
   sym_intent intent;
   va_list argp;
@@ -334,11 +333,7 @@ add_sym (const char *name, gfc_isym_id id, enum klass cl, 
int actual_ok, bt type
 
 case SZ_NOTHING:
   next_sym->name = gfc_get_string ("%s", name);
-
-  strcpy (buf, "_gfortran_");
-  strcat (buf, name);
-  next_sym->lib_name = gfc_get_string ("%s", buf);
-
+  next_sym->lib_name = gfc_get_string ("_gfortran_%s", name);
   next_sym->pure = (cl != CLASS_IMPURE);
   next_sym->elemental = (cl == CLASS_ELEMENTAL);
   next_sym->inquiry = (cl == CLASS_INQUIRY);
@@ -388,7 +383,7 @@ add_sym (const char *name, gfc_isym_id id, enum klass cl, 
int actual_ok, bt type
 
  first_flag = 0;
 
- strcpy (next_arg->name, name);
+ next_arg->name = gfc_get_string ("%s", name);
  next_arg->ts.type = type;
  next_arg->ts.kind = kind;
  next_arg->optional = optional;
@@ -4145,7 +4140,7 @@ keywords:
   for (; a; a = a->next)
 {
   for (f = formal; f; f = f->next)
-   if (strcmp (a->name, f->name) == 0)
+   if (a->name == f->name)
  break;
 
   if (f == NULL)
diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 2c4d6e8228c..fd91e280b93 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -5029,7 +5029,7 @@ gfc_get_common (const char *name, int from_module)
 {
   st->n.common = gfc_get_common_head ();
   st->n.common->where = gfc_current_locus;
-  strcpy (st->n.common->name, name);
+  st->n.common->name = name;
 }
 
   return st->n.common;
diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index 00a178772df..cc9d4e3f9d8 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -3057,7 +3057,7 @@ set_symbol_common_block (gfc_symbol *sym, gfc_common_head 
*common_block)
   if (sym->common_block == common_block)
 return;
 
-  if (sym->common_block && sym->common_block->name[0] != '\0')
+  if (sym->common_block && sym->common_block->name != NULL)
 {
   sym->common_block->refs--;
   if (sym->common_block->refs == 0)
diff --git a/gcc/fortran/trans-common.c b/gcc/fortran/trans-common.c
index bd9721dee41..18f87e00320 100644
--- a/gcc/fortran/trans-common.c
+++ b/gcc/fortran/trans-common.c
@@ -243,16 +243,16 @@ gfc_sym_mangled_common_id (gfc_common_head *com)
 {
   int has_underscore;
   char mangled_name[GFC_MAX_MANGLED_SYMBOL_LEN + 1];
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name;
 
   /* Get the name out of the common block pointer.  */
-  strcpy (name, com->name);
+  name = com->name;
 
   /* If we're suppose to do a bind(c).  */
   if (com->is_bind_c == 1 && com->binding_label)
 return get_identifier (com->binding_label);
 
-  if (strcmp (name, BLANK_COMMON_NAME) == 0)
+  if (name == gfc_get_string (BLANK_COMMON_NAME))
 return get_identifier (name);
 
   if (flag_underscoring)
@@ -1252,7 +1252,7 @@ finish_equivalences (gfc_namespace *ns)
  c->where = ns->proc_name->declared_at;
else if (ns->is_block_data)
  c->wher

[PATCH,FORTRAN 03/29] Use stringpool for gfc_get_name

2018-09-05 Thread Bernhard Reutner-Fischer
From: Bernhard Reutner-Fischer 

Occurrences of name2 in this patch will be fixed later in this series.

gcc/fortran/ChangeLog:

2017-10-23  Bernhard Reutner-Fischer  

* match.h (gfc_match_name): Pass argument by reference. Adjust
all callers.
(match_common_name): Likewise.
* match.c (gfc_match_name): Set result to IDENTIFIER_POINTER of
stringpool node.
(gfc_match_member_sep, gfc_match_sym_tree, gfc_match,
gfc_match_else, gfc_match_elseif, match_common_name,
gfc_match_common, gfc_match_ptr_fcn_assign, match_case_eos,
gfc_match_elsewhere): Adjust.
* decl.c (variable_decl): Set name via gfc_get_string() and
adjust calls to gfc_match_name.
(match_data_constant, check_function_name, get_bind_c_idents,
gfc_match_formal_arglist, match_result, match_procedure_interface,
match_ppc_decl, match_procedure_in_interface, gfc_match_entry,
gfc_match_end, attr_decl1, gfc_match_modproc, gfc_match_type,
enumerator_decl, match_procedure_in_type, gfc_match_generic,
gfc_match_final_decl, gfc_match_gcc_attributes): Adjust.
* interface.c (gfc_match_generic_spec): Adjust.
* io.c (match_io): Adjust.
* module.c (gfc_match_use): Adjust.
* openmp.c (gfc_match_omp_clauses, gfc_match_oacc_routine): Adjust.
* primary.c (match_kind_param, match_sym_complex_part,
match_actual_arg, match_keyword_arg, gfc_match_varspec,
gfc_match_rvalue): Adjust.
---
 gcc/fortran/decl.c  | 95 +
 gcc/fortran/interface.c |  5 ++-
 gcc/fortran/io.c|  6 +--
 gcc/fortran/match.c | 56 +---
 gcc/fortran/match.h |  4 +-
 gcc/fortran/module.c|  5 ++-
 gcc/fortran/openmp.c| 25 +--
 gcc/fortran/primary.c   | 31 +++---
 8 files changed, 116 insertions(+), 111 deletions(-)

diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index 03298833c98..f0ff5138ca1 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -352,7 +352,7 @@ syntax:
 static match
 match_data_constant (gfc_expr **result)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   gfc_symbol *sym, *dt_sym = NULL;
   gfc_expr *expr;
   match m;
@@ -404,7 +404,7 @@ match_data_constant (gfc_expr **result)
 
   gfc_current_locus = old_loc;
 
-  m = gfc_match_name (name);
+  m = gfc_match_name (&name);
   if (m != MATCH_YES)
 return m;
 
@@ -2261,7 +2261,7 @@ match_pointer_init (gfc_expr **init, int procptr)
 
 
 static bool
-check_function_name (char *name)
+check_function_name (const char *name)
 {
   /* In functions that have a RESULT variable defined, the function name always
  refers to function calls.  Therefore, the name is not allowed to appear in
@@ -2294,7 +2294,7 @@ check_function_name (char *name)
 static match
 variable_decl (int elem)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   static unsigned int fill_id = 0;
   gfc_expr *initializer, *char_len;
   gfc_array_spec *as;
@@ -2326,7 +2326,7 @@ variable_decl (int elem)
 
   if (m != MATCH_YES)
 {
-  m = gfc_match_name (name);
+  m = gfc_match_name (&name);
   if (m != MATCH_YES)
goto cleanup;
 }
@@ -2351,7 +2351,7 @@ variable_decl (int elem)
}
 
   /* %FILL components are given invalid fortran names.  */
-  snprintf (name, GFC_MAX_SYMBOL_LEN + 1, "%%FILL%u", fill_id++);
+  name = gfc_get_string ("%%FILL%u", fill_id++);
   m = MATCH_YES;
 }
 
@@ -2584,13 +2584,13 @@ variable_decl (int elem)
   if (gfc_current_state () == COMP_FUNCTION
   && strcmp ("ppr@", gfc_current_block ()->name) == 0
   && strcmp (name, gfc_current_block ()->ns->proc_name->name) == 0)
-strcpy (name, "ppr@");
+name = gfc_get_string ("%s", "ppr@");
 
   if (gfc_current_state () == COMP_FUNCTION
   && strcmp (name, gfc_current_block ()->name) == 0
   && gfc_current_block ()->result
   && strcmp ("ppr@", gfc_current_block ()->result->name) == 0)
-strcpy (name, "ppr@");
+name = gfc_get_string ("%s", "ppr@");
 
   /* OK, we've successfully matched the declaration.  Now put the
  symbol in the current namespace, because it might be used in the
@@ -5694,13 +5694,13 @@ set_verify_bind_c_com_block (gfc_common_head 
*com_block, int num_idents)
 bool
 get_bind_c_idents (void)
 {
-  char name[GFC_MAX_SYMBOL_LEN + 1];
+  const char *name = NULL;
   int num_idents = 0;
   gfc_symbol *tmp_sym = NULL;
   match found_id;
   gfc_common_head *com_block = NULL;
 
-  if (gfc_match_name (name) == MATCH_YES)
+  if (gfc_match_name (&name) == MATCH_YES)
 {
   found_id = MATCH_YES;
   gfc_get_ha_symbol (name, &tmp_sym);
@@ -5745,7 +5745,7 @@ get_bind_c_idents (void)
found_id = MATCH_NO;
  else if (gfc_match_char (',') != MATCH_YES)
found_id = MATCH_NO;
- else if (gfc_match_name (name) == MATCH_YES)
+ else if

  1   2   >