[PATCH] sparc/sparc64: use PIE_SPEC to select crtendS.o [PR96190]

2020-07-14 Thread Sergei Trofimovich via Gcc-patches
From: Sergei Trofimovich 

In --enable-default-pie mode compiler should switch from
using crtend.o to crtendS.o. On sparc it is especially visible
because crtend.o contains PIC-unfriendly code.

gcc:

2020-07-14  Sergei Trofimovich  

PR driver/96190
* config/sparc/linux.h: Use PIE_SPEC to select crtendS.o.
* config/sparc/linux64.h: ditto
---
 gcc/config/sparc/linux.h   | 2 +-
 gcc/config/sparc/linux64.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h
index 81201e67a2f..13d1d60 100644
--- a/gcc/config/sparc/linux.h
+++ b/gcc/config/sparc/linux.h
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef  ENDFILE_SPEC
 #define ENDFILE_SPEC \
-  "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
+  "%{shared|" PIE_SPEC ":crtendS.o%s;:crtend.o%s} crtn.o%s\
%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
 /* -mcpu=native handling only makes sense with compiler running on
diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h
index a1a0efd8f28..be937dbaaf6 100644
--- a/gcc/config/sparc/linux64.h
+++ b/gcc/config/sparc/linux64.h
@@ -52,7 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef ENDFILE_SPEC
 #define ENDFILE_SPEC \
-  "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
+  "%{shared|" PIE_SPEC ":crtendS.o%s;:crtend.o%s} crtn.o%s\
%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
 /* The default code model.  */
-- 
2.27.0



[Patch] libgomp: Add Fortran routine support for allocators

2020-07-14 Thread Tobias Burnus

This patch adds the Fortran part to Jakub's generic + C/C++ commit
"openmp: Add basic library allocator support."
https://gcc.gnu.org/g:800bcc8c00f3ce940aa174845bb61faca9e85d36

OK for the trunk?
(Will then later be also backported to OG10)

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
libgomp: Add Fortran routine support for allocators

libgomp/ChangeLog:

	* allocator.c: Add ialias for omp_init_allocator and
	omp_destroy_allocator.
	* configure.ac: Set INTPTR_T_KIND.
	* configure: Regenerate.
	* Makefile.in: Regenerate.
	* testsuite/Makefile.in: Regenerate.
	* fortran.c (omp_init_allocator_, omp_destroy_allocator_,
	omp_set_default_allocator_, omp_get_default_allocator_): New
	functions and ialias_redirect.
	* icv.c: Add ialias for omp_set_default_allocator and
	omp_get_default_allocator.
	* libgomp.map (OMP_5.0.1): Add omp_init_allocator_,
	omp_destroy_allocator_, omp_set_default_allocator_ and
	omp_get_default_allocator_.
	* omp_lib.f90.in: Add allocator traits parameters, declare
	allocator routines and add related kind parameters.
	* omp_lib.h.in: Likewise.
	* testsuite/libgomp.c-c++-common/alloc-2.c: Fix sizeof.
	* testsuite/libgomp.fortran/alloc-1.f90: New test.
	* testsuite/libgomp.fortran/alloc-2.f90: New test.
	* testsuite/libgomp.fortran/alloc-3.f90: New test.

 libgomp/Makefile.in  |   1 +
 libgomp/allocator.c  |   3 +
 libgomp/configure|  11 +-
 libgomp/configure.ac |   2 +
 libgomp/fortran.c|  30 +
 libgomp/icv.c|   2 +
 libgomp/libgomp.map  |   4 +
 libgomp/omp_lib.f90.in   | 128 
 libgomp/omp_lib.h.in | 100 +++
 libgomp/testsuite/Makefile.in|   2 +
 libgomp/testsuite/libgomp.c-c++-common/alloc-2.c |   4 +-
 libgomp/testsuite/libgomp.fortran/alloc-1.f90| 147 +++
 libgomp/testsuite/libgomp.fortran/alloc-2.f90|  71 +++
 libgomp/testsuite/libgomp.fortran/alloc-3.f90|  23 
 14 files changed, 524 insertions(+), 4 deletions(-)

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index 76feba71082..7166538b1de 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -202,6 +202,9 @@ omp_destroy_allocator (omp_allocator_handle_t allocator)
 }
 }
 
+ialias (omp_init_allocator)
+ialias (omp_destroy_allocator)
+
 void *
 omp_alloc (size_t size, omp_allocator_handle_t allocator)
 {
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
index 201d26fff7a..d1034dab7f8 100644
--- a/libgomp/configure.ac
+++ b/libgomp/configure.ac
@@ -395,6 +395,7 @@ for i in $config_path; do
   fi
 done
 
+_AC_COMPUTE_INT([sizeof (__INTPTR_TYPE__)], [INTPTR_T_KIND])
 _AC_COMPUTE_INT([sizeof (omp_lock_t)], [OMP_LOCK_SIZE],,
   [AC_MSG_ERROR([unsupported system, cannot find sizeof (omp_lock_t)])])
 _AC_COMPUTE_INT([__alignof (omp_lock_t)], [OMP_LOCK_ALIGN])
@@ -428,6 +429,7 @@ if test $OMP_NEST_LOCK_25_SIZE -gt 8 || test $OMP_NEST_LOCK_25_ALIGN -gt $OMP_NE
   OMP_NEST_LOCK_25_KIND=8
 fi
 
+AC_SUBST(INTPTR_T_KIND)
 AC_SUBST(OMP_LOCK_SIZE)
 AC_SUBST(OMP_LOCK_ALIGN)
 AC_SUBST(OMP_NEST_LOCK_SIZE)
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 3705ff62b75..2c8aa6d4054 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -86,6 +86,10 @@ ialias_redirect (omp_get_initial_device)
 ialias_redirect (omp_get_max_task_priority)
 ialias_redirect (omp_pause_resource)
 ialias_redirect (omp_pause_resource_all)
+ialias_redirect (omp_destroy_allocator)
+ialias_redirect (omp_destroy_allocator)
+ialias_redirect (omp_set_default_allocator)
+ialias_redirect (omp_get_default_allocator)
 #endif
 
 #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
@@ -676,3 +680,29 @@ omp_pause_resource_all_ (const int32_t *kind)
 {
   return omp_pause_resource_all (*kind);
 }
+
+intptr_t
+omp_init_allocator_ (const intptr_t *memspace, const int32_t *ntraits,
+		const omp_alloctrait_t *traits)
+{
+  return (intptr_t) omp_init_allocator ((omp_memspace_handle_t) *memspace,
+	(int) *ntraits, traits);
+}
+
+void
+omp_destroy_allocator_ (const intptr_t *allocator)
+{
+  omp_destroy_allocator ((omp_allocator_handle_t) *allocator);
+}
+
+void
+omp_set_default_allocator_ (const intptr_t *allocator)
+{
+  omp_set_default_allocator ((omp_allocator_handle_t) *allocator);
+}
+
+intptr_t
+omp_get_default_allocator_ ()
+{
+  return (intptr_t) omp_get_default_allocator ();
+}
diff --git a/libgomp/icv.c b/libgomp/icv.c
index b13289b47a7..3c16abb9123 100644
--- a/libgomp/icv.c
+++ b/libgomp/icv.c
@@ -235,3 +235,5 @@ ialias (omp_get_num_places)
 ialias (omp_get_place_num)
 ialias (omp_get_partition_num_places)
 ialias (omp_get_partition_place_nums)
+ialias (o

[PATCH] target: fix default value checking of x_str_align_functions in aarch64.c

2020-07-14 Thread Hu Jiangping
Hi,

This patch deal with the -falign-X=0 options. According to man pages,
if zero is specified, a machine-dependent default value should be used.
But in fact, zero was used in internal process, it is inconsistent.

Tested on aarch64-linux cross compiler, Is that OK?

BTW, the similar problems exists in other target sources.
I can submit them all in another patch if needed,
but I can test on i386 target only.

Regards!
Hujp

---
 gcc/config/aarch64/aarch64.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 17dbe673978..697ac676f4d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14221,11 +14221,14 @@ aarch64_override_options_after_change_1 (struct 
gcc_options *opts)
  alignment to what the target wants.  */
   if (!opts->x_optimize_size)
 {
-  if (opts->x_flag_align_loops && !opts->x_str_align_loops)
+  if ((opts->x_flag_align_loops && !opts->x_str_align_loops)
+|| (opts->x_str_align_loops && strcmp(opts->x_str_align_loops, "0") == 
0))
opts->x_str_align_loops = aarch64_tune_params.loop_align;
-  if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
+  if ((opts->x_flag_align_jumps && !opts->x_str_align_jumps)
+|| (opts->x_str_align_jumps && strcmp(opts->x_str_align_jumps, "0") == 
0))
opts->x_str_align_jumps = aarch64_tune_params.jump_align;
-  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+  if ((opts->x_flag_align_functions && !opts->x_str_align_functions)
+|| (opts->x_str_align_functions && strcmp(opts->x_str_align_functions, 
"0") == 0))
opts->x_str_align_functions = aarch64_tune_params.function_align;
 }
 
-- 
2.17.1





[PATCH] builtins: Avoid useless char/short -> int promotions before atomics [PR96176]

2020-07-14 Thread Jakub Jelinek via Gcc-patches
Hi!

As mentioned in the PR, we generate a useless movzbl insn before lock cmpxchg.
The problem is that the builtin for the char/short cases has the arguments
promoted to int and combine gives up, because the instructions have
MEM_VOLATILE_P arguments and recog in that case doesn't recognize anything
when volatile_ok is false, and nothing afterwards optimizes the
(reg:SI a) = (zero_extend:SI (reg:QI a))
... (subreg:QI (reg:SI a) 0) ...

The following patch fixes it at expansion time, we already have a function
that is meant to undo the promotion, so this just adds the very common case
to that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-07-14  Jakub Jelinek  

PR target/96176
* builtins.c: Include gimple-ssa.h, tree-ssa-live.h and
tree-outof-ssa.h.
(expand_expr_force_mode): If exp is a SSA_NAME with different mode
from MODE and get_gimple_for_ssa_name is a cast from MODE, use the
cast's rhs.

* gcc.target/i386/pr96176.c: New test.

--- gcc/builtins.c.jj   2020-06-22 10:59:15.0 +0200
+++ gcc/builtins.c  2020-07-13 12:37:56.519653940 +0200
@@ -73,6 +73,9 @@ along with GCC; see the file COPYING3.
 #include "gomp-constants.h"
 #include "omp-general.h"
 #include "tree-dfa.h"
+#include "gimple-ssa.h"
+#include "tree-ssa-live.h"
+#include "tree-outof-ssa.h"
 
 struct target_builtins default_target_builtins;
 #if SWITCHABLE_TARGET
@@ -6671,6 +6674,21 @@ expand_expr_force_mode (tree exp, machin
   rtx val;
   machine_mode old_mode;
 
+  if (TREE_CODE (exp) == SSA_NAME
+  && TYPE_MODE (TREE_TYPE (exp)) != mode)
+{
+  /* Undo argument promotion if possible, as combine might not
+be able to do it later due to MEM_VOLATILE_P uses in the
+patterns.  */
+  gimple *g = get_gimple_for_ssa_name (exp);
+  if (g && gimple_assign_cast_p (g))
+   {
+ tree rhs = gimple_assign_rhs1 (g);
+ if (TYPE_MODE (TREE_TYPE (rhs)) == mode)
+   exp = rhs;
+   }
+}
+
   val = expand_expr (exp, NULL_RTX, mode, EXPAND_NORMAL);
   /* If VAL is promoted to a wider mode, convert it back to MODE.  Take care
  of CONST_INTs, where we know the old_mode only from the call argument.  */
--- gcc/testsuite/gcc.target/i386/pr96176.c.jj  2020-07-13 12:44:15.940149701 
+0200
+++ gcc/testsuite/gcc.target/i386/pr96176.c 2020-07-13 12:45:07.822396980 
+0200
@@ -0,0 +1,13 @@
+/* PR target/96176 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tmovzbl\t" } } */
+
+unsigned char v;
+
+void
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, 
__ATOMIC_RELAXED);
+  v = y;
+}

Jakub



[PATCH] c++: Use error_at rather than warning_at for missing return in constexpr functions [PR96182]

2020-07-14 Thread Jakub Jelinek via Gcc-patches
Hi!

For C++11 we already emit an error if a constexpr function doesn't contain
a return statement, because in C++11 that is the only thing it needs to
contain, but for C++14 we would normally issue a -Wreturn-type warning.

As mentioned by Jonathan, such constexpr functions are invalid, no
diagnostics required, because there doesn't exist any arguments for
which it would result in valid constant expression.

This raises it to an error in such cases.  The !LAMBDA_TYPE_P case
is to avoid error on g++.dg/pr81194.C where the user didn't write
constexpr anywhere and the operator() is compiler generated.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-07-14  Jakub Jelinek  

PR c++/96182
* decl.c (finish_function): In constexpr functions other than
lambdas use for C++14 and later error instead of warning if no
return statement is present and diagnose it regardless of
warn_return_type.

* g++.dg/cpp1y/constexpr-96182.C: New test.
* g++.dg/other/error35.C (S::g()): Add return statement.
* g++.dg/cpp1y/pr63996.C (foo): Likewise.
* g++.dg/cpp1y/constexpr-return2.C (f): Likewise.
* g++.dg/cpp1y/var-templ44.C (make_array): Add throw 1.

--- gcc/cp/decl.c.jj2020-07-13 19:09:27.258953908 +0200
+++ gcc/cp/decl.c   2020-07-13 22:25:42.437062842 +0200
@@ -17164,7 +17164,10 @@ finish_function (bool inline_p)
   BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
 
   /* Complain if there's just no return statement.  */
-  if (warn_return_type
+  if ((warn_return_type
+   || (cxx_dialect >= cxx14
+  && DECL_DECLARED_CONSTEXPR_P (fndecl)
+  && !LAMBDA_TYPE_P (CP_DECL_CONTEXT (fndecl
   && !VOID_TYPE_P (TREE_TYPE (fntype))
   && !dependent_type_p (TREE_TYPE (fntype))
   && !current_function_returns_value && !current_function_returns_null
@@ -17196,8 +17199,14 @@ finish_function (bool inline_p)
global_dc->option_state))
add_return_star_this_fixit (&richloc, fndecl);
}
-  if (warning_at (&richloc, OPT_Wreturn_type,
- "no return statement in function returning non-void"))
+  if (cxx_dialect >= cxx14
+ && DECL_DECLARED_CONSTEXPR_P (fndecl)
+ && !LAMBDA_TYPE_P (CP_DECL_CONTEXT (fndecl)))
+   error_at (&richloc, "no return statement in % function "
+   "returning non-void");
+  else if (warning_at (&richloc, OPT_Wreturn_type,
+  "no return statement in function returning "
+  "non-void"))
TREE_NO_WARNING (fndecl) = 1;
 }
 
--- gcc/testsuite/g++.dg/other/error35.C.jj 2020-01-12 11:54:37.214401324 
+0100
+++ gcc/testsuite/g++.dg/other/error35.C2020-07-13 22:35:55.359228614 
+0200
@@ -9,6 +9,6 @@ template  struct S {
 enum S::E;
 template  enum S::E : int { b };
 template 
-constexpr int S::g() const { b; } // { dg-error "not declared" }
+constexpr int S::g() const { b; if (false) return 0; } // { dg-error "not 
declared" }
 static_assert(S().g() == 1, ""); // { dg-error "" }
 // { dg-message "in .constexpr. expansion of" "" { target *-*-* } .-1 }
--- gcc/testsuite/g++.dg/cpp1y/pr63996.C.jj 2020-01-12 11:54:37.0 
+0100
+++ gcc/testsuite/g++.dg/cpp1y/pr63996.C2020-07-13 22:17:39.034004329 
+0200
@@ -5,6 +5,7 @@ constexpr int
 foo (int i)
 {
   int a[i] = { }; // { dg-error "7:ISO C\\+\\+ forbids variable length array 
.a" }
+  if (i == 23) return 0;
 }
 
 constexpr int j = foo (1); // { dg-error "flows off the end|in .constexpr. 
expansion of" }
--- gcc/testsuite/g++.dg/cpp1y/constexpr-96182.C.jj 2020-07-13 
19:16:42.742936492 +0200
+++ gcc/testsuite/g++.dg/cpp1y/constexpr-96182.C2020-07-13 
19:16:12.264357640 +0200
@@ -0,0 +1,6 @@
+// PR c++/96182
+// { dg-do compile { target c++11 } }
+
+constexpr int foo () {} // { dg-error "no return statement in 'constexpr' 
function returning non-void" "" { target c++14 } }
+// { dg-error "body of 'constexpr' function 'constexpr int foo\\\(\\\)' not a 
return-statement" "" { target c++11_only } .-1 }
+// { dg-warning "no return statement in function returning non-void" "" { 
target c++11_only } .-2 }
--- gcc/testsuite/g++.dg/cpp1y/constexpr-return2.C.jj   2020-01-12 
11:54:37.115402818 +0100
+++ gcc/testsuite/g++.dg/cpp1y/constexpr-return2.C  2020-07-13 
22:17:03.582513397 +0200
@@ -3,6 +3,7 @@
 
 constexpr int f (int i)
 {
+  if (i == -1) return 0;
 }
 
 constexpr int i = f(42);   // { dg-error "flows off the end|in .constexpr. 
expansion of " }
--- gcc/testsuite/g++.dg/cpp1y/var-templ44.C.jj 2020-01-12 11:54:37.123402697 
+0100
+++ gcc/testsuite/g++.dg/cpp1y/var-templ44.C2020-07-13 22:35:03.322980157 
+0200
@@ -26,5 +26,6 @@ constexpr auto make_array()
 -> array, common_type_t<>, _Dest>,
  sizeof...(_Types)> {
   static_assert(__or_<__not_>, __and_<>>::value, ""); // { 
dg-error "static as

[PATCH] driver: Fix several memory leaks

2020-07-14 Thread Alex Coplan
Updating the subject since this is really just a driver change (and
therefore needs a review from those who can approve patches there).

Thanks,
Alex

-Original Message-
From: Jit  On Behalf Of Alex Coplan
Sent: 09 July 2020 21:13
To: gcc-patches@gcc.gnu.org; j...@gcc.gnu.org
Cc: nd 
Subject: [PATCH] libgccjit: Fix several memory leaks in the driver

Hello,

This patch fixes several memory leaks in the driver, all of which relate
to the handling of static specs. We introduce functions
set_static_spec_{shared,owned}() which are used to enforce proper memory
management when updating the strings in the static_specs table.

This is achieved by making use of the alloc_p field in the table
entries. Similarly to set_spec(), each time we update an entry, we check
whether alloc_p is set, and free the old value if so. We then set
alloc_p correctly based on whether we "own" this memory or whether we're
just taking a pointer to a shared string which we shouldn't free.

The following table shows the number of leaks found by AddressSanitizer
when running a minimal libgccjit program on AArch64. The test program
does the whole libgccjit compilation cycle in a loop (including acquiring
and releasing the context), and the table below shows the number of leaks
for different iterations of that loop.

+--+-+-+--+---+
| # of runs >  | 1   | 2   | 3| Leaks per run |
+--+-+-+--+---+
| Before patch | 463 | 940 | 1417 | 477   |
+--+-+-+--+---+
| After patch  | 416 | 846 | 1276 | 430   |
+--+-+-+--+---+

Ensuring that we minimize "leaks per run" (ultimately eliminating all of
them) is important in order for long-running applications to be able to
make use of in-process libgccjit.

Testing:
 * Bootstrap and regtest on aarch64-linxu-gnu, x86_64-linux-gnu.
 * Bootstrap and regtest on aarch64-linux-gnu with bootstrap-asan config.
 * Smoke test of libgccjit, ran regressions on a --disable-bootstrap build on
   aarch64-linux-gnu.

OK for master?

Thanks,
Alex

---

gcc/ChangeLog:

2020-07-09  Alex Coplan  

* gcc.c (set_static_spec): New.
(set_static_spec_owned): New.
(set_static_spec_shared): New.
(driver::maybe_putenv_COLLECT_LTO_WRAPPER): Use
set_static_spec_owned() to take ownership of lto_wrapper_file
such that it gets freed in driver::finalize.
(driver::maybe_run_linker): Use set_static_spec_shared() to
ensure that we don't try and free() the static string "ld",
also ensuring that any previously-allocated string in
linker_name_spec is freed. Likewise with argv0.
(driver::finalize): Use set_static_spec_shared() when resetting
specs that previously had allocated strings; remove if(0)
around call to free().

diff --git a/gcc/gcc.c b/gcc/gcc.c
index c0eb3c10cfd..f839971d42d 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -1908,6 +1908,47 @@ init_spec (void)
 
   specs = sl;
 }
+
+static void
+set_static_spec (const char **spec, const char *value, bool alloc_p)
+{
+  struct spec_list *sl = NULL;
+
+  for (unsigned i = 0; i < ARRAY_SIZE (static_specs); i++)
+{
+  if (static_specs[i].ptr_spec == spec)
+   {
+ sl = static_specs + i;
+ break;
+   }
+}
+
+  gcc_assert (sl);
+
+  if (sl->alloc_p)
+{
+  const char *old = *spec;
+  free (const_cast  (old));
+}
+
+  *spec = value;
+  sl->alloc_p = alloc_p;
+}
+
+/* Update a static spec to a new string, taking ownership of that
+   string's memory.  */
+static void set_static_spec_owned (const char **spec, const char *val)
+{
+  return set_static_spec (spec, val, true);
+}
+
+/* Update a static spec to point to a new value, but don't take
+   ownership of (i.e. don't free) that string.  */
+static void set_static_spec_shared (const char **spec, const char *val)
+{
+  return set_static_spec (spec, val, false);
+}
+
 
 /* Change the value of spec NAME to SPEC.  If SPEC is empty, then the spec is
removed; If the spec starts with a + then SPEC is added to the end of the
@@ -8333,7 +8374,7 @@ driver::maybe_putenv_COLLECT_LTO_WRAPPER () const
   if (lto_wrapper_file)
 {
   lto_wrapper_file = convert_white_space (lto_wrapper_file);
-  lto_wrapper_spec = lto_wrapper_file;
+  set_static_spec_owned ( 0
@@ -8864,7 +8905,7 @@ driver::maybe_run_li

Re: [Patch] [OpenMP, Fortran] Add structure/derived-type element mapping

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Wed, Jun 24, 2020 at 07:32:09PM +0200, Tobias Burnus wrote:
> Comments, remarks, suggestions?
> Otherwise: OK for the trunk?

LGTM, thanks.

> [OpenMP, Fortran] Add structure/derived-type element mapping
> 
> gcc/fortran/ChangeLog:
> 
>   * openmp.c (gfc_match_omp_clauses): Match also derived-type
>   component refs in OMP_CLAUSE_MAP.
>   (resolve_omp_clauses): Resolve those.
>   * trans-openmp.c (gfc_trans_omp_array_section, gfc_trans_omp_clauses):
>   Handle OpenMP structure-element mapping.
>   (gfc_trans_oacc_construct, gfc_trans_oacc_executable_directive,
>   (gfc_trans_oacc_combined_directive, gfc_trans_oacc_declare): Update
>   add openacc=true in gfc_trans_omp_clauses call.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gfortran.dg/goacc/finalize-1.f: Update dump scan pattern.
>   * gfortran.dg/gomp/map-1.f90: Update dg-error.
>   * gfortran.dg/gomp/map-2.f90: New test.
> 
> 
> libgomp/ChangeLog:
> 
>   * testsuite/libgomp.fortran/struct-elem-map-1.f90: New test.

Jakub



Re: [Patch][Fortran, OpenMP] Fix allocatable-components check (PR67311)

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Thu, Jun 25, 2020 at 04:06:24PM +0200, Tobias Burnus wrote:
> [Fortran, OpenMP] Fix allocatable-components check (PR67311)
> 
> gcc/fortran/ChangeLog:
> 
>   PR fortran/67311
>   * trans-openmp.c (gfc_has_alloc_comps): Return false also for
>   pointers to arrays.
> 
> libgomp/ChangeLog:
> 
>   PR fortran/67311
>   * testsuite/libgomp.fortran/target-map-1.f90: New test.

Ok, thanks.

Jakub



Re: [Patch] libgomp: Add Fortran routine support for allocators

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Tue, Jul 14, 2020 at 09:15:21AM +0200, Tobias Burnus wrote:
> libgomp/ChangeLog:
> 
>   * allocator.c: Add ialias for omp_init_allocator and
>   omp_destroy_allocator.
>   * configure.ac: Set INTPTR_T_KIND.
>   * configure: Regenerate.
>   * Makefile.in: Regenerate.
>   * testsuite/Makefile.in: Regenerate.
>   * fortran.c (omp_init_allocator_, omp_destroy_allocator_,
>   omp_set_default_allocator_, omp_get_default_allocator_): New
>   functions and ialias_redirect.
>   * icv.c: Add ialias for omp_set_default_allocator and
>   omp_get_default_allocator.
>   * libgomp.map (OMP_5.0.1): Add omp_init_allocator_,
>   omp_destroy_allocator_, omp_set_default_allocator_ and
>   omp_get_default_allocator_.
>   * omp_lib.f90.in: Add allocator traits parameters, declare
>   allocator routines and add related kind parameters.
>   * omp_lib.h.in: Likewise.
>   * testsuite/libgomp.c-c++-common/alloc-2.c: Fix sizeof.
>   * testsuite/libgomp.fortran/alloc-1.f90: New test.
>   * testsuite/libgomp.fortran/alloc-2.f90: New test.
>   * testsuite/libgomp.fortran/alloc-3.f90: New test.

> --- a/libgomp/fortran.c
> +++ b/libgomp/fortran.c
> @@ -86,6 +86,10 @@ ialias_redirect (omp_get_initial_device)
>  ialias_redirect (omp_get_max_task_priority)
>  ialias_redirect (omp_pause_resource)
>  ialias_redirect (omp_pause_resource_all)
> +ialias_redirect (omp_destroy_allocator)
> +ialias_redirect (omp_destroy_allocator)

The first one should be omp_init_allocator, shouldn't it?

> +ialias_redirect (omp_set_default_allocator)
> +ialias_redirect (omp_get_default_allocator)
>  #endif
>  
>  #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING

> diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
> index fdbc0f4657d..0c9eba39d72 100644
> --- a/libgomp/omp_lib.f90.in
> +++ b/libgomp/omp_lib.f90.in
> @@ -24,13 +24,19 @@
>  !  .
>  
>module omp_lib_kinds
> +use iso_c_binding, only: c_intptr_t
>  implicit none
> +private :: c_intptr_t
>  integer, parameter :: omp_lock_kind = @OMP_LOCK_KIND@
>  integer, parameter :: omp_nest_lock_kind = @OMP_NEST_LOCK_KIND@
>  integer, parameter :: omp_sched_kind = 4
>  integer, parameter :: omp_proc_bind_kind = 4
>  integer, parameter :: omp_lock_hint_kind = 4
>  integer, parameter :: omp_pause_resource_kind = 4
> +integer, parameter :: omp_allocator_handle_kind = c_intptr_t
> +integer, parameter :: omp_alloctrait_key_kind = c_int

Is c_int a keyword?  Because you use only c_intptr_t from iso_c_binding...

> +integer, parameter :: omp_alloctrait_val_kind = c_intptr_t
> +integer, parameter :: omp_memspace_handle_kind = c_intptr_t

> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_false = 0
> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_true = 1
> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_default = 2

Please follow the recent (sure, again 5.1-ish) changes, namely that
omp_atv_default is -1 (and should be defined probably before omp_atv_false)
and 
> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_contended = 3
> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_uncontended = 4
> +integer (kind=omp_alloctrait_val_kind), &
> + parameter :: omp_atv_sequential = 5

integer (kind=omp_alloctrait_val_kind), &
 parameter :: omp_atv_serialized = 5
integer (kind=omp_alloctrait_val_kind), &
 parameter :: omp_atv_sequantial = omp_atv_serialized

> +interface
> +  function omp_init_allocator (memspace, ntraits, traits)
> +use omp_lib_kinds
> +integer (kind=omp_allocator_handle_kind) omp_init_allocator
> +integer (kind=omp_memspace_handle_kind), &
> +  intent(in) :: memspace
> +integer, intent(in) :: ntraits
> +type (omp_alloctrait), intent(in) :: traits(*)
> +  end function

Do we want to have 2 versions of this for different -fdefault-initeger-* ?
I mean like we have:
  subroutine omp_set_num_threads (num_threads)
integer (4), intent (in) :: num_threads
  end subroutine omp_set_num_threads
  subroutine omp_set_num_threads_8 (num_threads)
integer (8), intent (in) :: num_threads
  end subroutine omp_set_num_threads_8
have omp_init_allocator_ use integer (kind=4) ntraits, and
omp_init_allocator_8_ use integer (kind=8) ntraits?

> +interface
> +  subroutine omp_set_default_allocator (allocator)
> +use omp_lib_kinds
> +integer (kind=omp_allocator_handle_kind), &
> +  intent(in) :: allocator
> +  end subroutine

Re: [PATCH] libgomp: Add OMPD Address Space Information functions.

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Thu, Jul 09, 2020 at 07:01:00PM -0400, y2s1982 via Gcc-patches wrote:
> --- a/libgomp/libgompd.h
> +++ b/libgomp/libgompd.h
> @@ -47,4 +47,19 @@ typedef struct _ompd_aspace_handle {
>ompd_size_t ref_count;
>  } ompd_address_space_handle_t;
>  
> +struct gompd_env
> +{
> +  /* TODO: when the struct is better defined, turn it into a compact form.
> + LINK: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549698.html
> + For now, keep it as a struct.  */
> +
> +  /* Environment set version number.  */
> +  ompd_word_t gompd_env_version;
> +  /* Represents _OPENMP that is in mm format.  */
> +  ompd_word_t openmp_version;
> +};
> +
> +/* TODO: when gompd_env is better defined, turn it into a compact form.  */
> +extern struct gompd_env gompd_env_info;

I don't think you want this to be a global var.

> +  ompd_size_t macro_length = 6; /* _OPENMP format: mm.  */

So use omp_version_len = strlen ("mm");
and no comment is needed (the compiler will optimize it into constant)
or omp_version_len = sizeof ("yyymm") - 1;

> +  char *tmp = "GNU OpenMP Runtime implementing OpenMP 5.0 ";
> +  ompd_size_t tmp_length = strlen (tmp);

Use name instead of tmp in both vars?

> --- a/libgomp/ompd-lib.c
> +++ b/libgomp/ompd-lib.c
> @@ -31,6 +31,17 @@
>  
>  ompd_callbacks_t gompd_callbacks;
>  static int ompd_initialized = 0;
> +struct gompd_env gompd_env_info;

Again.

> +
> +ompd_rc_t
> +gompd_set_environment ()
> +{
> +  /* TODO: Turn this placeholder function to handle OMPD environment 
> variables
> + when it becomes compact.  */
> +  struct gompd_env temp_env = { 202007, 201811 };
> +  gompd_env_info = temp_env;
> +  return ompd_rc_ok;
> +}
>  
>  ompd_rc_t
>  ompd_get_api_version (ompd_word_t *version)
> @@ -57,6 +68,7 @@ ompd_initialize (ompd_word_t api_version, const 
> ompd_callbacks_t *callbacks)
>  return ompd_rc_error;
>  
>gompd_callbacks = *callbacks;
> +  gompd_set_environment ();

And you shouldn't call it here, but instead in ompd_process_initialize
and put the struct into the handle.

The thing is, the same OMPD library can e.g. handle a 64-bit and 32-bit
process, or one with older and one with newer libgomp.so.1.

Jakub



Re: [PATCH] libgccjit: Add new gcc_jit_context_new_blob entry point

2020-07-14 Thread Andrea Corallo
Andrea Corallo  writes:

> Andrea Corallo  writes:
>
>>> It occurred to me that the entrypoint is combining two things:
>>> - creating a global char[]
>>> - creating an initializer for that global
>>>
>>> which got me wondering if we should instead have a way to add
>>> initializers for globals.
>>>
>>> My first thought was something like:
>>>
>>> gcc_jit_context_new_global_with_initializer
>>>
>>> which would be like gcc_jit_context_new_global but would have an
>>> additional gcc_jit_rvalue *init_value param?
>>> The global would have to be of kind GCC_JIT_GLOBAL_EXPORTED or
>>> GCC_JIT_GLOBAL_INTERNAL, not GCC_JIT_GLOBAL_IMPORTED.
>>>
>>> Alternatively, maybe it would be better to have
>>>
>>> gcc_jit_global_set_initializer (gcc_jit_lvalue *global,
>>> gcc_jit_rvalue *init_val);
>>>
>>> to make the API more flexible.
>>>
>>> But even if we had this, we'd still need some way to create the rvalue
>>> for that initial value.  Also, maybe there ought to be a distinction
>>> between rvalues that can vary at runtime vs those that can be computed
>>> at compile-time (and are thus suitable for use in static
>>> initialization).
>>>
>>> I suspect you may have gone through the same thought process and come
>>> up with a simpler approach.   (I'm mostly just "thinking out loud"
>>> here, sorry if it isn't very coherent).
>>
>> Yes I had kind of similar thoughs.
>>
>> Ideally would be good to have a generic solution, the complication is
>> that as you mentioned not every rvalue is suitable for initializing
>> every global, but rather the opposite.  My fear was that the space to be
>> covered would be non trivial for a robust solution in this case.
>>
>> Also I believe we currently have no way to express in libgccjit rvalues
>> an array with some content, so how to use this as initializer?  Perhaps
>> also we should have a new type gcc_jit_initializer?
>>
>> On the other hand I thought that for simple things like integers the
>> problem is tipically already solved with an assignment in some init code
>> (infact I think so far nobody complained) while the real standing
>> limitation is for blobs (perhaps I'm wrong).  And in the end if you can
>> stuff some data in, you can use it later for any scope.
>>
>> Another "hybrid" solution would be to have specific entry point for each
>> type of the subset we want to allow for static initialization.  This way
>> we still control the creation of the initializer internally so it's
>> safe.  In this view this blob entry point would be just one of these
>> (probably with a different name like
>> 'gcc_jit_context_new_glob_init_char_array').
>>
>
> Hi Dave,
>
> wanted to ask if you formed an opinion about the patch and/or more in
> general the problem of static initialize data.
>
> Thanks
>
>   Andrea

Ping


Re: [PATCH] libgomp: Add OMPD functions in 5.5.6 and related data types.

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Sat, Jul 11, 2020 at 06:05:36PM -0400, y2s1982 wrote:
> 2020-07-11  Tony Sim  
> 
> libgomp/ChangeLog:
> 
>   * libgompd.h (ompd_thread_handle_t): Add.
>   (ompd_parallel_handle_t): Add.
>   (ompd_task_handle_t): Add.
>   * ompd-parallel.c: New file.

So you add a new file, but don't add it to Makefile.am - that means
nothing will compile it.
> +}ompd_thread_handle_t;

Formatting, space after }
> +
> +typedef struct _ompd_parallel_handle{

and space bef before { (etc.).
> +  ompd_address_space_handle_t *ah;
> +  ompd_parallel_handle_t *enclosing_ph;
> +  ompd_size_t enclosed_ph;
> +  ompd_address_t thread_pool; /* Stores gomp_thread_pool *.  */
> +}ompd_parallel_handle_t;
> +
> +typedef struct _ompd_task_handle{
> +  ompd_parallel_handle_t *ph;
> +}ompd_task_handle_t;
> +
>  #endif /* LIBGOMPD_H */
> +  ompd_rc_t ret = ompd_rc_error;
> +  ompd_size_t i = 0;
> +  struct gomp_thread_pool * pool =
> +  (struct gomp_thread_pool *)ph->thread_pool.address;

Formatting, = should never be at the end of line.  And no space between
* and pool, so:
  struct gomp_thread_pool *pool
= (struct gomp_thread_pool *) ph->thread_pool.address;

> +  for (i = 0; i < pool->threads_used && ret == ompd_rc_error; i++)
> +  {
> +if (pool->threads[i]->ts.team == NULL)
> +  ret = ompd_rc_ok;
> +  }

Like I said on other patches, { would need to be indented by 2 spaces from
for, but as the body contains a single statement, just leave the {}s out
completely and then it can stay as is.

More important, I don't see any function that would initialize
e.g. threads_used, etc., IMNSHO you should start with those and
there write the reading of those from the inferior.
And, unless that routine copies everything from the inferior, which is risky
because it can change there any time, I think the above is not really what
you want, you instead want to read it from the inferior.
The debugged process (if it is a process and not e.g. a core file) is not in
the same address space as the debugger (that loads the OMPD library), so
even if you get pointers copied from the debugged process, you can't
dereference them, but need to use the callbacks to read the corresponding
memory.

Jakub



Re: [PATCH] libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof

2020-07-14 Thread Thomas Schwinge
Hi Kwok!

On 2020-07-13T16:29:14+0100, Kwok Cheung Yeung  wrote:
> When the version of nvprof in CUDA 9.0 is run on an OpenACC program, [...] the
> program deadlocks.

> I have added a testcase that sets up the situation presented by nvprof.

Thanks.  I have extended this one a little bit, to add some state
tracking to verify that we get the expected callbacks invoked, test what
we expect returned from 'acc_get_device_type', and in addition to your
'acc_ev_device_init_start' also verify the corresponding
'acc_ev_device_init_end'.

I've also updated the documentation.

> This
> testcase hangs without the patch (hence the short dg-timeout), and passes with
> it.

(Thus, 'dg-timeout' not really necessary anymore, but OK to leave in if
you'd like.)

> Okay for master, GCC 10 branch and OG10?

Thanks, OK, with the incremental patch merged in, unless there's anything
to discuss further.

> libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof
>
> The version of nvprof in CUDA 9.0 causes a hang when used to profile an
> OpenACC program.  This is because it calls acc_get_device_type from
> a callback called during device initialization, which then attempts
> to acquire acc_device_lock while it is already taken, resulting in
> deadlock.  This works around the issue by returning acc_device_none
> from acc_get_device_type without attempting to acquire the lock when
> initialization has not completed yet.
>
> 2020-07-13  Tom de Vries  

Should use Tom's CodeSourcery address, given that's when this work was
done.

>   Cesar Philippidis  
>   Thomas Schwinge  
>   Kwok Cheung Yeung  


Grüße
 Thomas


>   libgomp/
>   * oacc-init.c (acc_init_state_lock, acc_init_state, acc_init_thread):
>   New variable.
>   (acc_init_1): Set acc_init_thread to pthread_self ().  Set
>   acc_init_state to initializing at the start, and to initialized at the
>   end.
>   (self_initializing_p): New function.
>   (acc_get_device_type): Return acc_device_none if called by thread that
>   is currently executing acc_init_1.
>   * testsuite/libgomp.oacc-c-c++-common/acc_prof-cb-call.c: New.
>
> diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
> index 5d786a5..1e7f934 100644
> --- a/libgomp/oacc-init.c
> +++ b/libgomp/oacc-init.c
> @@ -40,6 +40,11 @@
>
>  static gomp_mutex_t acc_device_lock;
>
> +static gomp_mutex_t acc_init_state_lock;
> +static enum { uninitialized, initializing, initialized } acc_init_state
> +  = uninitialized;
> +static pthread_t acc_init_thread;
> +
>  /* A cached version of the dispatcher for the global "current" accelerator 
> type,
> e.g. used as the default when creating new host threads.  This is the
> device-type equivalent of goacc_device_num (which specifies which device 
> to
> @@ -228,6 +233,11 @@ acc_dev_num_out_of_range (acc_device_t d, int ord, int 
> ndevs)
>  static struct gomp_device_descr *
>  acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
>  {
> +  gomp_mutex_lock (&acc_init_state_lock);
> +  acc_init_state = initializing;
> +  acc_init_thread = pthread_self ();
> +  gomp_mutex_unlock (&acc_init_state_lock);
> +
>bool check_not_nested_p;
>if (implicit)
>  {
> @@ -317,6 +327,14 @@ acc_init_1 (acc_device_t d, acc_construct_t 
> parent_construct, int implicit)
>   &api_info);
>  }
>
> +  /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that 
> a
> + nested 'acc_get_device_type' called from a profiling callback still sees
> + 'initializing', so that we don't deadlock when it then again tries to 
> lock
> + 'goacc_prof_lock'.  See also the discussion in 'acc_get_device_type'.  
> */
> +  gomp_mutex_lock (&acc_init_state_lock);
> +  acc_init_state = initialized;
> +  gomp_mutex_unlock (&acc_init_state_lock);
> +
>return base_dev;
>  }
>
> @@ -643,6 +661,17 @@ acc_set_device_type (acc_device_t d)
>
>  ialias (acc_set_device_type)
>
> +static bool
> +self_initializing_p (void)
> +{
> +  bool res;
> +  gomp_mutex_lock (&acc_init_state_lock);
> +  res = (acc_init_state == initializing
> +  && pthread_equal (acc_init_thread, pthread_self ()));
> +  gomp_mutex_unlock (&acc_init_state_lock);
> +  return res;
> +}
> +
>  acc_device_t
>  acc_get_device_type (void)
>  {
> @@ -652,6 +681,15 @@ acc_get_device_type (void)
>
>if (thr && thr->base_dev)
>  res = acc_device_type (thr->base_dev->type);
> +  else if (self_initializing_p ())
> +/* The Cuda libaccinj64.so version 9.0+ calls acc_get_device_type during 
> the
> +   acc_ev_device_init_start event callback, which is dispatched during
> +   acc_init_1.  Trying to lock acc_device_lock during such a call (as we 
> do
> +   in the else clause below), will result in deadlock, since the lock has
> +   already been taken by the acc_init_1 caller.  We work around this 
> problem
> +   by using the acc_get

Re: [PATCH 8/9] [OpenACC] Fix standalone attach for Fortran assumed-shape array pointers

2020-07-14 Thread Thomas Schwinge
Hi Julian, Tobias!

On 2020-06-16T15:39:44-0700, Julian Brown  wrote:
> As mentioned in the blurb for the previous patch, an "attach" operation
> for a Fortran pointer with an array descriptor must copy that array
> descriptor to the target.

Heh, I see -- I don't think I had read the OpenACC standard in that way,
but I think I agree your interpretation is fine.

This does not create some sort of memory leak -- everything implicitly
allocated there will eventually be deallocated again, right?

> This patch arranges for that to be so.

In response to the new OpenACC/Fortran testcase that I'd submtited in
<87wo3co0tm.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87wo3co0tm.fsf@euler.schwinge.homeip.net>,
you (Julian) correctly supposed in
, that
this patch indeed does resolve that testcase, too.  That wasn't obvious
to me.  So, similar to
'libgomp/testsuite/libgomp.oacc-c-c++-common/pr95270-{1.2}.c', please
include my new OpenACC/Fortran testcase (if that makes sense to you), and
reference PR95270 in the commit log.

> OK?

Basically yes (for master and releases/gcc-10 branches), but please
consider the following:

> --- a/gcc/fortran/trans-openmp.c
> +++ b/gcc/fortran/trans-openmp.c
> @@ -2573,8 +2573,44 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
> gfc_omp_clauses *clauses,
>   }
>   }
> if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))
> -   && n->u.map_op != OMP_MAP_ATTACH
> -   && n->u.map_op != OMP_MAP_DETACH)
> +   && (n->u.map_op == OMP_MAP_ATTACH
> +   || n->u.map_op == OMP_MAP_DETACH))
> + {
> +   tree type = TREE_TYPE (decl);
> +   tree data = gfc_conv_descriptor_data_get (decl);
> +   if (present)
> + data = gfc_build_cond_assign_expr (block, present,
> +data,
> +null_pointer_node);
> +   tree ptr
> + = fold_convert (build_pointer_type (char_type_node),
> + data);
> +   ptr = build_fold_indirect_ref (ptr);
> +   /* Standalone attach clauses used with arrays with
> +  descriptors must copy the descriptor to the target,
> +  else they won't have anything to perform the
> +  attachment onto (see OpenACC 2.6, "2.6.3. Data
> +  Structures with Pointers").  */
> +   OMP_CLAUSE_DECL (node) = ptr;
> +   node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP);
> +   OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET);
> +   OMP_CLAUSE_DECL (node2) = decl;
> +   OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
> +   node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP);
> +   if (n->u.map_op == OMP_MAP_ATTACH)
> + {
> +   OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_ATTACH);
> +   n->u.map_op = OMP_MAP_ALLOC;
> + }
> +   else  /* OMP_MAP_DETACH.  */
> + {
> +   OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_DETACH);
> +   n->u.map_op = OMP_MAP_RELEASE;
> + }
> +   OMP_CLAUSE_DECL (node3) = data;
> +   OMP_CLAUSE_SIZE (node3) = size_int (0);
> + }

So this ("case A") duplicates most of the code from...

> +   else if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
>   {
> [...]

... this existing case here ("case B").  It's not clear to me if these
two cases really still need to be handled separately, and a little bit
differently (regarding 'if (present)' handling, for example), or if they
could/should (?) be merged?  Tobias, do you have an opinion?

Do we have sufficient testsuite coverage?  (For example,
'attach'/'detach' with 'present == false', if that makes sense, or any
other thing that case A is doing differently from case B?)  Shouldn't
this get '-fdump-tree-original' and/or '-fdump-tree-gimple' testcases,
similar to 'gfortran.dg/goacc/finalize-1.f', so that we verify/document
what we generate here?


> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90
> @@ -0,0 +1,51 @@
> +program att

Please add 'dg-do run', and...

> +  use openacc
> +  implicit none
> +  type t
> +integer :: arr1(10)
> +integer, allocatable :: arr2(:)
> +  end type t
> +  integer :: i
> +  type(t) :: myvar
> +  integer, target :: tarr(10)
> +  integer, pointer :: myptr(:)
> +
> +  allocate(myvar%arr2(10))
> +
> +  do i=1,10
> +myvar%arr1(i) = 0
> +   

Re: [PATCH] builtins: Avoid useless char/short -> int promotions before atomics [PR96176]

2020-07-14 Thread Richard Biener
On Tue, 14 Jul 2020, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, we generate a useless movzbl insn before lock cmpxchg.
> The problem is that the builtin for the char/short cases has the arguments
> promoted to int and combine gives up, because the instructions have
> MEM_VOLATILE_P arguments and recog in that case doesn't recognize anything
> when volatile_ok is false, and nothing afterwards optimizes the
> (reg:SI a) = (zero_extend:SI (reg:QI a))
> ... (subreg:QI (reg:SI a) 0) ...

So the above isn't fixable?  Because it would probably be the more
generic fix, right?

> The following patch fixes it at expansion time, we already have a function
> that is meant to undo the promotion, so this just adds the very common case
> to that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2020-07-14  Jakub Jelinek  
> 
>   PR target/96176
>   * builtins.c: Include gimple-ssa.h, tree-ssa-live.h and
>   tree-outof-ssa.h.
>   (expand_expr_force_mode): If exp is a SSA_NAME with different mode
>   from MODE and get_gimple_for_ssa_name is a cast from MODE, use the
>   cast's rhs.
> 
>   * gcc.target/i386/pr96176.c: New test.
> 
> --- gcc/builtins.c.jj 2020-06-22 10:59:15.0 +0200
> +++ gcc/builtins.c2020-07-13 12:37:56.519653940 +0200
> @@ -73,6 +73,9 @@ along with GCC; see the file COPYING3.
>  #include "gomp-constants.h"
>  #include "omp-general.h"
>  #include "tree-dfa.h"
> +#include "gimple-ssa.h"
> +#include "tree-ssa-live.h"
> +#include "tree-outof-ssa.h"
>  
>  struct target_builtins default_target_builtins;
>  #if SWITCHABLE_TARGET
> @@ -6671,6 +6674,21 @@ expand_expr_force_mode (tree exp, machin
>rtx val;
>machine_mode old_mode;
>  
> +  if (TREE_CODE (exp) == SSA_NAME
> +  && TYPE_MODE (TREE_TYPE (exp)) != mode)
> +{
> +  /* Undo argument promotion if possible, as combine might not
> +  be able to do it later due to MEM_VOLATILE_P uses in the
> +  patterns.  */
> +  gimple *g = get_gimple_for_ssa_name (exp);
> +  if (g && gimple_assign_cast_p (g))
> + {
> +   tree rhs = gimple_assign_rhs1 (g);
> +   if (TYPE_MODE (TREE_TYPE (rhs)) == mode)

Is this really enough to check?  What if the cast was truncating?
The gimple_assign_cast_p predicate also allows for FIX_TRUNC_EXPR
and VIEW_CONVERT_EXPR where for the latter the rhs is the 
VIEW_CONVERT_EXPR itself.

Richard.

> + exp = rhs;
> + }
> +}
> +
>val = expand_expr (exp, NULL_RTX, mode, EXPAND_NORMAL);
>/* If VAL is promoted to a wider mode, convert it back to MODE.  Take care
>   of CONST_INTs, where we know the old_mode only from the call argument.  
> */
> --- gcc/testsuite/gcc.target/i386/pr96176.c.jj2020-07-13 
> 12:44:15.940149701 +0200
> +++ gcc/testsuite/gcc.target/i386/pr96176.c   2020-07-13 12:45:07.822396980 
> +0200
> @@ -0,0 +1,13 @@
> +/* PR target/96176 */
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "\tmovzbl\t" } } */
> +
> +unsigned char v;
> +
> +void
> +foo (unsigned char *x, unsigned char y, unsigned char z)
> +{
> +  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, 
> __ATOMIC_RELAXED);
> +  v = y;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


[PATCH] libsanitizer: Fix GetPcSpBp determination of SP on 32-bit Solaris/x86

2020-07-14 Thread Rainer Orth
The latest Solaris 11.4/x86 update uncovered a libsanitizer bug that
caused one test to FAIL for 32-bit:

+FAIL: c-c++-common/asan/null-deref-1.c   -O0  output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -O1  output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -O2  output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -O2 -flto  output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -O2 -flto -flto-partition=none  
output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -O3 -g  output pattern test
+FAIL: c-c++-common/asan/null-deref-1.c   -Os  output pattern test

I've identified the problem and the fix has just landed in upstream
llvm-project:

https://reviews.llvm.org/D83664

Tested on i386-pc-solaris2.11 on master, gcc-10 and gcc-9 branches.

Ok for all three?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2020-07-13  Rainer Orth  

* sanitizer_common/sanitizer_linux.cpp: Cherry-pick llvm-project
revision f0e9b76c3500496f8f3ea7abe6f4bf801e3b41e7.

# HG changeset patch
# Parent  829b9cc5641b7899449adf14549fd4052aeea596
libsanitizer: Fix GetPcSpBp determination of SP on 32-bit Solaris/x86

diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_linux.cpp
--- a/libsanitizer/sanitizer_common/sanitizer_linux.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_linux.cpp
@@ -2043,13 +2043,13 @@ static void GetPcSpBp(void *context, upt
 # ifndef REG_EBP
 #  define REG_EBP  6 // REG_FP
 # endif
-# ifndef REG_ESP
-#  define REG_ESP 17 // REG_SP
+# ifndef REG_UESP
+#  define REG_UESP 17 // REG_SP
 # endif
 # endif
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
-  *sp = ucontext->uc_mcontext.gregs[REG_ESP];
+  *sp = ucontext->uc_mcontext.gregs[REG_UESP];
 # endif
 #elif defined(__powerpc__) || defined(__powerpc64__)
   ucontext_t *ucontext = (ucontext_t*)context;


Re: [PATCH][RFC] vector creation from two parts of two vectors produces TBL rather than ins (PR93720)

2020-07-14 Thread Dmitrij Pochepko
Hi,

please take a look at updated patch with all comments addressed (attached).

Thanks,
Dmitrij

On Sat, Jul 11, 2020 at 09:52:40AM +0100, Richard Sandiford wrote:
...
> 
> For this point, I meant that we should remove the first loop too.  I.e.:
>
... 
> 
> is now redundant with the later:
>
... 
> 
> However, a more canonical way to write the condition above is:
> 
>   for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
> {
>   HOST_WIDE_INT elt;
>   if (!elt.is_constant (&elt))
> return false;
>   if (elt == (HOST_WIDE_INT) i)
> continue;
> 

done

> Very minor, but the coding conventions don't put a space before “++”.
> So:
> 
> > +  for (unsigned HOST_WIDE_INT i = 0; i < nelt; i ++)
> 
> …this should be “i++” too.

done
>From 9acc14f4cdd10091daa5311f495daacfebdcfc3d Mon Sep 17 00:00:00 2001
From: Dmitrij Pochepko 
Date: Tue, 14 Jul 2020 15:48:46 +0300
Subject: [PATCH] vector creation from two parts of two vectors produces TBL
 rather than ins (PR 93720)

The following patch enables vector permutations optimization by trying to use ins instruction instead of slow and generic tbl.

example:

vector float f0(vector float a, vector float b)
{
  return __builtin_shuffle (a, a, (vector int){3, 1, 2, 3});
}

was compiled into:
...
	adrpx0, .LC0
	ldr q1, [x0, #:lo12:.LC0]
	tbl v0.16b, {v0.16b}, v1.16b
...

and after patch:
...
	ins v0.s[0], v0.s[3]
...

bootstrapped and tested on aarch64-linux-gnu with no regressions

gcc/ChangeLog:

2020-07-14 Andrew Pinski   

	PR gcc/93720

	* gcc/config/aarch64/aarch64.c (aarch64_evpc_ins): New function
	* gcc/config/aarch64/aarch64-simd.md (aarch64_simd_vec_copy_lane): changed name prefix

gcc/testsuite/ChangeLog:

2020-07-14  Andrew Pinski   

	PR gcc/93720

	* gcc/testsuite/gcc.target/aarch64/vins-1.c: New test
	* gcc/testsuite/gcc.target/aarch64/vins-2.c: New test
	* gcc/testsuite/gcc.target/aarch64/vins-3.c: New test

Co-Authored-By: Dmitrij Pochepko
---
 gcc/config/aarch64/aarch64-simd.md|  2 +-
 gcc/config/aarch64/aarch64.c  | 77 +++
 gcc/testsuite/gcc.target/aarch64/vins-1.c | 23 +
 gcc/testsuite/gcc.target/aarch64/vins-2.c | 23 +
 gcc/testsuite/gcc.target/aarch64/vins-3.c | 23 +
 5 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vins-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vins-2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vins-3.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9f0e2bd..11ebf5b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -958,7 +958,7 @@
   [(set_attr "type" "neon_ins, neon_from_gp, neon_load1_one_lane")]
 )
 
-(define_insn "*aarch64_simd_vec_copy_lane"
+(define_insn "@aarch64_simd_vec_copy_lane"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 	(vec_merge:VALL_F16
 	(vec_duplicate:VALL_F16
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e259d05..f1c5b5a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20594,6 +20594,81 @@ aarch64_evpc_sel (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize patterns suitable for the INS instructions.  */
+static bool
+aarch64_evpc_ins (struct expand_vec_perm_d *d)
+{
+  machine_mode mode = d->vmode;
+  unsigned HOST_WIDE_INT nelt;
+
+  if (d->vec_flags != VEC_ADVSIMD)
+return false;
+
+  /* to_constant is safe since this routine is specific to Advanced SIMD
+ vectors.  */
+  nelt = d->perm.length ().to_constant ();
+  rtx insv = d->op0;
+
+  HOST_WIDE_INT idx = -1;
+
+  for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
+{
+  HOST_WIDE_INT elt;
+  if (!d->perm[i].is_constant (&elt))
+	return false;
+  if (elt == (HOST_WIDE_INT) i)
+	continue;
+  if (idx != -1)
+	{
+	  idx = -1;
+	  break;
+	}
+  idx = i;
+}
+
+  if (idx == -1)
+{
+  insv = d->op1;
+  for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
+	{
+	  if (d->perm[i].to_constant () == (HOST_WIDE_INT) (i + nelt))
+	continue;
+	  if (idx != -1)
+	return false;
+	  idx = i;
+	}
+
+  if (idx == -1)
+	return false;
+}
+
+  if (d->testing_p)
+return true;
+
+  gcc_assert (idx != -1);
+
+  unsigned extractindex = d->perm[idx].to_constant ();
+  rtx extractv = d->op0;
+  if (extractindex >= nelt)
+{
+  extractv = d->op1;
+  extractindex -= nelt;
+}
+  gcc_assert (extractindex < nelt);
+
+  emit_move_insn (d->target, insv);
+  insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
+  expand_operand ops[5];
+  create_output_operand (&ops[0], d->target, mode);
+  create_input_operand (&ops[1], d->target, mode);
+  create_integer_operand (&ops[2], 1 << idx);
+  create_input_operand (&ops[3], extractv, mode);
+  create_integer_operand (&ops[4], extractindex);
+  expand_insn (icode, 5, 

Re: [PATCH] libsanitizer: Fix GetPcSpBp determination of SP on 32-bit Solaris/x86

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Tue, Jul 14, 2020 at 02:32:57PM +0200, Rainer Orth wrote:
> The latest Solaris 11.4/x86 update uncovered a libsanitizer bug that
> caused one test to FAIL for 32-bit:
> 
> +FAIL: c-c++-common/asan/null-deref-1.c   -O0  output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -O1  output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -O2  output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -O2 -flto  output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -O2 -flto -flto-partition=none  
> output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -O3 -g  output pattern test
> +FAIL: c-c++-common/asan/null-deref-1.c   -Os  output pattern test
> 
> I've identified the problem and the fix has just landed in upstream
> llvm-project:
> 
>   https://reviews.llvm.org/D83664
> 
> Tested on i386-pc-solaris2.11 on master, gcc-10 and gcc-9 branches.
> 
> Ok for all three?

Won't this break i386-linux?
I mean, while the ifndef/define change is guarded by #if SANITIZER_SOLARIS,
the last changed line is not.  I'm afraid I don't know if
ucontext->uc_mcontext.gregs[REG_UESP] or ucontext->uc_mcontext.gregs[REG_ESP];
is what we want to use on i686-linux...
Similarly netbsd.

> --- a/libsanitizer/sanitizer_common/sanitizer_linux.cpp
> +++ b/libsanitizer/sanitizer_common/sanitizer_linux.cpp
> @@ -2043,13 +2043,13 @@ static void GetPcSpBp(void *context, upt
>  # ifndef REG_EBP
>  #  define REG_EBP  6 // REG_FP
>  # endif
> -# ifndef REG_ESP
> -#  define REG_ESP 17 // REG_SP
> +# ifndef REG_UESP
> +#  define REG_UESP 17 // REG_SP
>  # endif
>  # endif
>*pc = ucontext->uc_mcontext.gregs[REG_EIP];
>*bp = ucontext->uc_mcontext.gregs[REG_EBP];
> -  *sp = ucontext->uc_mcontext.gregs[REG_ESP];
> +  *sp = ucontext->uc_mcontext.gregs[REG_UESP];
>  # endif
>  #elif defined(__powerpc__) || defined(__powerpc64__)
>ucontext_t *ucontext = (ucontext_t*)context;


Jakub



Re: [PATCH] sparc/sparc64: use PIE_SPEC to select crtendS.o [PR96190]

2020-07-14 Thread Eric Botcazou
> In --enable-default-pie mode compiler should switch from
> using crtend.o to crtendS.o. On sparc it is especially visible
> because crtend.o contains PIC-unfriendly code.

Let's use GNU_USER_TARGET_ENDFILE_SPEC in ENDFILE_SPEC since we presumably use 
GNU_USER_TARGET_STARTFILE_SPEC for STARTFILE_SPEC from config/gnu-user.h.

-- 
Eric Botcazou


Re: [PATCH] c++: Improve checking of decls with trailing return type [PR95820]

2020-07-14 Thread Jason Merrill via Gcc-patches

On 6/24/20 7:27 PM, Marek Polacek wrote:

This is an ICE-on-invalid but I've been seeing it when reducing
various testcases, so it's more important for me than usually.

splice_late_return_type now checks that if we've seen a late return
type, the function return type was auto.  That's a fair assumption
but grokdeclarator/cdk_function wasn't giving errors for function
pointers and similar.  So we want to perform various checks not only
when funcdecl_p || inner_declarator == NULL.  But only give the
!late_return_type errors when funcdecl_p, to accept e.g.

auto (*fp)() = f;

in C++11.  Here's a diff -w to ease the review:

--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -12102,14 +12102,9 @@ grokdeclarator (const cp_declarator *declarator,

/* Handle a late-specified return type.  */
tree late_return_type = declarator->u.function.late_return_type;
-   if (funcdecl_p
-   /* This is the case e.g. for
-  using T = auto () -> int.  */
-   || inner_declarator == NULL)
- {
if (tree auto_node = type_uses_auto (type))
  {
-   if (!late_return_type)
+   if (!late_return_type && funcdecl_p)
  {
if (current_class_type
&& LAMBDA_TYPE_P (current_class_type))
@@ -12201,7 +12196,6 @@ grokdeclarator (const cp_declarator *declarator,
"type specifier", name);
return error_mark_node;
  }
- }
type = splice_late_return_type (type, late_return_type);
if (type == error_mark_node)
  return error_mark_node;

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


gcc/cp/ChangeLog:

PR c++/95820
* decl.c (grokdeclarator) : Check also
pointers/references/... to functions.

gcc/testsuite/ChangeLog:

PR c++/95820
* g++.dg/cpp1y/auto-fn58.C: New test.
---
  gcc/cp/decl.c  | 166 -
  gcc/testsuite/g++.dg/cpp1y/auto-fn58.C |  13 ++
  2 files changed, 93 insertions(+), 86 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/auto-fn58.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 3afad5ca805..a9ec328c498 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -12102,106 +12102,100 @@ grokdeclarator (const cp_declarator *declarator,
  
  	/* Handle a late-specified return type.  */

tree late_return_type = declarator->u.function.late_return_type;
-   if (funcdecl_p
-   /* This is the case e.g. for
-  using T = auto () -> int.  */
-   || inner_declarator == NULL)
+   if (tree auto_node = type_uses_auto (type))
  {
-   if (tree auto_node = type_uses_auto (type))
+   if (!late_return_type && funcdecl_p)
  {
-   if (!late_return_type)
+   if (current_class_type
+   && LAMBDA_TYPE_P (current_class_type))
+ /* OK for C++11 lambdas.  */;
+   else if (cxx_dialect < cxx14)
  {
-   if (current_class_type
-   && LAMBDA_TYPE_P (current_class_type))
- /* OK for C++11 lambdas.  */;
-   else if (cxx_dialect < cxx14)
- {
-   error_at (typespec_loc, "%qs function uses "
- "% type specifier without "
- "trailing return type", name);
-   inform (typespec_loc,
-   "deduced return type only available "
-   "with %<-std=c++14%> or %<-std=gnu++14%>");
- }
-   else if (virtualp)
- {
-   error_at (typespec_loc, "virtual function "
- "cannot have deduced return type");
-   virtualp = false;
- }
+   error_at (typespec_loc, "%qs function uses "
+ "% type specifier without "
+ "trailing return type", name);
+   inform (typespec_loc,
+   "deduced return type only available "
+   "with %<-std=c++14%> or %<-std=gnu++14%>");
  }
-   else if (!is_auto (type) && sfk != sfk_conversion)
+   else if (virtualp)
  {
-   error_at (typespec_loc, "%qs function with trailing "
- "return type has %qT as its type rather "
- "than plain %", name, type);
-   return error_m

Re: Patch RFA: In C++ demangler don't treat lambda as substitution candidate

2020-07-14 Thread Jason Merrill via Gcc-patches

On 7/10/20 12:33 PM, Ian Lance Taylor wrote:

As discussed in PR 96143, the C++ frontend and the demangler disagree
as to whether a lambda is a substitution candidate.  According to the
ABI, the C++ frontend is behaving correctly.  This patch changes the
demangler accordingly.

This caused the demangling of several existing test cases to change.
These test cases are long and complicated, and I frankly did not try
to understand whether the change is correct or not.  I believe that
the change is correct, and at least these symbols continue to
demangle.

Did a full bootstrap and testsuite run on x86_64-pc-linux-gnu.

OK for mainline?


OK, thanks.


Ian

2020-07-10  Ian Lance Taylor  

PR demangler/96143
* cp-demangle.c (d_lambda): Don't add substitution candidate.
* testsuite/demangle-expected: Update a few existing test cases
accordingly, and add a new test case.





doc: fix a couple of typos

2020-07-14 Thread Nathan Sidwell
gty calls gt_clear*e*_cache not gt_clear_cache.  I know not why it is 
named so, but at least document it correctly.  invoke.texi had a 
duplicate opindex.


gcc/
* doc/gty.texi: Fic gt_cleare_cache name.
* doc/invoke.texi: Remove duplicate opindex Wabi-tag.

pushing to trunk
--
Nathan Sidwell
diff --git i/gcc/doc/gty.texi w/gcc/doc/gty.texi
index 9414d3cc48c..f5c310414dc 100644
--- i/gcc/doc/gty.texi
+++ w/gcc/doc/gty.texi
@@ -250,7 +250,7 @@ for more information.
 @findex cache
 @item cache
 
-When the @code{cache} option is applied to a global variable gt_clear_cache is
+When the @code{cache} option is applied to a global variable gt_cleare_cache is
 called on that variable between the mark and sweep phases of garbage
 collection.  The gt_clear_cache function is free to mark blocks as used, or to
 clear pointers in the variable.
diff --git i/gcc/doc/invoke.texi w/gcc/doc/invoke.texi
index 09bcc5b0f78..e34d03de002 100644
--- i/gcc/doc/invoke.texi
+++ w/gcc/doc/invoke.texi
@@ -3272,7 +3272,6 @@ In addition, these warning options have meanings only for C++ programs:
 @table @gcctabopt
 @item -Wabi-tag @r{(C++ and Objective-C++ only)}
 @opindex Wabi-tag
-@opindex Wabi-tag
 Warn when a type with an ABI tag is used in a context that does not
 have that ABI tag.  See @ref{C++ Attributes} for more information
 about ABI tags.


core: tree-node comments and robustify

2020-07-14 Thread Nathan Sidwell
As we've moved to 64-bit systems, the padding information has become 
conditionally inaccurate.  I also hit cases where invalid tree codes 
did not get flagged as invalid.


gcc/
* tree-core.h (tree_decl_with_vis, tree_function_decl):
Note additional padding on 64-bits
* tree.c (cache_integer_cst): Note why no caching of enum 
literals.

(get_tree_code_name): Robustify error case.

pushing to trunk

--
Nathan Sidwell
diff --git i/gcc/tree-core.h w/gcc/tree-core.h
index 8c5a2e3c404..ba7f9ceb205 100644
--- i/gcc/tree-core.h
+++ w/gcc/tree-core.h
@@ -1826,6 +1826,7 @@ struct GTY(()) tree_decl_with_vis {
  /* Belong to FUNCTION_DECL exclusively.  */
  unsigned regdecl_flag : 1;
  /* 14 unused bits. */
+ /* 32 more unused on 64 bit HW. */
 };
 
 struct GTY(()) tree_var_decl {
@@ -1901,6 +1902,7 @@ struct GTY(()) tree_function_decl {
   unsigned replaceable_operator : 1;
 
   /* 11 bits left for future expansion.  */
+  /* 32 bits on 64-bit HW.  */
 };
 
 struct GTY(()) tree_translation_unit_decl {
diff --git i/gcc/tree.c w/gcc/tree.c
index 3d9968fd7a0..9102f8d4e54 100644
--- i/gcc/tree.c
+++ w/gcc/tree.c
@@ -1771,6 +1771,8 @@ cache_integer_cst (tree t)
   break;
 
 case ENUMERAL_TYPE:
+  /* The slot used by TYPE_CACHED_VALUES is used for the enum
+	 members.  */
   break;
 
 default:
@@ -13254,7 +13256,9 @@ get_tree_code_name (enum tree_code code)
 {
   const char *invalid = "";
 
-  if (code >= MAX_TREE_CODES)
+  /* The tree_code enum promotes to signed, but we could be getting
+ invalid values, so force an unsigned comparison.  */
+  if (unsigned (code) >= MAX_TREE_CODES)
 {
   if (code == 0xa5a5)
 	return "ggc_freed";


core: comment & formatting

2020-07-14 Thread Nathan Sidwell
One of hash_map's getters returns a pointer to T or null, The comment 
said it returned T or null.  Also an unaligned pair of comments.


gcc/
* hash-map.h (hash_map::get): Note it is a pointer to value.
* incpath.h (incpath_kind): Align comments.

pushing to trunk
--
Nathan Sidwell
diff --git i/gcc/hash-map.h w/gcc/hash-map.h
index 5b8fd184e32..6bf3dc64816 100644
--- i/gcc/hash-map.h
+++ w/gcc/hash-map.h
@@ -177,7 +177,8 @@ public:
   return !ins;
 }
 
-  /* if the passed in key is in the map return its value otherwise NULL.  */
+  /* If the passed in key is in the map return pointer to its value
+ otherwise NULL.  */
 
   Value *get (const Key &k)
 {
diff --git i/gcc/incpath.h w/gcc/incpath.h
index fe48194dab7..c647081630c 100644
--- i/gcc/incpath.h
+++ w/gcc/incpath.h
@@ -22,8 +22,8 @@
 enum incpath_kind {
   INC_QUOTE = 0, /* include "foo" */
   INC_BRACKET,   /* include  */
-  INC_SYSTEM,/* sysinclude */
-  INC_AFTER,	/* post-sysinclude.  */
+  INC_SYSTEM,/* sys-include */
+  INC_AFTER,	 /* post-sysinclude  */
   INC_MAX
 };
 


c++: comments & formatting

2020-07-14 Thread Nathan Sidwell

unsurprisingly, most of the reformatting I discovered is in the C++ FE.
I found some bad formatting and misleading or incomplete comments during 
my spelunking around the c++FE.  May as well clean up trunk and 
record what I noted.


gcc/cp/
* cp-tree.h: Correct some tree lang flag comments,
reformat some structure definitions.  Note some structure
sizes.  Clarify some comments.
(yyungetc): Delete.  Not been a thing for some time.
* decl.c:  Fix some formatting & whitespace issues.
(function_requirements_equivalent_p): Note why
substitutions are needed.
* decl2.c (no_linkage_error): Note that heroics about
'typedef struct { ... };' are no longer needed.
* method.c: Whitespace.
* name-lookup.c: Whitespace.
(add_decl_to_level): Reformat a line.
(print_binding_stack): Mark as DEBUG_FUNCTION.
(has_using_namespace_std_directive_p): Delete comment.
* ptree.c: Whitespace.
* rtti.c: Whitespace & comment.
* tree.c: Comment.
* typeck.c (structural_comptypes): Add comment.

pushing to trunk
--
Nathan Sidwell
diff --git i/gcc/cp/class.c w/gcc/cp/class.c
index 14380c7a08c..c49055d384e 100644
--- i/gcc/cp/class.c
+++ w/gcc/cp/class.c
@@ -4708,6 +4708,10 @@ check_methods (tree t)
 }
 }
 
+/* FN is constructor, destructor or operator function.  Clone the
+   declaration to create a NAME'd variant.  NEED_VTT_PARM_P and
+   OMIT_INHERITED_PARMS_P are relevant if it's a cdtor.  */
+
 static tree
 copy_fndecl_with_name (tree fn, tree name, tree_code code,
 		   bool need_vtt_parm_p, bool omit_inherited_parms_p)
@@ -6091,10 +6095,8 @@ check_bases_and_members (tree t)
   }
 
   if (LAMBDA_TYPE_P (t))
-{
-  /* "This class type is not an aggregate."  */
-  CLASSTYPE_NON_AGGREGATE (t) = 1;
-}
+/* "This class type is not an aggregate."  */
+CLASSTYPE_NON_AGGREGATE (t) = 1;
 
   /* Compute the 'literal type' property before we
  do anything with non-static member functions.  */
@@ -6717,6 +6719,8 @@ layout_class_type (tree t, tree *virtuals_p)
 	 indicates the total number of bits used.  Therefore,
 	 rli_size_so_far, rather than rli_size_unit_so_far, is
 	 used to compute TYPE_SIZE_UNIT.  */
+
+  /* Set the size and alignment for the new type.  */
   tree eoc = end_of_class (t, /*include_virtuals_p=*/0);
   TYPE_SIZE_UNIT (base_t)
 	= size_binop (MAX_EXPR,
diff --git i/gcc/cp/cp-tree.def w/gcc/cp/cp-tree.def
index 99851eb780f..31be2cf41a3 100644
--- i/gcc/cp/cp-tree.def
+++ w/gcc/cp/cp-tree.def
@@ -194,7 +194,9 @@ DEFTREECODE (BOUND_TEMPLATE_TEMPLATE_PARM, "bound_template_template_parm",
 
 /* For template template argument of the form `T::template C'.
TYPE_CONTEXT is `T', the template parameter dependent object.
-   TYPE_NAME is an IDENTIFIER_NODE for `C', the member class template.  */
+   TYPE_NAME is a TEMPLATE_DECL, whose DECL_TEMPLATE_PARMS are any
+   template parms of the instantiation.  That decl's DECL_NAME is the
+   IDENTIFIER_NODE for `C', the member class template.  */
 DEFTREECODE (UNBOUND_CLASS_TEMPLATE, "unbound_class_template", tcc_type, 0)
 
 /* A using declaration.  USING_DECL_SCOPE contains the specified
diff --git i/gcc/cp/cp-tree.h w/gcc/cp/cp-tree.h
index 2aa8ebe64c0..a599f3bad1c 100644
--- i/gcc/cp/cp-tree.h
+++ w/gcc/cp/cp-tree.h
@@ -1,4 +1,4 @@
-/* Definitions for C++ parsing and type checking.
+/* Definitions for -*- C++ -*- parsing and type checking.
Copyright (C) 1987-2020 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiem...@cygnus.com)
 
@@ -391,7 +391,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   CLEANUP_P (in TRY_BLOCK)
   AGGR_INIT_VIA_CTOR_P (in AGGR_INIT_EXPR)
   PTRMEM_OK_P (in ADDR_EXPR, OFFSET_REF, SCOPE_REF)
-  PAREN_STRING_LITERAL (in STRING_CST)
+  PAREN_STRING_LITERAL_P (in STRING_CST)
   CP_DECL_THREAD_LOCAL_P (in VAR_DECL)
   KOENIG_LOOKUP_P (in CALL_EXPR)
   STATEMENT_LIST_NO_SCOPE (in STATEMENT_LIST).
@@ -486,7 +486,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   CALL_EXPR_REVERSE_ARGS (in CALL_EXPR, AGGR_INIT_EXPR)
   CONSTRUCTOR_PLACEHOLDER_BOUNDARY (in CONSTRUCTOR)
6: TYPE_MARKED_P (in _TYPE)
-  DECL_NON_TRIVIALLY_INITIALIZED_P (in VAR_DECL)
+  DECL_NONTRIVIALLY_INITIALIZED_P (in VAR_DECL)
   RANGE_FOR_IVDEP (in RANGE_FOR_STMT)
   CALL_EXPR_OPERATOR_SYNTAX (in CALL_EXPR, AGGR_INIT_EXPR)
   CONSTRUCTOR_IS_DESIGNATED_INIT (in CONSTRUCTOR)
@@ -515,7 +515,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   DECL_VLA_CAPTURE_P (in FIELD_DECL)
   DECL_ARRAY_PARAMETER_P (in PARM_DECL)
   LABEL_DECL_CONTINUE (in LABEL_DECL)
-   2: DECL_THIS_EXTERN (in VAR_DECL or FUNCTION_DECL).
+   2: DECL_THIS_EXTERN (in VAR_DECL, FUNCTION_DECL or PARM_DECL)
   DECL_IMPLICIT_TYPEDEF_P (in a TYPE_DECL)
  

Re: [PATCH] builtins: Avoid useless char/short -> int promotions before atomics [PR96176]

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Tue, Jul 14, 2020 at 02:33:41PM +0200, Richard Biener wrote:
> > As mentioned in the PR, we generate a useless movzbl insn before lock 
> > cmpxchg.
> > The problem is that the builtin for the char/short cases has the arguments
> > promoted to int and combine gives up, because the instructions have
> > MEM_VOLATILE_P arguments and recog in that case doesn't recognize anything
> > when volatile_ok is false, and nothing afterwards optimizes the
> > (reg:SI a) = (zero_extend:SI (reg:QI a))
> > ... (subreg:QI (reg:SI a) 0) ...
> 
> So the above isn't fixable?  Because it would probably be the more
> generic fix, right?

I'm afraid it is not, CCing Segher on that.  The question is how to
differentiate between "combine didn't do anything dangerous to this
MEM_VOLATILE_P and just kept it in the pattern as is" vs.
"combine changed it in a dangerous way unsuitable for volatile accesses".
Even if we wanted to make just the case where i3 is the only insn
that originally contains MEM_VOLATILE_P and checked that the MEM stayed as
is, there is the difficulty that we change the insns in place, so remembering
how it looked before is hard.  And then before trying to recognize it we'd
carefully need to check that nothing else is volatile before enabling
temporarily volatile_ok.

> > +  if (TREE_CODE (exp) == SSA_NAME
> > +  && TYPE_MODE (TREE_TYPE (exp)) != mode)
> > +{
> > +  /* Undo argument promotion if possible, as combine might not
> > +be able to do it later due to MEM_VOLATILE_P uses in the
> > +patterns.  */
> > +  gimple *g = get_gimple_for_ssa_name (exp);
> > +  if (g && gimple_assign_cast_p (g))
> > +   {
> > + tree rhs = gimple_assign_rhs1 (g);
> > + if (TYPE_MODE (TREE_TYPE (rhs)) == mode)
> 
> Is this really enough to check?  What if the cast was truncating?
> The gimple_assign_cast_p predicate also allows for FIX_TRUNC_EXPR
> and VIEW_CONVERT_EXPR where for the latter the rhs is the 
> VIEW_CONVERT_EXPR itself.

I don't know how could those happen.
mode is the result of get_builtin_sync_mode or
int_mode_for_size (BITS_PER_UNIT * size, 0).require () (the former is
int_mode_for_size (...).require ()) and thus an integral mode.
The checks verify that the SSA_NAME doesn't have that mode, but rhs1
of the statement does have the right mode.
As mode is integral, I think that rules out FIX_TRUNC_EXPR
which would need real mode on the argument, and for VCE, if VCE is
in the operand itself, then it ought to have the same mode as the lhs.
I believe the only possibility where exp doesn't have already the desired
mode is the char/short to int promotion, otherwise all the builtins are
prototyped and thus one shouldn't have some unrelated mode.

But if you want some extra checks just to be sure, I can add them, whether
it means checking only for CONVERT_EXPR_CODE_P instead of all 4 rhs codes,
and/or checking that both types are integral scalar and the conversion is an
extension (by comparing type precisions).

Jakub



[PATCH] expr: Unbreak build of mesa [PR96194]

2020-07-14 Thread Jakub Jelinek via Gcc-patches
Hi!

On Tue, Jul 07, 2020 at 09:04:40AM +0200, Richard Biener via Gcc-patches wrote:
> > The store to the whole of each volatile object was picked apart
> > like there had been an individual assignment to each of the
> > fields.  Reads were added as part of that; see PR for details.
> > The reads from volatile memory were a clear bug; individual
> > stores questionable.  A separate patch clarifies the docs.
> >
> > Tested x86_64-linux, powerpc64le-linux and cris-elf.
> > Ok to commit?  Backport to gcc-10?
> 
> OK for both.

This breaks building of mesa on both the trunk and 10 branch.

The problem is that the middle-end may never create temporaries of non-POD
(TREE_ADDRESSABLE) types, those can be only created when the language says
so and thus only the FE is allowed to create those.

This patch just reverts the behavior to what we used to do before for the
stores to volatile non-PODs.  Perhaps we want to do something else, but
definitely we can't create temporaries of the non-POD type.  It is up to
discussions on what should happen in those cases.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/10.2?

2020-07-14  Jakub Jelinek  

PR middle-end/96194
* expr.c (expand_constructor): Don't create temporary for store to
volatile MEM if exp has an addressable type.

* g++.dg/opt/pr96194.C: New test.

--- gcc/expr.c.jj   2020-07-13 19:09:33.173872178 +0200
+++ gcc/expr.c  2020-07-14 13:07:26.228801996 +0200
@@ -8382,7 +8382,9 @@ expand_constructor (tree exp, rtx target
   || GET_CODE (target) == PARALLEL || modifier == EXPAND_STACK_PARM
   /* Also make a temporary if the store is to volatile memory, to
 avoid individual accesses to aggregate members.  */
-  || (GET_CODE (target) == MEM && MEM_VOLATILE_P (target)))
+  || (GET_CODE (target) == MEM
+ && MEM_VOLATILE_P (target)
+ && !TREE_ADDRESSABLE (TREE_TYPE (exp
 {
   if (avoid_temp_mem)
return NULL_RTX;
--- gcc/testsuite/g++.dg/opt/pr96194.C.jj   2020-07-14 13:15:32.185733939 
+0200
+++ gcc/testsuite/g++.dg/opt/pr96194.C  2020-07-14 13:16:16.033096197 +0200
@@ -0,0 +1,21 @@
+// PR middle-end/96194
+// { dg-do compile }
+// { dg-options "-O2" }
+
+#include 
+
+struct A { ~A (); };
+struct B : A { float e[64]; };
+
+B *
+foo ()
+{
+  return new ((void *) 0) B ();
+}
+
+B *
+bar (void *x, bool y)
+{
+  void *p = y ? x : (void *) 0;
+  return new (p) B ();
+}


Jakub



Re: [PATCH] libgomp: Add OMPD Address Space Information functions.

2020-07-14 Thread y2s1982 via Gcc-patches
Hello Jakub,

Thank you for the detailed information. I will make those changes right
away.

Cheers,

Tony Sim

On Tue, Jul 14, 2020 at 5:57 AM Jakub Jelinek  wrote:

> On Thu, Jul 09, 2020 at 07:01:00PM -0400, y2s1982 via Gcc-patches wrote:
> > --- a/libgomp/libgompd.h
> > +++ b/libgomp/libgompd.h
> > @@ -47,4 +47,19 @@ typedef struct _ompd_aspace_handle {
> >ompd_size_t ref_count;
> >  } ompd_address_space_handle_t;
> >
> > +struct gompd_env
> > +{
> > +  /* TODO: when the struct is better defined, turn it into a compact
> form.
> > + LINK:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549698.html
> > + For now, keep it as a struct.  */
> > +
> > +  /* Environment set version number.  */
> > +  ompd_word_t gompd_env_version;
> > +  /* Represents _OPENMP that is in mm format.  */
> > +  ompd_word_t openmp_version;
> > +};
> > +
> > +/* TODO: when gompd_env is better defined, turn it into a compact
> form.  */
> > +extern struct gompd_env gompd_env_info;
>
> I don't think you want this to be a global var.
>
> > +  ompd_size_t macro_length = 6; /* _OPENMP format: mm.  */
>
> So use omp_version_len = strlen ("mm");
> and no comment is needed (the compiler will optimize it into constant)
> or omp_version_len = sizeof ("yyymm") - 1;
>
> > +  char *tmp = "GNU OpenMP Runtime implementing OpenMP 5.0 ";
> > +  ompd_size_t tmp_length = strlen (tmp);
>
> Use name instead of tmp in both vars?
>
> > --- a/libgomp/ompd-lib.c
> > +++ b/libgomp/ompd-lib.c
> > @@ -31,6 +31,17 @@
> >
> >  ompd_callbacks_t gompd_callbacks;
> >  static int ompd_initialized = 0;
> > +struct gompd_env gompd_env_info;
>
> Again.
>
> > +
> > +ompd_rc_t
> > +gompd_set_environment ()
> > +{
> > +  /* TODO: Turn this placeholder function to handle OMPD environment
> variables
> > + when it becomes compact.  */
> > +  struct gompd_env temp_env = { 202007, 201811 };
> > +  gompd_env_info = temp_env;
> > +  return ompd_rc_ok;
> > +}
> >
> >  ompd_rc_t
> >  ompd_get_api_version (ompd_word_t *version)
> > @@ -57,6 +68,7 @@ ompd_initialize (ompd_word_t api_version, const
> ompd_callbacks_t *callbacks)
> >  return ompd_rc_error;
> >
> >gompd_callbacks = *callbacks;
> > +  gompd_set_environment ();
>
> And you shouldn't call it here, but instead in ompd_process_initialize
> and put the struct into the handle.
>
> The thing is, the same OMPD library can e.g. handle a 64-bit and 32-bit
> process, or one with older and one with newer libgomp.so.1.
>
> Jakub
>
>


Re: [PATCH] diagnostics: Add options to control the column units [PR49973] [PR86904]

2020-07-14 Thread David Malcolm via Gcc-patches
On Mon, 2020-07-13 at 17:07 -0400, Lewis Hyatt wrote:
> On Mon, Jul 13, 2020 at 03:04:20PM -0400, David Malcolm wrote:

[...]

> > OK for trunk with those nits fixed.
> > 
> > Dave
> > 
> > 
> 
> Thanks again for your time! I will address the above and then push in
> a day or two.

Excellent - thanks for all your work on this.

Dave



Re: [PATCH] expr: Unbreak build of mesa [PR96194]

2020-07-14 Thread Richard Biener via Gcc-patches
On July 14, 2020 3:37:35 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>On Tue, Jul 07, 2020 at 09:04:40AM +0200, Richard Biener via
>Gcc-patches wrote:
>> > The store to the whole of each volatile object was picked apart
>> > like there had been an individual assignment to each of the
>> > fields.  Reads were added as part of that; see PR for details.
>> > The reads from volatile memory were a clear bug; individual
>> > stores questionable.  A separate patch clarifies the docs.
>> >
>> > Tested x86_64-linux, powerpc64le-linux and cris-elf.
>> > Ok to commit?  Backport to gcc-10?
>> 
>> OK for both.
>
>This breaks building of mesa on both the trunk and 10 branch.
>
>The problem is that the middle-end may never create temporaries of
>non-POD
>(TREE_ADDRESSABLE) types, those can be only created when the language
>says
>so and thus only the FE is allowed to create those.
>
>This patch just reverts the behavior to what we used to do before for
>the
>stores to volatile non-PODs.  Perhaps we want to do something else, but
>definitely we can't create temporaries of the non-POD type.  It is up
>to
>discussions on what should happen in those cases.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
>trunk/10.2?

OK. 

Richard. 

>2020-07-14  Jakub Jelinek  
>
>   PR middle-end/96194
>   * expr.c (expand_constructor): Don't create temporary for store to
>   volatile MEM if exp has an addressable type.
>
>   * g++.dg/opt/pr96194.C: New test.
>
>--- gcc/expr.c.jj  2020-07-13 19:09:33.173872178 +0200
>+++ gcc/expr.c 2020-07-14 13:07:26.228801996 +0200
>@@ -8382,7 +8382,9 @@ expand_constructor (tree exp, rtx target
>  || GET_CODE (target) == PARALLEL || modifier == EXPAND_STACK_PARM
>   /* Also make a temporary if the store is to volatile memory, to
>avoid individual accesses to aggregate members.  */
>-  || (GET_CODE (target) == MEM && MEM_VOLATILE_P (target)))
>+  || (GET_CODE (target) == MEM
>+&& MEM_VOLATILE_P (target)
>+&& !TREE_ADDRESSABLE (TREE_TYPE (exp
> {
>   if (avoid_temp_mem)
>   return NULL_RTX;
>--- gcc/testsuite/g++.dg/opt/pr96194.C.jj  2020-07-14 13:15:32.185733939
>+0200
>+++ gcc/testsuite/g++.dg/opt/pr96194.C 2020-07-14 13:16:16.033096197
>+0200
>@@ -0,0 +1,21 @@
>+// PR middle-end/96194
>+// { dg-do compile }
>+// { dg-options "-O2" }
>+
>+#include 
>+
>+struct A { ~A (); };
>+struct B : A { float e[64]; };
>+
>+B *
>+foo ()
>+{
>+  return new ((void *) 0) B ();
>+}
>+
>+B *
>+bar (void *x, bool y)
>+{
>+  void *p = y ? x : (void *) 0;
>+  return new (p) B ();
>+}
>
>
>   Jakub



Re: [PATCH] builtins: Avoid useless char/short -> int promotions before atomics [PR96176]

2020-07-14 Thread Segher Boessenkool
Hi!

On Tue, Jul 14, 2020 at 03:26:50PM +0200, Jakub Jelinek wrote:
> On Tue, Jul 14, 2020 at 02:33:41PM +0200, Richard Biener wrote:
> > > As mentioned in the PR, we generate a useless movzbl insn before lock 
> > > cmpxchg.
> > > The problem is that the builtin for the char/short cases has the arguments
> > > promoted to int and combine gives up, because the instructions have
> > > MEM_VOLATILE_P arguments and recog in that case doesn't recognize anything
> > > when volatile_ok is false, and nothing afterwards optimizes the
> > > (reg:SI a) = (zero_extend:SI (reg:QI a))
> > > ... (subreg:QI (reg:SI a) 0) ...
> > 
> > So the above isn't fixable?  Because it would probably be the more
> > generic fix, right?
> 
> I'm afraid it is not, CCing Segher on that.  The question is how to
> differentiate between "combine didn't do anything dangerous to this
> MEM_VOLATILE_P and just kept it in the pattern as is" vs.
> "combine changed it in a dangerous way unsuitable for volatile accesses".

This can be handled by having a predicate that allows volatile memory.
Various targets do this already (rs6000 is the most prominent).  You then
have to make sure that every insn using one of those predicates actually
*does* do everything volatile memory needs to be done ;-)

If ever variable is aligned (say the ABI requires that), you are 98%
there already.  If not, it is much harder.

> Even if we wanted to make just the case where i3 is the only insn
> that originally contains MEM_VOLATILE_P and checked that the MEM stayed as
> is, there is the difficulty that we change the insns in place, so remembering
> how it looked before is hard.  And then before trying to recognize it we'd
> carefully need to check that nothing else is volatile before enabling
> temporarily volatile_ok.

Yeah, don't, this wont't work :-)


Segher


Re: [PATCH] builtins: Avoid useless char/short -> int promotions before atomics [PR96176]

2020-07-14 Thread Richard Biener
On Tue, 14 Jul 2020, Jakub Jelinek wrote:

> On Tue, Jul 14, 2020 at 02:33:41PM +0200, Richard Biener wrote:
> > > As mentioned in the PR, we generate a useless movzbl insn before lock 
> > > cmpxchg.
> > > The problem is that the builtin for the char/short cases has the arguments
> > > promoted to int and combine gives up, because the instructions have
> > > MEM_VOLATILE_P arguments and recog in that case doesn't recognize anything
> > > when volatile_ok is false, and nothing afterwards optimizes the
> > > (reg:SI a) = (zero_extend:SI (reg:QI a))
> > > ... (subreg:QI (reg:SI a) 0) ...
> > 
> > So the above isn't fixable?  Because it would probably be the more
> > generic fix, right?
> 
> I'm afraid it is not, CCing Segher on that.  The question is how to
> differentiate between "combine didn't do anything dangerous to this
> MEM_VOLATILE_P and just kept it in the pattern as is" vs.
> "combine changed it in a dangerous way unsuitable for volatile accesses".
> Even if we wanted to make just the case where i3 is the only insn
> that originally contains MEM_VOLATILE_P and checked that the MEM stayed as
> is, there is the difficulty that we change the insns in place, so remembering
> how it looked before is hard.  And then before trying to recognize it we'd
> carefully need to check that nothing else is volatile before enabling
> temporarily volatile_ok.
> 
> > > +  if (TREE_CODE (exp) == SSA_NAME
> > > +  && TYPE_MODE (TREE_TYPE (exp)) != mode)
> > > +{
> > > +  /* Undo argument promotion if possible, as combine might not
> > > +  be able to do it later due to MEM_VOLATILE_P uses in the
> > > +  patterns.  */
> > > +  gimple *g = get_gimple_for_ssa_name (exp);
> > > +  if (g && gimple_assign_cast_p (g))
> > > + {
> > > +   tree rhs = gimple_assign_rhs1 (g);
> > > +   if (TYPE_MODE (TREE_TYPE (rhs)) == mode)
> > 
> > Is this really enough to check?  What if the cast was truncating?
> > The gimple_assign_cast_p predicate also allows for FIX_TRUNC_EXPR
> > and VIEW_CONVERT_EXPR where for the latter the rhs is the 
> > VIEW_CONVERT_EXPR itself.
> 
> I don't know how could those happen.
> mode is the result of get_builtin_sync_mode or
> int_mode_for_size (BITS_PER_UNIT * size, 0).require () (the former is
> int_mode_for_size (...).require ()) and thus an integral mode.
> The checks verify that the SSA_NAME doesn't have that mode, but rhs1
> of the statement does have the right mode.
> As mode is integral, I think that rules out FIX_TRUNC_EXPR
> which would need real mode on the argument, and for VCE, if VCE is
> in the operand itself, then it ought to have the same mode as the lhs.
> I believe the only possibility where exp doesn't have already the desired
> mode is the char/short to int promotion, otherwise all the builtins are
> prototyped and thus one shouldn't have some unrelated mode.
> 
> But if you want some extra checks just to be sure, I can add them, whether
> it means checking only for CONVERT_EXPR_CODE_P instead of all 4 rhs codes,
> and/or checking that both types are integral scalar and the conversion is an
> extension (by comparing type precisions).

I see the function is only used from atomics expansion but still
I would feel more comfortable with checking for just
CONVERT_EXPR_CODE_P and both integral types plus the precision bits.
You never know how these functions end up re-used ...

OK with those changes.

Thanks,
Richard.


Re: [PATCH] x86: Provide expanders for truncdisi2 and friends.

2020-07-14 Thread Richard Biener via Gcc-patches
On Mon, Jul 13, 2020 at 4:50 PM Roger Sayle  wrote:
>
>
> Hi Richard,
>
> > It seems to be improving TARGET_TRULY_NOOP_TRUNCATION documentation might 
> > be useful here.
>
> This is an excellent suggestion.  How about the following/attached:
>
> 2020-07-13  Roger Sayle  
>
> gcc/ChangeLog:
> * doc/tm.texi (TARGET_TRULY_NOOP_TRUNCATION): Clarify that targets
> that (sometimes) return false, indicating SUBREGs shouldn't be
> used, also need to provide a trunc?i?i2 optab that performs this
> truncation.
>
> > The only user (after your patch) of this hook is in function.c for the 
> > function parameter setup btw.
>
> The targetm.truly_noop_truncation in assign_parm_setup_block is the last 
> place that calls this
> hook directly (with sizes), but the majority of uses go via 
> TRULY_NOOP_TRUNCATION_MODES_P
> as defined in machmode.h.
>
> I'll prepare a patch to switch function.c to use TRULY_NOOP_TRUNCATION_MODE_P 
> so that we
> are consistent throughout the compiler.  In theory, this hook could then be 
> changed to take modes
> instead of (poly_unit64) sizes, but that clean-up might be tricky without 
> access to the affected
> platforms.
>
> The hard register semantics that you're referring to are related to 
> TARGET_MODES_TIEABLE_P,
> which is documented to have interesting interactions with 
> TARGET_TRULY_NOOP_TRUNCATION.
>
> Is the above documentation change Ok for mainline?

OK.

Thanks,
Richard.

> Thanks,
> Roger
> --
>


Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]

2020-07-14 Thread David Edelsohn via Gcc-patches
Unfortunately this patch is eliciting a number of new testsuite
failures, all like

error: unrecognizable insn:
(insn 44 43 45 5 (parallel [
(set (reg:SI 199)
(unspec:SI [
(reg:SF 202)
] UNSPEC_SI_FROM_SF))
(clobber (scratch:V4SF))
]) 
"/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1
-1
 (nil))
during RTL pass: vregs

for

gcc.dg/vect/vect-alias-check-11.c
gcc.dg/vect/vect-alias-check-12.c
gcc.dg/vect/pr57741-2.c
gcc.dg/vect/pr57741-3.c
gcc.dg/vect/pr89440.c
gcc.target/powerpc/sse-movss-1.c

Thanks, David

On Mon, Jul 13, 2020 at 2:30 AM luoxhu  wrote:
>
> Hi,
>
> On 2020/7/11 08:54, Segher Boessenkool wrote:
> > Hi!
> >
> > On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
> >> OK, seems the md file needs a format tool too...
> >
> > Heh.  Just make sure it looks good (that is, does what it looks like),
> > looks like the rest, etc.  It's hard to do anything nice with unspecs,
> > [ ] lists do not format well.
> >
>  +  "TARGET_NO_SF_SUBREG"
>  +  "#"
>  +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> >>>
> >>> Put this in the insn condition?  And since this is just a predicate,
> >>> you can just use it instead of gpc_reg_operand.
> >>>
> >>> (The split condition becomes "&& 1" then, not "").
> >>
> >> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
> >> movdi_from_sf_zero_ext all use it as condition...
> >
> > Since in your case you *always* split, the split condition should be
> > "always".  There are no alternatives that do not split here.
> >
> >> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
> >> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
> >> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
> >
> > Good question.  I do not know.
> >
> > Well...  Since this define_insn* requires p8 *anyway*, we do not need
> > any of these sf_subreg things?  We always know for each one if it should
> > be true or false.
>
> Yes, removed the vsx_reg_sfsubreg_ok check.
>
> >
> >> +  "TARGET_NO_SF_SUBREG"
> >
> > But here we should require p8 some other way, then.
>
> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && 
> TARGET_POWERPC64
> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.
>
> >
> >> +  (set_attr "isa" "p8v")])
> >
> > (This isn't enough, unfortunately).
> >
>
>
> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:
>
>
> For extracting high part element from DImode register like:
>
> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>
> split it before reload with "and mask" to avoid generating shift right
> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
> PR67741, etc.
>
> srdi 3,3,32
> sldi 9,3,32
> mtvsrd 1,9
> xscvspdpn 1,1
>
> =>
>
> rldicr 3,3,0,31
> mtvsrd 1,3
> xscvspdpn 1,1
>
> Bootstrap and regression tested pass on Power8-LE.
>
> gcc/ChangeLog:
>
> 2020-07-13  Xionghu Luo  
>
> PR rtl-optimization/89310
> * config/rs6000/rs6000.md (movsf_from_si2): New
> define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
> 2020-07-13  Xionghu Luo  
>
> PR rtl-optimization/89310
> * gcc.target/powerpc/pr89310.c: New test.
> ---
>  gcc/config/rs6000/rs6000.md| 31 ++
>  gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 
>  2 files changed, 48 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4fcd6a94022..480385ed4d2 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
> "*,  *, p9v,   p8v,   *, *,
>  p8v,p8v,   p8v,   *")])
>
> +;; For extracting high part element from DImode register like:
> +;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> +;; split it before reload with "and mask" to avoid generating shift right
> +;; 32 bit then shift left 32 bit.
> +(define_insn_and_split "movsf_from_si2"
> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
> +   (unspec:SF
> +[(subreg:SI
> +  (ashiftrt:DI
> +   (match_operand:DI 1 "input_operand" "r")
> +   (const_int 32))
> +  0)]
> +UNSPEC_SF_FROM_SI))
> +  (clobber (match_scratch:DI 2 "=r"))]
> +  "TARGET_NO_SF_SUBREG"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  if (GET_CODE (operands[2]) == SCRATCH)
> +operands[2] = gen_reg_rtx (DImode);
> +
> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
> +  emit_insn (

[Patch, committed] [OpenMP, Fortran] Fix goacc/finalize-1.f tree dump-scanning for -m32 (was: [Patch] [OpenMP, Fortran] Add structure/derived-type element mapping)

2020-07-14 Thread Tobias Burnus

As pointed out by Jakub, this fails with -m32 as the dump-scanning
assumed ptrdiff node = integer(kind=8).

Committed as obvious (and as pre-approved) to mainline + OG10 after testing.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit 524862db444b6544c6dc87c5f06f351100ecf50d
Author: Tobias Burnus 
Date:   Tue Jul 14 16:28:37 2020 +0200

Fix goacc/finalize-1.f tree dump-scanning for -m32

gcc/testsuite/ChangeLog:

* gfortran.dg/goacc/finalize-1.f: Relax scan-tree-dump-times
pattern to work on 32bit-pointer systems.

diff --git a/gcc/testsuite/gfortran.dg/goacc/finalize-1.f b/gcc/testsuite/gfortran.dg/goacc/finalize-1.f
index 74fa408082b..266ead35192 100644
--- a/gcc/testsuite/gfortran.dg/goacc/finalize-1.f
+++ b/gcc/testsuite/gfortran.dg/goacc/finalize-1.f
@@ -20,7 +20,7 @@
 ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_enter_exit_data map\\(delete:del_f \\\[len: \[0-9\]+\\\]\\) finalize$" 1 "gimple" } }
 
 !$ACC EXIT DATA FINALIZE DELETE (del_f_p(2:5))
-! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(release:\\*\\(c_char \\*\\) parm\\.0\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(to:del_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) del_f_p\\.data \\\[pointer assign, bias: \\(integer\\(kind=8\\)\\) parm\\.0\\.data - \\(integer\\(kind=8\\)\\) del_f_p\\.data\\\]\\) finalize;$" 1 "original" } }
+! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(release:\\*\\(c_char \\*\\) parm\\.0\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(to:del_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) del_f_p\\.data \\\[pointer assign, bias: \\(integer\\(kind=.\\)\\) parm\\.0\\.data - \\(integer\\(kind=.\\)\\) del_f_p\\.data\\\]\\) finalize;$" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_enter_exit_data map\\(delete:MEM\\\[\\(c_char \\*\\)\[^\\\]\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(to:del_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:del_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } }
 
 !$ACC EXIT DATA COPYOUT (cpo_r)
@@ -32,6 +32,6 @@
 ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_enter_exit_data map\\(force_from:cpo_f \\\[len: \[0-9\]+\\\]\\) finalize$" 1 "gimple" } }
 
 !$ACC EXIT DATA COPYOUT (cpo_f_p(4:10)) FINALIZE
-! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(from:\\*\\(c_char \\*\\) parm\\.1\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(to:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) cpo_f_p\\.data \\\[pointer assign, bias: \\(integer\\(kind=8\\)\\) parm\\.1\\.data - \\(integer\\(kind=8\\)\\) cpo_f_p\\.data\\\]\\) finalize;$" 1 "original" } }
+! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(from:\\*\\(c_char \\*\\) parm\\.1\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(to:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) cpo_f_p\\.data \\\[pointer assign, bias: \\(integer\\(kind=.\\)\\) parm\\.1\\.data - \\(integer\\(kind=.\\)\\) cpo_f_p\\.data\\\]\\) finalize;$" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_enter_exit_data map\\(force_from:MEM\\\[\\(c_char \\*\\)\[^\\\]\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(to:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:cpo_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } }
   END SUBROUTINE f


[Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-07-14 Thread Qing Zhao via Gcc-patches
Hi, Gcc team,

This patch is a follow-up on the previous patch and corresponding discussion:
https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545101.html 


From the previous round of discussion, the major issues raised were:

A. should be rewritten by using regsets infrastructure.  
B. Put the patch into middle-end instead of x86 backend. 

This new patch is rewritten based on the above 2 comments.  The major changes 
compared to the previous patch are:

1. Change the names of the option and attribute from 
-mzero-caller-saved-regs=[skip|used-gpr|all-gpr|used|all]  and 
zero_caller_saved_regs("skip|used-gpr|all-gpr||used|all”)
to:
-fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]   and  
zero_call_used_regs("skip|used-gpr|all-gpr||used|all”) 
Add the new option and  new attribute in general. 
2. The main code generation part is moved from i386 backend to middle-end;
3. Add 4 target-hooks;
4. Implement these 4 target-hooks on i386 backend. 
5. On a target that does not implement the target hook, issue error for the new 
option, issue warning for the new attribute.

The patch is as following:

[PATCH] Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]
command-line option and
zero_call_used_regs("skip|used-gpr|all-gpr||used|all") function attribue:

  1. -fzero-call-used-regs=skip and zero_call_used_regs("skip")

  Don't zero call-used registers upon function return.

  2. -fzero-call-used-regs=used-gpr and zero_call_used_regs("used-gpr")

  Zero used call-used general purpose registers upon function return.

  3. -fzero-call-used-regs=all-gpr and zero_call_used_regs("all-gpr")

  Zero all call-used general purpose registers upon function return.

  4. -fzero-call-used-regs=used and zero_call_used_regs("used")

  Zero used call-used registers upon function return.

  5. -fzero-call-used-regs=all and zero_call_used_regs("all")

  Zero all call-used registers upon function return.

The feature is implemented in middle-end. But currently is only valid on X86.

Tested on x86-64 and aarch64 with bootstrapping GCC trunk, making
-fzero-call-used-regs=used-gpr, -fzero-call-used-regs=all-gpr
-fzero-call-used-regs=used, and -fzero-call-used-regs=all enabled
by default on x86-64.

Please take a look and let me know any more comment?

thanks.

Qing




gcc/ChangeLog:

2020-07-13  qing zhao  mailto:qing.z...@oracle.com>>
2020-07-13  H.J. Lu mailto:hjl.to...@gmail.com>>

* common.opt: Add new option -fzero-call-used-regs.
* config/i386/i386.c (ix86_zero_call_used_regno_p): New function.
(ix86_zero_call_used_regno_mode): Likewise.
(ix86_zero_all_vector_registers): Likewise.
(ix86_expand_prologue): Replace gen_prologue_use with
gen_pro_epilogue_use.
(TARGET_ZERO_CALL_USED_REGNO_P): Define.
(TARGET_ZERO_CALL_USED_REGNO_MODE): Define.
(TARGET_PRO_EPILOGUE_USE): Define.
(TARGET_ZERO_ALL_VECTOR_REGISTERS): Define.
* config/i386/i386.md: Replace UNSPECV_PROLOGUE_USE
with UNSPECV_PRO_EPILOGUE_USE.
* coretypes.h (enum zero_call_used_regs): New type.
* doc/extend.texi: Document the new zero_call_used_regs attribute.
* doc/invoke.texi: Document the new -fzero-call-used-regs option.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in (TARGET_ZERO_CALL_USED_REGNO_P): New hook.
(TARGET_ZERO_CALL_USED_REGNO_MODE): Likewise.
(TARGET_PRO_EPILOGUE_USE): Likewise.
(TARGET_ZERO_ALL_VECTOR_REGISTERS): Likewise.
* function.c (is_live_reg_at_exit): New function.
(gen_call_used_regs_seq): Likewise.
(make_epilogue_seq): Call gen_call_used_regs_seq.
* function.h (is_live_reg_at_exit): Declare.
* target.def (zero_call_used_regno_p): New hook.
(zero_call_used_regno_mode): Likewise.
(pro_epilogue_use): Likewise.
(zero_all_vector_registers): Likewise.
* targhooks.c (default_zero_call_used_regno_p): New function.
(default_zero_call_used_regno_mode): Likewise.
* targhooks.h (default_zero_call_used_regno_p): Declare.
(default_zero_call_used_regno_mode): Declare.
* toplev.c (process_options): Issue errors when -fzero-call-used-regs
is used on targets that do not support it.
* tree-core.h (struct tree_decl_with_vis): New field 
zero_call_used_regs_type.
* tree.h (DECL_ZERO_CALL_USED_REGS): New macro.

gcc/c-family/ChangeLog:

2020-07-13  qing zhao  mailto:qing.z...@oracle.com>>
2020-07-13  H.J. Lu mailto:hjl.to...@gmail.com>>

* c-attribs.c (c_common_attribute_table): Add new attribute
zero_call_used_regs.
(handle_zero_call_used_regs_attribute): New function.

gcc/c/ChangeLog:

2020-07-13  qing zhao  mailto:qing.z...@oracle.com>>
2020-07-13  H.J. Lu mailto:hjl.to...@gmail.com>>

* c-decl.c (merge_decls): Merge zero_call_used_regs

Re: [PATCH] libgomp: Add OMPD functions in 5.5.6 and related data types.

2020-07-14 Thread y2s1982 via Gcc-patches
Hello Jakub,


On Tue, Jul 14, 2020 at 6:07 AM Jakub Jelinek  wrote:

> On Sat, Jul 11, 2020 at 06:05:36PM -0400, y2s1982 wrote:
> > 2020-07-11  Tony Sim  
> >
> > libgomp/ChangeLog:
> >
> >   * libgompd.h (ompd_thread_handle_t): Add.
> >   (ompd_parallel_handle_t): Add.
> >   (ompd_task_handle_t): Add.
> >   * ompd-parallel.c: New file.
>
> So you add a new file, but don't add it to Makefile.am - that means
> nothing will compile it.

> +}ompd_thread_handle_t;
>
> Formatting, space after }
> > +
> > +typedef struct _ompd_parallel_handle{
>
> and space bef before { (etc.).
> > +  ompd_address_space_handle_t *ah;
> > +  ompd_parallel_handle_t *enclosing_ph;
> > +  ompd_size_t enclosed_ph;
> > +  ompd_address_t thread_pool; /* Stores gomp_thread_pool *.  */
> > +}ompd_parallel_handle_t;
> > +
> > +typedef struct _ompd_task_handle{
> > +  ompd_parallel_handle_t *ph;
> > +}ompd_task_handle_t;
> > +
> >  #endif /* LIBGOMPD_H */
> > +  ompd_rc_t ret = ompd_rc_error;
> > +  ompd_size_t i = 0;
> > +  struct gomp_thread_pool * pool =
> > +  (struct gomp_thread_pool *)ph->thread_pool.address;
>
> Formatting, = should never be at the end of line.  And no space between
> * and pool, so:
>   struct gomp_thread_pool *pool
> = (struct gomp_thread_pool *) ph->thread_pool.address;
>
Ah! I was hoping to find some information on this. I kept looking around the
code base but the formatting wasn't consistent. Thank you for spelling this
out for me.

>
> > +  for (i = 0; i < pool->threads_used && ret == ompd_rc_error; i++)
> > +  {
> > +if (pool->threads[i]->ts.team == NULL)
> > +  ret = ompd_rc_ok;
> > +  }
>
> Like I said on other patches, { would need to be indented by 2 spaces from
> for, but as the body contains a single statement, just leave the {}s out
> completely and then it can stay as is.
>
Yes, of course. I am sorry, my old habbit is dieing hard. I will keep it in
mind.


>
> More important, I don't see any function that would initialize
> e.g. threads_used, etc., IMNSHO you should start with those and
> there write the reading of those from the inferior.
>

I started from parallel only because it seemed easier haha.
Okay, I will get working on the thread section, which was just previous to
this section.
Does this mean I should scrap this patch for now and submit something on
this section after the thread section is completed?

And, unless that routine copies everything from the inferior, which is risky
> because it can change there any time, I think the above is not really what
> you want, you instead want to read it from the inferior.
>

When you say inferior, are you referring to the gomp_thread and
gomp_thread_pool?

The debugged process (if it is a process and not e.g. a core file) is not in
> the same address space as the debugger (that loads the OMPD library), so
> even if you get pointers copied from the debugged process, you can't
> dereference them, but need to use the callbacks to read the corresponding
> memory.
>

Okay. I was afraid I might have made a mistake with this regard. The use of
callbacks was to make the debugging process decoupled, right? I think I
remember reading the example of running the process and debugging in a
different machine.

Thank you again for the help.

Cheers,

Tony Sim

>
> Jakub
>
>


c++: refactor a few class.c fns

2020-07-14 Thread Nathan Sidwell
Storing CLASSTYPE_AS_BASE in a local variable makes some code clearer 
(and textually no longer).  For some reason we store a DECL in a 
variable called 'value', which is confusing.


gcc/cp/
* class.c (build_base_field_1): Cache CLASSTYPE_AS_BASE.
(build_self_reference): Rename value -> decl.
(dump_class_hierarchy_1): Cache CLASSTYPE_AS_BASE.

pushed
--
Nathan Sidwell
diff --git i/gcc/cp/class.c w/gcc/cp/class.c
index c49055d384e..803b33bf346 100644
--- i/gcc/cp/class.c
+++ w/gcc/cp/class.c
@@ -4456,9 +4456,10 @@ build_base_field_1 (tree t, tree binfo, tree access, tree *&next_field)
 {
   /* Create the FIELD_DECL.  */
   tree basetype = BINFO_TYPE (binfo);
-  gcc_assert (CLASSTYPE_AS_BASE (basetype));
-  tree decl = build_decl (input_location,
-			  FIELD_DECL, NULL_TREE, CLASSTYPE_AS_BASE (basetype));
+  tree as_base = CLASSTYPE_AS_BASE (basetype);
+  gcc_assert (as_base);
+  tree decl = build_decl (input_location, FIELD_DECL, NULL_TREE, as_base);
+
   DECL_ARTIFICIAL (decl) = 1;
   DECL_IGNORED_P (decl) = 1;
   DECL_FIELD_CONTEXT (decl) = t;
@@ -8678,20 +8679,20 @@ void
 build_self_reference (void)
 {
   tree name = DECL_NAME (TYPE_NAME (current_class_type));
-  tree value = build_lang_decl (TYPE_DECL, name, current_class_type);
+  tree decl = build_lang_decl (TYPE_DECL, name, current_class_type);
 
-  DECL_NONLOCAL (value) = 1;
-  DECL_CONTEXT (value) = current_class_type;
-  DECL_ARTIFICIAL (value) = 1;
-  SET_DECL_SELF_REFERENCE_P (value);
-  set_underlying_type (value);
+  DECL_NONLOCAL (decl) = 1;
+  DECL_CONTEXT (decl) = current_class_type;
+  DECL_ARTIFICIAL (decl) = 1;
+  SET_DECL_SELF_REFERENCE_P (decl);
+  set_underlying_type (decl);
 
   if (processing_template_decl)
-value = push_template_decl (value);
+decl = push_template_decl (decl);
 
   tree saved_cas = current_access_specifier;
   current_access_specifier = access_public_node;
-  finish_member_declaration (value);
+  finish_member_declaration (decl);
   current_access_specifier = saved_cas;
 }
 
@@ -9006,11 +9007,11 @@ dump_class_hierarchy_1 (FILE *stream, dump_flags_t flags, tree t)
   fprintf (stream, "   size=%lu align=%lu\n",
 	   (unsigned long)(tree_to_shwi (TYPE_SIZE (t)) / BITS_PER_UNIT),
 	   (unsigned long)(TYPE_ALIGN (t) / BITS_PER_UNIT));
-  fprintf (stream, "   base size=%lu base align=%lu\n",
-	   (unsigned long)(tree_to_shwi (TYPE_SIZE (CLASSTYPE_AS_BASE (t)))
-			   / BITS_PER_UNIT),
-	   (unsigned long)(TYPE_ALIGN (CLASSTYPE_AS_BASE (t))
-			   / BITS_PER_UNIT));
+  if (tree as_base = CLASSTYPE_AS_BASE (t))
+fprintf (stream, "   base size=%lu base align=%lu\n",
+	 (unsigned long)(tree_to_shwi (TYPE_SIZE (as_base))
+			 / BITS_PER_UNIT),
+	 (unsigned long)(TYPE_ALIGN (as_base) / BITS_PER_UNIT));
   dump_class_hierarchy_r (stream, flags, TYPE_BINFO (t), TYPE_BINFO (t), 0);
   fprintf (stream, "\n");
 }


c++: tree dumper

2020-07-14 Thread Nathan Sidwell

A couple of C++ nodes were unknown to the tree dumper.

gcc/cp/
* ptree.c (cxx_print_type): Add TYPEOF_TYPE and BASES.

pushed

--
Nathan Sidwell
diff --git i/gcc/cp/ptree.c w/gcc/cp/ptree.c
index 7d329049549..224cf14edb5 100644
--- i/gcc/cp/ptree.c
+++ w/gcc/cp/ptree.c
@@ -106,6 +106,16 @@ cxx_print_type (FILE *file, tree node, int indent)
 		  indent + 4);
   return;
 
+case TYPEOF_TYPE:
+  print_node (file, "expr", TYPEOF_TYPE_EXPR (node), indent + 4);
+  return;
+
+case BASES:
+  if (BASES_DIRECT (node))
+	fputs (" direct", file);
+  print_node (file, "type", BASES_TYPE (node), indent + 4);
+  return;
+
 case TYPE_PACK_EXPANSION:
   print_node (file, "args", PACK_EXPANSION_EXTRA_ARGS (node), indent + 4);
   return;


Re: [PATCH] Add TARGET_LOWER_LOCAL_DECL_ALIGNMENT [PR95237]

2020-07-14 Thread Sunil Pandey via Gcc-patches
On Sat, Jul 4, 2020 at 9:11 AM Richard Biener
 wrote:
>
> On July 3, 2020 11:16:46 PM GMT+02:00, Jason Merrill  wrote:
> >On 6/29/20 5:00 AM, Richard Biener wrote:
> >> On Fri, Jun 26, 2020 at 10:11 PM H.J. Lu  wrote:
> >>>
> >>> On Thu, Jun 25, 2020 at 1:10 AM Richard Biener
> >>>  wrote:
> 
>  On Thu, Jun 25, 2020 at 2:53 AM Sunil Pandey 
> >wrote:
> >
> > On Wed, Jun 24, 2020 at 12:30 AM Richard Biener
> >  wrote:
> >>
> >> On Tue, Jun 23, 2020 at 5:31 PM Sunil K Pandey via Gcc-patches
> >>  wrote:
> >>>
> >>> From: Sunil K Pandey 
> >>>
> >>> Default for this hook is NOP. For x86, in 32 bit mode, this hook
> >>> sets alignment of long long on stack to 32 bits if preferred
> >stack
> >>> boundary is 32 bits.
> >>>
> >>>   - This patch fixes
> >>>  gcc.target/i386/pr69454-2.c
> >>>  gcc.target/i386/stackalign/longlong-1.c
> >>>   - Regression test on x86-64, no new fail introduced.
> >>
> >> I think the name is badly chosen,
> >TARGET_LOWER_LOCAL_DECL_ALIGNMENT
> >
> > Yes, I can change the target hook name.
> >
> >> would be better suited (and then asks for LOCAL_DECL_ALIGNMENT to
> >be
> >> renamed to INCREASE_LOCAL_DECL_ALIGNMENT).
> >
> > It seems like LOCAL_DECL_ALIGNMENT macro documentation is
> >incorrect.
> > It increases as well as decreases alignment based on
> >condition(-m32
> > -mpreferred-stack-boundary=2)
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95885
> >
> >>
> >> You're calling it from do_type_align which IMHO is dangerous
> >since that's
> >> invoked from FIELD_DECL layout as well.  Instead invoke it from
> >> layout_decl itself where we do
> >>
> >>if (code != FIELD_DECL)
> >>  /* For non-fields, update the alignment from the type.  */
> >>  do_type_align (type, decl);
> >>
> >> and invoke the hook _after_ do_type_align.  Also avoid
> >> invoking the hook on globals or hard regs and only
> >> invoke it on VAR_DECLs, thus only
> >>
> >>if (VAR_P (decl) && !is_global_var (decl) &&
> >!DECL_HARD_REGISTER (decl))
> >
> > It seems like decl property is not fully populated at this point
> >call
> > to is_global_var (decl) on global variable return false.
> >
> > $ cat foo.c
> > long long x;
> > int main()
> > {
> > if (__alignof__(x) != 8)
> >__builtin_abort();
> > return 0;
> > }
> >
> > Breakpoint 1, layout_decl (decl=0x77ffbb40, known_align=0)
> >  at /local/skpandey/gccwork/gccwork/gcc/gcc/stor-layout.c:674
> > 674 do_type_align (type, decl);
> > Missing separate debuginfos, use: dnf debuginfo-install
> > gmp-6.1.2-10.fc31.x86_64 isl-0.16.1-9.fc31.x86_64
> > libmpc-1.1.0-4.fc31.x86_64 mpfr-3.1.6-5.fc31.x86_64
> > zlib-1.2.11-20.fc31.x86_64
> > (gdb) call debug_tree(decl)
> >>  type  >  size 
> >  unit-size 
> >  align:64 warn_if_not_align:0 symtab:0 alias-set -1
> > canonical-type 0x7fffea801888 precision:64 min  > 0x7fffea7e8fd8 -9223372036854775808> max  >0x7fffea806000
> > 9223372036854775807>
> >  pointer_to_this >
> >  DI foo.c:1:11 size  unit-size
> > 
> >  align:1 warn_if_not_align:0>
> >
> > (gdb) p is_global_var(decl)
> > $1 = false
> > (gdb)
> >
> >
> > What about calling hook here
> >
> >   603 do_type_align (tree type, tree decl)
> >   604 {
> >   605   if (TYPE_ALIGN (type) > DECL_ALIGN (decl))
> >   606 {
> >   607   SET_DECL_ALIGN (decl, TYPE_ALIGN (type));
> >   608   if (TREE_CODE (decl) == FIELD_DECL)
> >   609 DECL_USER_ALIGN (decl) = TYPE_USER_ALIGN (type);
> >   610   else
> >   611 /* Lower local decl alignment */
> >   612 if (VAR_P (decl)
> >   613 && !is_global_var (decl)
> >   614 && !DECL_HARD_REGISTER (decl)
> >   615 && cfun != NULL)
> >   616   targetm.lower_local_decl_alignment (decl);
> >   617 }
> 
>  But that doesn't change anything (obviously).  layout_decl
>  is called quite early, too early it looks like.
> 
>  Now there doesn't seem to be any other good place where
>  we are sure to catch the decl before we evaluate things
>  like __alignof__
> 
>  void __attribute__((noipa))
>  foo (__SIZE_TYPE__ align, long long *p)
>  {
> if ((__SIZE_TYPE__)p & (align-1))
>   __builtin_abort ();
>  }
>  int main()
>  {
> long long y;
> foo (_Alignof y, &y);
> return 0;
>  }
> 
>  Joseph/Jason - do you have a good recommendation
>  how to deal with targets where natural alignment
>  is supposed to be lowered for optimization purposes?
>  (this case is for i?8

[PATCH, rs6000, gcc-8 ] Improve handling of built-in initialization. [PR95952]

2020-07-14 Thread will schmidt via Gcc-patches

Hi,
  We've got a scenario with a combination of old hardware, gcc-8 and
binutils where gcc will ICE during it's selftest.  This ICE was exposed
when the builtin processing for better #pragma support was added, where
we no longer skip builtin initialization based on the current mask.

Per the bug report and assorted debug, the ICE occurrs when building
the gcc-8 branch on a 970* based system with an old binutils.  (gcc-9 and
newer is OK.  binutils 2.34 is reported to allow success).

The attached patch adds a clause to the builtin initialization to skip
initialization of a builtin when the builtin mask is set but the icode
value is zero.   The subsequent assert check remains in place.

I've successfully tested this on a yellowdog (970mp) based system.
Mikael has confirmed this allows success on his system. 

OK for gcc-8 ?
Thanks,
-Will

  
2020-07-13  Will Schmidt  

gcc/ChangeLog:

PR target/95952

* gcc/config/rs6000.c (altivec_init_builtins): Add continue clause to 
predicate
builtin handling.



diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 75d40367a98..18713599d3b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -18015,10 +18015,24 @@ altivec_init_builtins (void)
 
   if (rs6000_overloaded_builtin_p (d->code))
mode1 = VOIDmode;
   else
{
+ /* PR95952:  Gracefully skip builtins that do not have the icode 
properly
+ set, but do have the builtin mask set.  This has occurred in older gcc
+ builds with older binutils support when binutils refuses code 
generation
+ for instructions that it does not support.  This was exposed by 
changes
+ allowing all builtins being initialized for better #pragma support.  
*/
+ if (d->icode == CODE_FOR_nothing && d->mask) {
+HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
+if (TARGET_DEBUG_BUILTIN) {
+   fprintf (stderr, "altivec_init_builtins, altivec predicate 
builtin %s", d->name);
+   fprintf (stderr, " was skipped.  icode:%d, mask: %lx, 
builtin_mask: 0x%lx",
+d->icode, d->mask, builtin_mask);
+ }
+ continue;
+ }
  /* Cannot define builtin if the instruction is disabled.  */
  gcc_assert (d->icode != CODE_FOR_nothing);
  mode1 = insn_data[d->icode].operand[1].mode;
}
 



Re: [PATCH] rs6000: clean up testsuite power10_hw check

2020-07-14 Thread Segher Boessenkool
Hi!

On Mon, Jul 13, 2020 at 04:27:42PM -0500, Aaron Sawdey via Gcc-patches wrote:
> Because the check for power10_hw is not called
> check_effective_target_power10_hw, it needs to be looked
> for by is-effective-target-keyword.

Ah, so *that* is the reason we have this...  So probably we should just
call the function check_effective_target_power10_hw and then everything
works as we want?

(For future consideration, of course :-) )


Segher


[PATCH v2] sparc/sparc64: use crtendS.o for default-pie executables [PR96190]

2020-07-14 Thread Sergei Trofimovich via Gcc-patches
From: Sergei Trofimovich 

In --enable-default-pie mode compiler should switch from
using crtend.o to crtendS.o. On sparc it is especially important
because crtend.o contains PIC-unfriendly code.

We use GNU_USER_TARGET_ENDFILE_SPEC as a baseline spec to get
crtendS.o instead of crtend.o in !no-pie mode.

gcc:

2020-07-14  Sergei Trofimovich  

PR target/96190
* config/sparc/linux.h: Extend GNU_USER_TARGET_ENDFILE_SPEC
to get crtendS.o for !no-pie mode.
* config/sparc/linux64.h: Ditto.
---
 gcc/config/sparc/linux.h   | 10 ++
 gcc/config/sparc/linux64.h | 10 ++
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h
index 81201e67a2f..63853e60c03 100644
--- a/gcc/config/sparc/linux.h
+++ b/gcc/config/sparc/linux.h
@@ -27,16 +27,10 @@ along with GCC; see the file COPYING3.  If not see
 }  \
   while (0)
 
-/* Provide a ENDFILE_SPEC appropriate for GNU/Linux.  Here we tack on
-   the GNU/Linux magical crtend.o file (see crtstuff.c) which
-   provides part of the support for getting C++ file-scope static
-   object constructed before entering `main', followed by a normal
-   GNU/Linux "finalizer" file, `crtn.o'.  */
-
 #undef  ENDFILE_SPEC
 #define ENDFILE_SPEC \
-  "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
-   %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  GNU_USER_TARGET_ENDFILE_SPEC \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
 /* -mcpu=native handling only makes sense with compiler running on
a SPARC chip.  */
diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h
index a1a0efd8f28..19ce84d7adb 100644
--- a/gcc/config/sparc/linux64.h
+++ b/gcc/config/sparc/linux64.h
@@ -44,16 +44,10 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU64_DEFAULT_SPEC
 #define ASM_CPU64_DEFAULT_SPEC "-Av9a"
 
-/* Provide a ENDFILE_SPEC appropriate for GNU/Linux.  Here we tack on
-   the GNU/Linux magical crtend.o file (see crtstuff.c) which
-   provides part of the support for getting C++ file-scope static
-   object constructed before entering `main', followed by a normal
-   GNU/Linux "finalizer" file, `crtn.o'.  */
-
 #undef ENDFILE_SPEC
 #define ENDFILE_SPEC \
-  "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
-   %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  GNU_USER_TARGET_ENDFILE_SPEC \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
 /* The default code model.  */
 #undef SPARC_DEFAULT_CMODEL
-- 
2.27.0



c++: Parser initialization cleanup

2020-07-14 Thread Nathan Sidwell
The handling of PCH is a little trick, because we have to deal with it 
before allocating memory.  I found the layering somewhat confusing. 
This patch reorganizes that, so that the stopping of PCH is done in 
exactly one place, and the ordering of lexer creation relative to that 
is much clearer.


I also changed the error message about multiple source files as with 
C++20, 'modules' means something rather specific.


Other than the error message changes, no functional changes.

gcc/cp/
* parser.c (cp_lexer_alloc): Do not deal with PCH here.
(cp_lexer_new_main): Deal with PCH here.  Store the tokens 
directly

into the buffer.
(cp_lexer_new_from_tokens): Assert last token isn't purged 
either.
(cp_lexer_get_preprocessor_token): Change first arg to 
flags, adjust.

(cp_parser_new): Pass the lexer in, don't create it here.
(cp_parser_translation_unit): Initialize access checks here.
(cp_parser_initial_pragma): First token is provided by caller,
don't deal with PCH stopping here.  Adjust error message.
(c_parse_file): Adjust, change error message to avoid C++20 
module

confusion.

pushed

--
Nathan Sidwell
diff --git i/gcc/cp/parser.c w/gcc/cp/parser.c
index 9e32a3c7772..08cfd23d8c4 100644
--- i/gcc/cp/parser.c
+++ w/gcc/cp/parser.c
@@ -212,7 +212,7 @@ static int cp_lexer_saving_tokens
 static cp_token *cp_lexer_token_at
   (cp_lexer *, cp_token_position);
 static void cp_lexer_get_preprocessor_token
-  (cp_lexer *, cp_token *);
+  (unsigned, cp_token *);
 static inline cp_token *cp_lexer_peek_token
   (cp_lexer *);
 static cp_token *cp_lexer_peek_nth_token
@@ -613,12 +613,8 @@ debug (cp_parser *ptr)
 static cp_lexer *
 cp_lexer_alloc (void)
 {
-  cp_lexer *lexer;
-
-  c_common_no_more_pch ();
-
   /* Allocate the memory.  */
-  lexer = ggc_cleared_alloc ();
+  cp_lexer *lexer = ggc_cleared_alloc ();
 
   /* Initially we are not debugging.  */
   lexer->debugging_p = false;
@@ -631,31 +627,30 @@ cp_lexer_alloc (void)
   return lexer;
 }
 
-
 /* Create a new main C++ lexer, the lexer that gets tokens from the
preprocessor.  */
 
 static cp_lexer *
 cp_lexer_new_main (void)
 {
-  cp_lexer *lexer;
   cp_token token;
 
   /* It's possible that parsing the first pragma will load a PCH file,
  which is a GC collection point.  So we have to do that before
  allocating any memory.  */
+  cp_lexer_get_preprocessor_token (0, &token);
   cp_parser_initial_pragma (&token);
+  c_common_no_more_pch ();
 
-  lexer = cp_lexer_alloc ();
-
+  cp_lexer *lexer = cp_lexer_alloc ();
   /* Put the first token in the buffer.  */
-  lexer->buffer->quick_push (token);
+  cp_token *tok = lexer->buffer->quick_push (token);
 
   /* Get the remaining tokens from the preprocessor.  */
-  while (token.type != CPP_EOF)
+  while (tok->type != CPP_EOF)
 {
-  cp_lexer_get_preprocessor_token (lexer, &token);
-  vec_safe_push (lexer->buffer, token);
+  tok = vec_safe_push (lexer->buffer, cp_token ());
+  cp_lexer_get_preprocessor_token (C_LEX_STRING_NO_JOIN, tok);
 }
 
   lexer->next_token = lexer->buffer->address ();
@@ -698,7 +693,8 @@ cp_lexer_new_from_tokens (cp_token_cache *cache)
   /* Initially we are not debugging.  */
   lexer->debugging_p = false;
 
-  gcc_assert (!lexer->next_token->purged_p);
+  gcc_assert (!lexer->next_token->purged_p
+	  && !lexer->last_token->purged_p);
   return lexer;
 }
 
@@ -815,14 +811,14 @@ cp_lexer_saving_tokens (const cp_lexer* lexer)
processed strings.  */
 
 static void
-cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token *token)
+cp_lexer_get_preprocessor_token (unsigned flags, cp_token *token)
 {
   static int is_extern_c = 0;
 
/* Get a new token from the preprocessor.  */
   token->type
 = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
-			lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
+			flags);
   token->keyword = RID_MAX;
   token->purged_p = false;
   token->error_reported = false;
@@ -2029,7 +2025,7 @@ pop_unparsed_function_queues (cp_parser *parser)
 /* Constructors and destructors.  */
 
 static cp_parser *cp_parser_new
-  (void);
+  (cp_lexer *);
 
 /* Routines to parse various constructs.
 
@@ -3997,22 +3993,14 @@ cp_parser_make_indirect_declarator (enum tree_code code, tree class_type,
 /* Create a new C++ parser.  */
 
 static cp_parser *
-cp_parser_new (void)
+cp_parser_new (cp_lexer *lexer)
 {
-  cp_parser *parser;
-  cp_lexer *lexer;
-  unsigned i;
-
-  /* cp_lexer_new_main is called before doing GC allocation because
- cp_lexer_new_main might load a PCH file.  */
-  lexer = cp_lexer_new_main ();
-
   /* Initialize the binops_by_token so that we can get the tree
  directly from the token.  */
-  for (i = 0; i < sizeof (binops) / sizeof (binops[0]); i++)
+  for (unsigned i = 0; i < sizeof (binops) / sizeof (binops[0]); i++)
 binops_by_token[binops[i].token_t

Re: [PATCH v3] c++: Make convert_like complain about bad ck_ref_bind again [PR95789]

2020-07-14 Thread Nathan Sidwell

On 7/13/20 4:48 PM, Marek Polacek via Gcc-patches wrote:


Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/10?

-- >8 --
convert_like issues errors about bad_p conversions at the beginning
of the function, but in the ck_ref_bind case, it only issues them
after we've called convert_like on the next conversion.

This doesn't work as expected since r10-7096 because when we see
a conversion from/to class type in a template, we return early, thereby
missing the error, and a bad_p conversion goes by undetected.  That
made the attached test to compile even though it should not.

I had thought that I could just move the ck_ref_bind/bad_p errors
above to the rest of them, but that regressed diagnostics because
expr then wasn't converted yet by the nested convert_like_real call.

So, for bad_p conversions, do the normal processing, but still return
the IMPLICIT_CONV_EXPR to avoid introducing trees that the template
processing can't handle well.  This I achieved by adding a wrapper
function.


LGTM, thanks!



gcc/cp/ChangeLog:

PR c++/95789
PR c++/96104
PR c++/96179
* call.c (convert_like_real_1): Renamed from convert_like_real.
(convert_like_real): New wrapper for convert_like_real_1.

gcc/testsuite/ChangeLog:

PR c++/95789
PR c++/96104
PR c++/96179
* g++.dg/conversion/ref4.C: New test.
* g++.dg/conversion/ref5.C: New test.
* g++.dg/conversion/ref6.C: New test.
---
  gcc/cp/call.c  | 54 ++
  gcc/testsuite/g++.dg/conversion/ref4.C | 22 +++
  gcc/testsuite/g++.dg/conversion/ref5.C | 14 +++
  gcc/testsuite/g++.dg/conversion/ref6.C | 24 
  4 files changed, 98 insertions(+), 16 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/conversion/ref4.C
  create mode 100644 gcc/testsuite/g++.dg/conversion/ref5.C
  create mode 100644 gcc/testsuite/g++.dg/conversion/ref6.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 5341a572980..6d5d5e801a5 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -171,6 +171,8 @@ static tree build_over_call (struct z_candidate *, int, 
tsubst_flags_t);
 /*c_cast_p=*/false, (COMPLAIN))
  static tree convert_like_real (conversion *, tree, tree, int, bool,
   bool, tsubst_flags_t);
+static tree convert_like_real_1 (conversion *, tree, tree, int, bool,
+bool, tsubst_flags_t);
  static void op_error (const op_location_t &, enum tree_code, enum tree_code,
  tree, tree, tree, bool);
  static struct z_candidate *build_user_type_conversion_1 (tree, tree, int,
@@ -7281,6 +7283,39 @@ maybe_warn_array_conv (location_t loc, conversion *c, 
tree expr)
 "are only available with %<-std=c++20%> or %<-std=gnu++20%>");
  }
  
+/* Wrapper for convert_like_real_1 that handles creating IMPLICIT_CONV_EXPR.  */

+
+static tree
+convert_like_real (conversion *convs, tree expr, tree fn, int argnum,
+  bool issue_conversion_warnings,
+  bool c_cast_p, tsubst_flags_t complain)
+{
+  /* Creating &TARGET_EXPR<> in a template breaks when substituting,
+ and creating a CALL_EXPR in a template breaks in finish_call_expr
+ so use an IMPLICIT_CONV_EXPR for this conversion.  We would have
+ created such codes e.g. when calling a user-defined conversion
+ function.  */
+  tree conv_expr = NULL_TREE;
+  if (processing_template_decl
+  && convs->kind != ck_identity
+  && (CLASS_TYPE_P (convs->type) || CLASS_TYPE_P (TREE_TYPE (expr
+{
+  conv_expr = build1 (IMPLICIT_CONV_EXPR, convs->type, expr);
+  if (convs->kind != ck_ref_bind)
+   conv_expr = convert_from_reference (conv_expr);
+  if (!convs->bad_p)
+   return conv_expr;
+  /* Do the normal processing to give the bad_p errors.  But we still
+need to return the IMPLICIT_CONV_EXPR, unless we're returning
+error_mark_node.  */
+}
+  expr = convert_like_real_1 (convs, expr, fn, argnum,
+ issue_conversion_warnings, c_cast_p, complain);
+  if (expr == error_mark_node)
+return error_mark_node;
+  return conv_expr ? conv_expr : expr;
+}
+
  /* Perform the conversions in CONVS on the expression EXPR.  FN and
 ARGNUM are used for diagnostics.  ARGNUM is zero based, -1
 indicates the `this' argument of a method.  INNER is nonzero when
@@ -7292,9 +7327,9 @@ maybe_warn_array_conv (location_t loc, conversion *c, 
tree expr)
 conversions to inaccessible bases are permitted.  */
  
  static tree

-convert_like_real (conversion *convs, tree expr, tree fn, int argnum,
-  bool issue_conversion_warnings,
-  bool c_cast_p, tsubst_flags_t complain)
+convert_like_real_1 (conversion *convs, tree expr, tree fn, int argnum,
+bool issue_conversion_warnings,
+bool c_cast_p, tsubst_flags_t complain)
  {

[PATCH 2/4] libstdc++: Apply modifications to our local copy of Ryu

2020-07-14 Thread Patrick Palka via Gcc-patches
This performs the following modifications to our local copy of Ryu in
order to make it more easily usable for our std::to_chars implementation:

  * Remove all #includes
  * Remove copy_special_str routines
  * Adjust the exponent formatting to match printf
  * Remove some functions we're not going to use
  * Add an out-parameter to d2exp_buffered_n for the scientific exponent
  * Store the sign bit inside struct floating_decimal_[32|64]
  * Rename [df]2s_buffered_n and change their return type

libstdc++-v3/ChangeLog:

* src/c++17/ryu/common.h, src/c++17/ryu/d2fixed.c,
src/c++17/ryu/d2fixed_full_table.h, src/c++17/ryu/d2s.c,
src/c++17/ryu/d2s_intrinsics.h, src/c++17/ryu/f2s.c,
src/c++17/ryu/f2s_intrinsics.h: Apply local modifications.
---
 libstdc++-v3/src/c++17/ryu/common.h   | 19 
 libstdc++-v3/src/c++17/ryu/d2fixed.c  | 98 ++-
 .../src/c++17/ryu/d2fixed_full_table.h|  1 -
 libstdc++-v3/src/c++17/ryu/d2s.c  | 56 +++
 libstdc++-v3/src/c++17/ryu/d2s_intrinsics.h   |  4 -
 libstdc++-v3/src/c++17/ryu/f2s.c  | 52 +++---
 libstdc++-v3/src/c++17/ryu/f2s_intrinsics.h   |  4 -
 libstdc++-v3/src/c++17/ryu/generic_128.c  |  7 +-
 8 files changed, 43 insertions(+), 198 deletions(-)

diff --git a/libstdc++-v3/src/c++17/ryu/common.h 
b/libstdc++-v3/src/c++17/ryu/common.h
index 7dc130947ac..f8ee147db04 100644
--- a/libstdc++-v3/src/c++17/ryu/common.h
+++ b/libstdc++-v3/src/c++17/ryu/common.h
@@ -17,9 +17,6 @@
 #ifndef RYU_COMMON_H
 #define RYU_COMMON_H
 
-#include 
-#include 
-#include 
 
 #if defined(_M_IX86) || defined(_M_ARM)
 #define RYU_32_BIT_PLATFORM
@@ -83,22 +80,6 @@ static inline uint32_t log10Pow5(const int32_t e) {
   return (((uint32_t) e) * 732923) >> 20;
 }
 
-static inline int copy_special_str(char * const result, const bool sign, const 
bool exponent, const bool mantissa) {
-  if (mantissa) {
-memcpy(result, "NaN", 3);
-return 3;
-  }
-  if (sign) {
-result[0] = '-';
-  }
-  if (exponent) {
-memcpy(result + sign, "Infinity", 8);
-return sign + 8;
-  }
-  memcpy(result + sign, "0E0", 3);
-  return sign + 3;
-}
-
 static inline uint32_t float_to_bits(const float f) {
   uint32_t bits = 0;
   memcpy(&bits, &f, sizeof(float));
diff --git a/libstdc++-v3/src/c++17/ryu/d2fixed.c 
b/libstdc++-v3/src/c++17/ryu/d2fixed.c
index 5f479abb91b..642a29d3010 100644
--- a/libstdc++-v3/src/c++17/ryu/d2fixed.c
+++ b/libstdc++-v3/src/c++17/ryu/d2fixed.c
@@ -23,23 +23,11 @@
 //
 // -DRYU_AVOID_UINT128 Avoid using uint128_t. Slower, depending on your 
compiler.
 
-#include "ryu/ryu.h"
 
-#include 
-#include 
-#include 
-#include 
-#include 
 
 #ifdef RYU_DEBUG
-#include 
-#include 
 #endif
 
-#include "ryu/common.h"
-#include "ryu/digit_table.h"
-#include "ryu/d2fixed_full_table.h"
-#include "ryu/d2s_intrinsics.h"
 
 #define DOUBLE_MANTISSA_BITS 52
 #define DOUBLE_EXPONENT_BITS 11
@@ -328,33 +316,6 @@ static inline uint32_t lengthForIndex(const uint32_t idx) {
   return (log10Pow2(16 * (int32_t) idx) + 1 + 16 + 8) / 9;
 }
 
-static inline int copy_special_str_printf(char* const result, const bool sign, 
const uint64_t mantissa) {
-#if defined(_MSC_VER)
-  // TODO: Check that -nan is expected output on Windows.
-  if (sign) {
-result[0] = '-';
-  }
-  if (mantissa) {
-if (mantissa < (1ull << (DOUBLE_MANTISSA_BITS - 1))) {
-  memcpy(result + sign, "nan(snan)", 9);
-  return sign + 9;
-}
-memcpy(result + sign, "nan", 3);
-return sign + 3;
-  }
-#else
-  if (mantissa) {
-memcpy(result, "nan", 3);
-return 3;
-  }
-  if (sign) {
-result[0] = '-';
-  }
-#endif
-  memcpy(result + sign, "Infinity", 8);
-  return sign + 8;
-}
-
 int d2fixed_buffered_n(double d, uint32_t precision, char* result) {
   const uint64_t bits = double_to_bits(d);
 #ifdef RYU_DEBUG
@@ -372,20 +333,10 @@ int d2fixed_buffered_n(double d, uint32_t precision, 
char* result) {
 
   // Case distinction; exit early for the easy cases.
   if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
-return copy_special_str_printf(result, ieeeSign, ieeeMantissa);
+__builtin_abort();
   }
   if (ieeeExponent == 0 && ieeeMantissa == 0) {
-int index = 0;
-if (ieeeSign) {
-  result[index++] = '-';
-}
-result[index++] = '0';
-if (precision > 0) {
-  result[index++] = '.';
-  memset(result + index, '0', precision);
-  index += precision;
-}
-return index;
+__builtin_abort();
   }
 
   int32_t e2;
@@ -549,21 +500,9 @@ int d2fixed_buffered_n(double d, uint32_t precision, char* 
result) {
   return index;
 }
 
-void d2fixed_buffered(double d, uint32_t precision, char* result) {
-  const int len = d2fixed_buffered_n(d, precision, result);
-  result[len] = '\0';
-}
-
-char* d2fixed(double d, uint32_t precision) {
-  char* const buffer = (char*)malloc(2000);
-  const int index = d2fixed_buffered_n(d, precision, buffer);
-  buffer[index] = '\0';
-  retur

[PATCH 3/4] libstdc++: Add floating-point std::to_chars implementation

2020-07-14 Thread Patrick Palka via Gcc-patches
This implements the floating-point std::to_chars overloads for float,
double and long double.  We use the Ryu library to compute the shortest
round-trippable fixed and scientific forms of a number for float, double
and long double.  We also use Ryu for performing fixed and scientific
formatting of float and double. For formatting long double with an
explicit precision argument we use a printf fallback.  Hexadecimal
formatting for float, double and long double is implemented from
scratch.

The supported long double binary formats are float64 (same as double),
float80 (x86 extended precision), float128 and ibm128.

Much of the complexity of the implementation is in computing the exact
output length before handing it off to Ryu (which doesn't do bounds
checking).  In some cases it's hard to compute the output length before
the fact, so in these cases we instead compute an upper bound on the
output length and use a sufficiently-sized intermediate buffer (if the
output range is smaller than the upper bound).

Another source of complexity is in the general-with-precision formatting
mode, where we need to do zero-trimming of the string returned by Ryu, and
where we also take care to avoid having to format the string a second
time when the general formatting mode resolves to fixed.

Tested on x86_64-pc-linux-gnu, aarch64-unknown-linux-gnu,
s390x-ibm-linux-gnu, and powerpc64-unknown-linux-gnu.

libstdc++-v3/ChangeLog:

* acinclude.m4 (libtool_VERSION): Bump to 6:29:0.
* config/abi/pre/gnu.ver: Add new exports.
* configure: Regenerate.
* include/std/charconv (to_chars): Declare the floating-point
overloads for float, double and long double.
* src/c++17/Makefile.am (sources): Add floating_to_chars.cc.
* src/c++17/Makefile.in: Regenerate.
* src/c++17/floating_to_chars.cc: New file.
* testsuite/20_util/to_chars/long_double.cc: New test.
* testsuite/util/testsuite_abi.cc: Add new symbol version.
---
 libstdc++-v3/acinclude.m4 |2 +-
 libstdc++-v3/config/abi/pre/gnu.ver   |   12 +
 libstdc++-v3/configure|2 +-
 libstdc++-v3/include/std/charconv |   21 +
 libstdc++-v3/src/c++17/Makefile.am|1 +
 libstdc++-v3/src/c++17/Makefile.in|5 +-
 libstdc++-v3/src/c++17/floating_to_chars.cc   | 1488 +
 .../testsuite/20_util/to_chars/long_double.cc |  197 +++
 libstdc++-v3/testsuite/util/testsuite_abi.cc  |3 +-
 9 files changed, 1726 insertions(+), 5 deletions(-)
 create mode 100644 libstdc++-v3/src/c++17/floating_to_chars.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/to_chars/long_double.cc

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index ee5e0336f2c..e3926e1c9c2 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3846,7 +3846,7 @@ changequote([,])dnl
 fi
 
 # For libtool versioning info, format is CURRENT:REVISION:AGE
-libtool_VERSION=6:28:0
+libtool_VERSION=6:29:0
 
 # Everything parsed; figure out what files and settings to use.
 case $enable_symvers in
diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index edf4485e607..9a1bcfd25d1 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2299,6 +2299,18 @@ GLIBCXX_3.4.28 {
 
 } GLIBCXX_3.4.27;
 
+GLIBCXX_3.4.29 {
+# to_chars(char*, char*, [float|double|long double])
+_ZSt8to_charsPcS_[fdeg];
+
+# to_chars(char*, char*, [float|double|long double], chars_format)
+_ZSt8to_charsPcS_[fdeg]St12chars_format;
+
+# to_chars(char*, char*, [float|double|long double], chars_format, int)
+_ZSt8to_charsPcS_[fdeg]St12chars_formati;
+
+} GLIBCXX_3.4.28;
+
 # Symbols in the support library (libsupc++) have their own tag.
 CXXABI_1.3 {
 
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index dd54bd406a9..73f771e7335 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -75231,7 +75231,7 @@ $as_echo "$as_me: WARNING: === Symbol versioning will 
be disabled." >&2;}
 fi
 
 # For libtool versioning info, format is CURRENT:REVISION:AGE
-libtool_VERSION=6:28:0
+libtool_VERSION=6:29:0
 
 # Everything parsed; figure out what files and settings to use.
 case $enable_symvers in
diff --git a/libstdc++-v3/include/std/charconv 
b/libstdc++-v3/include/std/charconv
index cc7dd0e3758..042cc4fbcca 100644
--- a/libstdc++-v3/include/std/charconv
+++ b/libstdc++-v3/include/std/charconv
@@ -688,6 +688,27 @@ namespace __detail
   operator^=(chars_format& __lhs, chars_format __rhs) noexcept
   { return __lhs = __lhs ^ __rhs; }
 
+  // Floating-point std::to_chars
+
+  to_chars_result to_chars(char* __first, char* __last, float __value) 
noexcept;
+  to_chars_result to_chars(char* __first, char* __last, double __value) 
noexcept;
+  to_chars_result to_chars(char* __first, char* __last, long double __value)
+noexcept;
+
+  to_chars_result to_

[PATCH] libgomp: Add OMPD Address Space Information functions.

2020-07-14 Thread y2s1982 via Gcc-patches
This patch adds Address Space Information function implementations as
defined in section 5.5.4 of OpenMP API Specification 5.0. It also
defines a struct that stores various information used by OMPD.

This patch addressed all feedbacks.

2020-07-14  Tony Sim  

libgomp/ChangeLog:

* Makefile.am (libgompd_la_OBJECTS): Add ompd-addr.c.
* Makefile.in: Regenerate.
* libgompd.h (gompd_set_environment): Declare.
(gompd_env): Define.
(ompd_address_space_handle_t): Add new member variable.
* ompd-proc.c (gompd_set_environment): Define.
(ompd_process_initialize): Sets new handle member variable.
* ompd-addr.c: New file.

---
 libgomp/Makefile.am |  2 +-
 libgomp/Makefile.in |  5 +--
 libgomp/libgompd.h  | 15 
 libgomp/ompd-addr.c | 83 +
 libgomp/ompd-proc.c | 11 ++
 5 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 libgomp/ompd-addr.c

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index fe0a92122ea..0a4a9c10eb9 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -90,7 +90,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c error.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c
 
-libgompd_la_SOURCES = ompd-lib.c ompd-proc.c
+libgompd_la_SOURCES = ompd-lib.c ompd-proc.c ompd-addr.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2b487e00499..9ceb2c6e460 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -235,7 +235,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo 
critical.lo \
$(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 libgompd_la_LIBADD =
-am_libgompd_la_OBJECTS = ompd-lib.lo ompd-proc.lo
+am_libgompd_la_OBJECTS = ompd-lib.lo ompd-proc.lo ompd-addr.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -592,7 +592,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c $(am__append_4)
-libgompd_la_SOURCES = ompd-lib.c ompd-proc.c
+libgompd_la_SOURCES = ompd-lib.c ompd-proc.c ompd-addr.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info 
$(libtool_VERSION)
@@ -816,6 +816,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-addr.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-lib.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-proc.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
diff --git a/libgomp/libgompd.h b/libgomp/libgompd.h
index 495995e00d3..9130de4dc36 100644
--- a/libgomp/libgompd.h
+++ b/libgomp/libgompd.h
@@ -38,6 +38,20 @@
 
 extern ompd_callbacks_t gompd_callbacks;
 
+typedef struct gompd_environment_variables
+{
+  /* TODO: when the struct is better defined, turn it into a compact form.
+ LINK: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549698.html
+ For now, keep it as a struct.  */
+
+  /* Environment set version number.  */
+  ompd_word_t gompd_env_version;
+  /* Represents _OPENMP that is in mm format.  */
+  ompd_word_t openmp_version;
+} gompd_env;
+
+ompd_rc_t gompd_set_environment (gompd_env **) __GOMPD_NOTHROW;
+
 typedef struct _ompd_aspace_handle {
   ompd_address_space_context_t *context;
   ompd_device_t kind;
@@ -45,6 +59,7 @@ typedef struct _ompd_aspace_handle {
   void *id;
   ompd_address_space_handle_t *process_reference;
   ompd_size_t ref_count;
+  gompd_env *env;
 } ompd_address_space_handle_t;
 
 #endif /* LIBGOMPD_H */
diff --git a/libgomp/ompd-addr.c b/libgomp/ompd-addr.c
new file mode 100644
index 000..281d02b1902
--- /dev/null
+++ b/libgomp/ompd-addr.c
@@ -0,0 +1,83 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Yoosuk Sim .
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GP

Re: [PATCH 0/6 ver 4] ] Permute Class Operations

2020-07-14 Thread Segher Boessenkool
Hi!

On Wed, Jul 08, 2020 at 12:59:19PM -0700, Carl Love wrote:
> [PATCH 4/6] rs6000, Add vector shift double builtin support

>   * config/rs6000/altivec.h (vec_sldb, vec_srdb): New defines.
>   * config/rs6000/altivec.md (UNSPEC_SLDB, UNSPEC_SRDB): New.
>   (SLDB_LR): New attribute.
>   (VSHIFT_DBL_LR): New iterator.
>   (vsdb_): New define_insn.

You renamed these iterators / attributes?  Well, some of them, anyway.
Hrm.

>   * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_SLDB,
>   P10_BUILTIN_VEC_SRDB): New definitions.
>   (rs6000_expand_ternop_builtin) [CODE_FOR_vsldb_v16qi,
>   CODE_FOR_vsldb_v8hi, CODE_FOR_vsldb_v4si, CODE_FOR_vsldb_v2di,
>   CODE_FOR_vsrdb_v16qi, CODE_FOR_vsrdb_v8hi, CODE_FOR_vsrdb_v4si,
>   CODE_FOR_vsrdb_v2di}: Add clauses.

"]" (not "}").


Okay for trunk.  Thank you!


Segher


Re: RFA: Fix combine.c combining a move and a non-move into two non-moves, PR93372

2020-07-14 Thread Segher Boessenkool
On Mon, Jul 13, 2020 at 07:29:00AM +0200, Hans-Peter Nilsson wrote:
> > From: Segher Boessenkool 
> > Date: Tue, 7 Jul 2020 22:50:43 +0200
> 
> > I'll make a simpler patch.  Thanks!
> 
> You're welcome.  So, you'll take care of the updated patch
> yourself?

Yes.  Delayed a bit, I had a lot to do for 10.2...  But Soon(tm) :-)


Segher


[PATCH] x86: Replace __glibc_unlikely with __builtin_expect

2020-07-14 Thread H.J. Lu via Gcc-patches
On Mon, Jul 13, 2020 at 9:44 AM Jeff Law  wrote:
>
> On Sun, 2020-05-31 at 16:10 -0700, H.J. Lu via Gcc-patches wrote:
> > cmpstrnsi expander may pass the actual string length directly to cmpstrnqi
> > patterns.  For cmpstrnsi, one of the strings must be a constant and
> > expand_builtin_strncmp rewrites the length argument to be the minimum of
> > the const string length and the actual string length.  But it is not the
> > case for cmpmemsi.  Pass a copy of the string length to cmpstrnqi patterns
> > to avoid changing the actual string length by cmpstrnqi patterns.
> >
> > gcc/
> >
> >   PR target/95443
> >   * config/i386/i386.md (cmpstrnsi): Pass a copy of the string
> >   length to cmpstrnqi patterns.
> >
> > gcc/testsuite/
> >
> >   PR target/95443
> >   * gcc.target/i386/pr95443-1.c: New test.
> >   * gcc.target/i386/pr95443-2.c: Likewise.
> OK
> jeff
> >

I checked in this patch to replace glibc specific __glibc_unlikely with
__builtin_expect.

-- 
H.J.
From f80a36c44d3a9c92bd48981f2bc9e88abebafbc5 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 14 Jul 2020 14:01:51 -0700
Subject: [PATCH] x86: Replace __glibc_unlikely with __builtin_expect

Replace glibc specific __glibc_unlikely with __builtin_expect.

	PR target/95443
	* gcc.target/i386/pr95443-1.c (simple_strstr): Replace
	__glibc_unlikely with __builtin_expect.
---
 gcc/testsuite/gcc.target/i386/pr95443-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr95443-1.c b/gcc/testsuite/gcc.target/i386/pr95443-1.c
index 292ff16afdd..698dfa02189 100644
--- a/gcc/testsuite/gcc.target/i386/pr95443-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr95443-1.c
@@ -49,7 +49,7 @@ simple_strstr (const char *haystack, const char *needle)
 
   while (1)
 {
-  if (__glibc_unlikely (hs > end))
+  if (__builtin_expect (hs > end, 0))
 	{
 	  end += strnlen ((const char*)end + m1 + 1, 2048);
 	  if (hs > end)
-- 
2.26.2



Re: [PATCH] libgccjit: Handle truncation and extension for casts [PR 95498]

2020-07-14 Thread David Malcolm via Gcc-patches
On Sun, 2020-07-12 at 20:30 -0400, Antoni Boucher via Jit wrote:
> Hello.
> 
> As mentioned in bug 95498, some conversions do not work. After 
> investigation, it turns out that it's caused by multiple casts on an 
> expression where it should do a truncation/extension.

Thanks for investigating this.

> I added a testcase, but for some reasons, the tests only pass when
> ran 
> via `./testsuite/jit/test-cast.c.exe`, not when ran via `make check-
> jit 
> RUNTESTFLAGS="-v -v -v  jit.exp=test-cast.c"`.

That's weird.

It sounds like you're following the instructions on 
https://gcc.gnu.org/onlinedocs/jit/internals/index.html#running-the-test-suite

If you're not specifying LD_LIBRARY_PATH, it might be that when you
run 
  ./testsuite/jit/test-cast.c.exe
directly that the version of libgccjit.so that the exe gets dynamically
linked against might not be the one you expect (e.g. a distribution-
provided libgccjit.so, rather than the one in your build
directory).  Maybe that explains the differences in behavior?  (or
maybe somehow "make check-jit" is using the wrong DSO???).  You could
try exporting LD_DEBUG=files (or somesuch) to get info on which
libgccjit.so is being used in each manner of invoking the test.

There are some notes at the URL above on how to run a test under gdb.

If all else fails, add some printfs and look at the output from "make
check-jit", and look at jit.sum and jit.log (though you don't specify
how the tests fail to pass when run that way)

> Furthermore, some other tests failed, but they also fail on master.

Which tests are failing for you?  (and for what configuration triplet?)

> Also, I was under the impression that adding `STRIP_TYPE_NOPS
> (t_expr);` 
> in `playback::context::build_cast` would be a better fix for this,
> but 
> that doesn't fix the issue.
> Am I missing something?

I'm not sure yet what the best fix would be.  In general I try to mimic
C behavior as much as is reasonable, but off the top of my head I'm not
sure what the C frontend does here.

> It's my first contribution to gcc, so I'd need help for fixing the
> tests 
> and also a confirmation that this is the best way to fix this issue.

Thanks for posting the patch; I hope the above is helpful
Dave



[PATCH] PR fortran/89574 - [8/9/10/11 Regression] ICE in conv_function_val, at fortran/trans-expr.c:3792

2020-07-14 Thread Harald Anlauf
As Fortran allows to rename a symbol on use, we need to be careful to
check whether a name we are looking for is a global one or belongs to
a different module.  Check for this condition.

Regtested on x86_64-pc-linux-gnu.

OK for master / backports after some time for a proper burn-in?

Thanks,
Harald


PR fortran/89574 - ICE in conv_function_val, at fortran/trans-expr.c:3792

When checking for an external procedure from the same file, do not
consider symbols from different modules.

gcc/fortran/
PR fortran/89574
* trans-decl.c (gfc_get_extern_function_decl): Check whether a
symbol belongs to a different module.

diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 769ab20c82d..45a739ac860 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -2090,12 +2090,17 @@ gfc_get_extern_function_decl (gfc_symbol * sym, gfc_actual_arglist *actual_args)
   if (gsym && !gsym->bind_c)
 	gsym = NULL;
 }
-  else
+  else if (sym->module == NULL)
 {
   gsym = gfc_find_gsymbol (gfc_gsym_root, sym->name);
   if (gsym && gsym->bind_c)
 	gsym = NULL;
 }
+  else
+{
+  /* Procedure from a different module.  */
+  gsym = NULL;
+}

   if (gsym && !gsym->defined)
 gsym = NULL;
diff --git a/gcc/testsuite/gfortran.dg/pr89574.f90 b/gcc/testsuite/gfortran.dg/pr89574.f90
new file mode 100644
index 000..48dd0680a48
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr89574.f90
@@ -0,0 +1,29 @@
+! { dg-do compile }
+! PR fortran/89574 - ICE in conv_function_val, at fortran/trans-expr.c:3792
+
+module mod1
+contains
+  subroutine init
+  end subroutine
+end module
+
+module mod2
+contains
+  subroutine init
+  end subroutine
+end module
+
+module init
+  use mod1, only : test_init1 => init
+  use mod2, only : test_init2 => init
+  implicit none
+contains
+  subroutine sub
+call test_init1
+call test_init2
+call init
+  contains
+subroutine init
+end subroutine
+  end subroutine
+end module


[committed] c++: Add new test [PR59978]

2020-07-14 Thread Marek Polacek via Gcc-patches
Fixed in r224162.  That came without a test so adding this one.
Previously, we issued a bogus "too few arguments to function" error.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/testsuite/ChangeLog:

PR c++/59978
* g++.dg/cpp0x/vt-59978.C: New test.
---
 gcc/testsuite/g++.dg/cpp0x/vt-59978.C | 16 
 1 file changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/vt-59978.C

diff --git a/gcc/testsuite/g++.dg/cpp0x/vt-59978.C 
b/gcc/testsuite/g++.dg/cpp0x/vt-59978.C
new file mode 100644
index 000..b7cdb19353a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/vt-59978.C
@@ -0,0 +1,16 @@
+// PR c++/59978
+// { dg-do compile { target c++11 } }
+
+static void testFunc(int i1, int i2) {
+(void)i1;
+(void)i2;
+}
+
+template  void wrapper() {
+testFunc(Ints...);
+}
+
+int main()
+{
+wrapper<1, 2>();
+}

base-commit: b2984e5ada65f417e8704d2e1e81ccd0272b5eb3
-- 
2.26.2



Re: RFA: Fix combine.c combining a move and a non-move into two non-moves, PR93372

2020-07-14 Thread Segher Boessenkool
Hi!

On Mon, Jul 13, 2020 at 07:25:37AM +0200, Hans-Peter Nilsson wrote:
> > > > > TL;DR: fixing a misdetection of what is a "simple move".
> > > > 
> > > > That is not a very correct characterisation of what this does :-)
> > > 
> > > That's apparently where we completely disagree. :-)
> > 
> > Well, I wrote that code, I know what is considered "just a move" there.
> 
> You lost some context: I'm comparing before/after the
> cc0-conversion for CRIS, where this is a misdetection (a false
> negative) of a move and causes a performance-regression.

The cc0 conversion caused a performance regression.  You can improve
some code in combine to make that not happen.

> > > I certainly don't contest that the move can be eliminated, and
> > > that most cost-effective 2-2 eliminations are helpful.  (See my
> > > other post about combine being eager with allowing same-cost
> > > combinations.)
> > 
> > I did not see that post, do you have a pointer?
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549416.html

I'll reply to that separately.

> > single_set also allows other insns, for example, multiple sets!
> 
> ...where the other sets are unused.  Not sure how that kind of
> thing would get combined here, but if its combined cost is
> beneficial, it would be a win, I guess.

The unused outputs are thrown away by combine, except when that doesn't
match, then it is tried again but with the original clobbers.

> > > Do you have some pointers to PR:s or something else backing that
> > > statement, or is it your work-in-progress hinted below?
> > 
> > I do not know what "work in progress" you mean?
> 
> I'm referring to your "I'll rerun some testing to show this.
> It'll take a while."  Are the regressions you refer to above
> tracked in bugzilla or on some mailing list?

The whole 2-2 combine thing took half a year at least to develop.  I
have posted to gcc-patches@ about it a few times.  Not having the
is_just_move stuff causes highly visible regressions on some targets, I
do not remember which, but it hurts all targets.  2-2 combine with the
is_just_move (and make_extra_copies) stuff was a win everywhere.

The tests just take long to run (it used to be 2h per run, but it is
closer to 3h per run with the current compiler: building cross-compilers
used to be less than 4m per target, this is much worse since a few
years).

Analysing the results is easy for most kinds of instruction combination:
just looking at the binary size of the testcase (I usually build Linux)
gives a good idea how effective combine was.  But for 2-2 combinations
that doesn't show much at all, so I dig through the actual resulting
code (for many targets, not all 30, just those I think are interesting).

> > For 2-2, size does *not* usually change, which brings us immediately
> > into "a lot more work" territory.  Oh, and all x86 compilers ICE.

The x86-64 kernel *does* build, just some boot wrapper code fails, but
the kernel itself does build.  i386 does ICE however, something with
memcpy or some such.

> If combine only did lower-cost combinations (perhaps with
> Richard Sandifords lower-size-when-tied suggestion), I guess
> this wouldn't happen. 0:-)

And we would regress (a LOT).

> > It shows we can change to use single_set here.
> 
> Did you mean "will show whether" or is it already complete?

It did complete, yes (and didn't change a single resulting intruction).
So that was easy :-)

> > I'll review the original patch again, to point out where it still needs
> > changing.
> 
> ...but if you're in progress with a single_set variant, I'm all
> for it.

Yup, it's pretty simple actually :-)

Thanks,


Segher


Re: [Patch] libgomp: Add Fortran routine support for allocators

2020-07-14 Thread Tobias Burnus

+ fortran@, which I forgot for the initial patch.

On 7/14/20 11:43 AM, Jakub Jelinek wrote:


+  type omp_alloctrait
+integer (omp_alloctrait_key_kind) key
+integer (omp_alloctrait_val_kind) value
+  end type omp_alloctrait

I know this is a problem in the standard, but won't gfortran in some strict
F77 conformance mode if it has any diagnose this?  If not, fine, if yes,
do we want some extension that it will accept the derived type quietly?


gfortran only supports -std=f95 or higher.

I did now update as suggested – and I added an
-fdefault-integer-8 and an fixed-form omp_lib.h testcase.

OK?

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
libgomp: Add Fortran routine support for allocators

libgomp/ChangeLog:

	* allocator.c: Add ialias for omp_init_allocator and
	omp_destroy_allocator.
	* configure.ac: Set INTPTR_T_KIND.
	* configure: Regenerate.
	* Makefile.in: Regenerate.
	* testsuite/Makefile.in: Regenerate.
	* fortran.c (omp_init_allocator_, omp_destroy_allocator_,
	omp_set_default_allocator_, omp_get_default_allocator_): New
	functions and ialias_redirect.
	* icv.c: Add ialias for omp_set_default_allocator and
	omp_get_default_allocator.
	* libgomp.map (OMP_5.0.1): Add omp_init_allocator_,
	omp_destroy_allocator_, omp_set_default_allocator_ and
	omp_get_default_allocator_.
	* omp_lib.f90.in: Add allocator traits parameters, declare
	allocator routines and add related kind parameters.
	* omp_lib.h.in: Likewise.
	* testsuite/libgomp.c-c++-common/alloc-2.c: Fix sizeof.
	* testsuite/libgomp.fortran/alloc-1.F90: New test.
	* testsuite/libgomp.fortran/alloc-2.F90: New test.
	* testsuite/libgomp.fortran/alloc-3.F: New test.
	* testsuite/libgomp.fortran/alloc-4.f90: New test.
	* testsuite/libgomp.fortran/alloc-5.f90: New test.

 libgomp/Makefile.in  |   1 +
 libgomp/allocator.c  |   3 +
 libgomp/configure|  11 +-
 libgomp/configure.ac |   2 +
 libgomp/fortran.c|  38 +
 libgomp/icv.c|   2 +
 libgomp/libgomp.map  |   5 +
 libgomp/omp_lib.f90.in   | 138 ++
 libgomp/omp_lib.h.in | 103 ++
 libgomp/testsuite/Makefile.in|   2 +
 libgomp/testsuite/libgomp.c-c++-common/alloc-2.c |   4 +-
 libgomp/testsuite/libgomp.fortran/alloc-1.F90| 169 +++
 libgomp/testsuite/libgomp.fortran/alloc-2.F90|   3 +
 libgomp/testsuite/libgomp.fortran/alloc-3.F  |   3 +
 libgomp/testsuite/libgomp.fortran/alloc-4.f90|  71 ++
 libgomp/testsuite/libgomp.fortran/alloc-5.f90|  23 +++
 16 files changed, 574 insertions(+), 4 deletions(-)

diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index b570a942cff..bc044b1820a 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -405,6 +405,7 @@ INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTPTR_T_KIND = @INTPTR_T_KIND@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
 LIBOBJS = @LIBOBJS@
diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index 76feba71082..7166538b1de 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -202,6 +202,9 @@ omp_destroy_allocator (omp_allocator_handle_t allocator)
 }
 }
 
+ialias (omp_init_allocator)
+ialias (omp_destroy_allocator)
+
 void *
 omp_alloc (size_t size, omp_allocator_handle_t allocator)
 {
diff --git a/libgomp/configure b/libgomp/configure
index fd65828136d..d85023f4f05 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -647,6 +647,7 @@ OMP_NEST_LOCK_ALIGN
 OMP_NEST_LOCK_SIZE
 OMP_LOCK_ALIGN
 OMP_LOCK_SIZE
+INTPTR_T_KIND
 USE_FORTRAN_FALSE
 USE_FORTRAN_TRUE
 link_gomp
@@ -11433,7 +11434,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11436 "configure"
+#line 11437 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11539,7 +11540,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11542 "configure"
+#line 11543 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -16962,6 +16963,11 @@ for i in $config_path; do
   fi
 done
 
+if ac_fn_c_compute_int "$LINENO" "sizeof (__INTPTR_TYPE__)" "INTPTR_T_KIND"""; then :
+
+fi
+
+
 if ac_fn_c_compute_int "$LINENO" "sizeof (omp_lock_t)" "OMP_LOCK_SIZE"""; then :
 
 else
@@ -17041,6 +17047,7 @@ fi
 
 
 
+
 CFLAGS="$save_CFLAGS"
 
 # Determine what GCC version number to use in filesystem paths.
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
index 201d26fff7a..d1034dab

Re: RFC: make combine do as advertised (cheaper-than)?

2020-07-14 Thread Segher Boessenkool
Hi!

On Mon, Jul 06, 2020 at 10:48:25AM +0100, Richard Sandiford wrote:
> Originally combine always produced shorter sequences, so by the

(Shorter in # insns, not # bytes).

> above reasoning, combining for == was correct.  These days we allow
> N-to-N replacements too, which are obviously a good thing when

Only 2-2 so far (not 1-1 yet, there are some target problems to
overcome first).

> the new cost is lower, but are more of a wash when the costs
> are the same.  But even then, the combination should have a
> “canonicalisation” effect.  (Unfortunately that effect includes
> the result of expand_compound_operation/make_compound_operation.)

2-2 is always reducing latency if the costs are equal (and sane ;-) ),
that is a large part of what makes 2-2 combinations useful.  Originally
the output of i2 is input to i3, but not anymore in the new insns.


Segher


Re: RFC: make combine do as advertised (cheaper-than)?

2020-07-14 Thread Segher Boessenkool
Hi!

On Mon, Jul 06, 2020 at 04:01:54AM +0200, Hans-Peter Nilsson via Gcc-patches 
wrote:
> Most comments, including the second sentence in the head comment
> of combine_validate_cost, the main decision-maker of the combine
> pass, refer to the function as returning true if the new
> insns(s) *cheaper* than the old insns, when in fact the function
> returned true also if the cost was the same.  Returning true for
> cheaper also seems more sane than as-cheap-as considering the
> need to avoid oscillation between same-cost combinations.  Also,
> it makes the job of later passes harder, having combine make
> more complex combinations of the same cost.

All of this was added in https://gcc.gnu.org/g:64b8935d4809 , which also
adds the

+  /* Disallow this recombination if both new_cost and old_cost are
+ greater than zero, and new_cost is greater than old cost.  */

comment (which is what it actually does).  Before that change, combine
didn't look at costs at all.

Combine never has used a "strictly smaller than" cost thing.

> Right, you can affect this with your target TARGET_RTX_COSTS and
> TARGET_INSN_COST hooks, but only for trivial cases, and you have
> increasingly more complex combinations (many-to-many
> combinations) where you have to twist simple logic to appease
> combine (stop it from combining) or give up.

There are 2-1, 3-1, 4-1, 3-2, 4-2, which all reduce the number of insns,
and then there is 2-2, which gets rid of one log_link.  If the isnn_cost
stays the same, it always wins something else.

> Alternatives from the top of my head, one of:

...

5) Improve your target so that its insn_cost reflects ithe costs of
the insns better.

Can you share some typical examples where things are worse with the
current behaviour?


Segher


Re: [Patch] libgomp: Add Fortran routine support for allocators

2020-07-14 Thread Jakub Jelinek via Gcc-patches
On Tue, Jul 14, 2020 at 11:42:15PM +0200, Tobias Burnus wrote:
> + fortran@, which I forgot for the initial patch.
> 
> On 7/14/20 11:43 AM, Jakub Jelinek wrote:
> 
> > > +  type omp_alloctrait
> > > +integer (omp_alloctrait_key_kind) key
> > > +integer (omp_alloctrait_val_kind) value
> > > +  end type omp_alloctrait
> > I know this is a problem in the standard, but won't gfortran in some strict
> > F77 conformance mode if it has any diagnose this?  If not, fine, if yes,
> > do we want some extension that it will accept the derived type quietly?
> 
> gfortran only supports -std=f95 or higher.
> 
> I did now update as suggested – and I added an
> -fdefault-integer-8 and an fixed-form omp_lib.h testcase.
> 
> OK?

Ok, thanks.

Jakub



Re: [PATCH rs6000]: Refine RTL unroll hook for small loops

2020-07-14 Thread Segher Boessenkool
Hi Jiu Fu,

On Mon, Jul 13, 2020 at 07:50:28PM +0800, guojiufu wrote:
> For very small loops (< 6 insns), it would be fine to unroll 4
> times to run fast with less latency and better cache usage.

> -  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> -  example we may want to unroll very small loops more times (4 perhaps).
> -  We also should use a PARAM for this.  */
> +  /* TODO: Using hardcodes here, for tunable, PARAM(s) maybe refined.  */

  /* TODO: These are hardcoded values right now.  We probably should use
 a PARAM here.  */

Okay for trunk with that.  Thanks!


Segher


Re: [PATCH] libgomp: Add OMPD Address Space Information functions.

2020-07-14 Thread y2s1982 via Gcc-patches
Hello Jakub,

I remember now why I had gompd_env_info as a global variable. It was meant
to be in libgomp.so.1. It was mistakenly placed in libgompd.h; it was
supposed to be in libgomp.h.

I am having trouble connecting the dots and need help. To expose a
variable, such as the gompd_env_info, so that a callback function can then
fetch it based on a name string passed in as an argument, is it sufficient
to just create a global variable in libgomp.h?
Does it need to be included in the .map file as a symbol? Or would
callbacks already have pointers to the variable and simply use switch to
fetch the values?
I looked at nm command and readelf command, too, and although I didn't
check everything, I only found function symbols as defined in the .map. I
couldn't find any variables, and I wasn't sure if I was looking at the
right place.

Thanks again for your help.

Cheers,

Tony Sim


On Tue, Jul 14, 2020 at 5:57 AM Jakub Jelinek  wrote:

> On Thu, Jul 09, 2020 at 07:01:00PM -0400, y2s1982 via Gcc-patches wrote:
> > --- a/libgomp/libgompd.h
> > +++ b/libgomp/libgompd.h
> > @@ -47,4 +47,19 @@ typedef struct _ompd_aspace_handle {
> >ompd_size_t ref_count;
> >  } ompd_address_space_handle_t;
> >
> > +struct gompd_env
> > +{
> > +  /* TODO: when the struct is better defined, turn it into a compact
> form.
> > + LINK:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549698.html
> > + For now, keep it as a struct.  */
> > +
> > +  /* Environment set version number.  */
> > +  ompd_word_t gompd_env_version;
> > +  /* Represents _OPENMP that is in mm format.  */
> > +  ompd_word_t openmp_version;
> > +};
> > +
> > +/* TODO: when gompd_env is better defined, turn it into a compact
> form.  */
> > +extern struct gompd_env gompd_env_info;
>
> I don't think you want this to be a global var.
>
>
> > --- a/libgomp/ompd-lib.c
> > +++ b/libgomp/ompd-lib.c
> > @@ -31,6 +31,17 @@
> >
> >  ompd_callbacks_t gompd_callbacks;
> >  static int ompd_initialized = 0;
> > +struct gompd_env gompd_env_info;
>
> Again.
>
> > +
> > +ompd_rc_t
> > +gompd_set_environment ()
> > +{
> > +  /* TODO: Turn this placeholder function to handle OMPD environment
> variables
> > + when it becomes compact.  */
> > +  struct gompd_env temp_env = { 202007, 201811 };
> > +  gompd_env_info = temp_env;
> > +  return ompd_rc_ok;
> > +}
> >
> >  ompd_rc_t
> >  ompd_get_api_version (ompd_word_t *version)
> > @@ -57,6 +68,7 @@ ompd_initialize (ompd_word_t api_version, const
> ompd_callbacks_t *callbacks)
> >  return ompd_rc_error;
> >
> >gompd_callbacks = *callbacks;
> > +  gompd_set_environment ();
>
> And you shouldn't call it here, but instead in ompd_process_initialize
> and put the struct into the handle.
>
> The thing is, the same OMPD library can e.g. handle a 64-bit and 32-bit
> process, or one with older and one with newer libgomp.so.1.
>
> Jakub
>
>


Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]

2020-07-14 Thread luoxhu via Gcc-patches
Hi David,

On 2020/7/14 22:17, David Edelsohn wrote:
> Unfortunately this patch is eliciting a number of new testsuite
> failures, all like
> 
> error: unrecognizable insn:
> (insn 44 43 45 5 (parallel [
>  (set (reg:SI 199)
>  (unspec:SI [
>  (reg:SF 202)
>  ] UNSPEC_SI_FROM_SF))
>  (clobber (scratch:V4SF))
>  ]) 
> "/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1
> -1
>   (nil))
> during RTL pass: vregs
> 
> for
> 
> gcc.dg/vect/vect-alias-check-11.c
> gcc.dg/vect/vect-alias-check-12.c
> gcc.dg/vect/pr57741-2.c
> gcc.dg/vect/pr57741-3.c
> gcc.dg/vect/pr89440.c
> gcc.target/powerpc/sse-movss-1.c

This patch won't match the instruction with a "clobber (scratch:V4SF)",
it only matches "(clobber (match_scratch:DI 2 "=r"))", I guess you are
replying to the other patch?

"[PATCH 2/2] rs6000: Define define_insn_and_split to split unspec sldi+or to 
rldimi"

Thanks for your fix patch! :)

This patch's regression tested pass on Power8-LE, I re-run these cases on
Power8-LE, and confirmed these could pass, what is your platform please?
BTW, TARGET_NO_SF_SUBREG ensured TARGET_POWERPC64 for this 
define_insn_and_split.
Thanks.

Xionghu

> 
> Thanks, David
> 
> On Mon, Jul 13, 2020 at 2:30 AM luoxhu  wrote:
>>
>> Hi,
>>
>> On 2020/7/11 08:54, Segher Boessenkool wrote:
>>> Hi!
>>>
>>> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
 OK, seems the md file needs a format tool too...
>>>
>>> Heh.  Just make sure it looks good (that is, does what it looks like),
>>> looks like the rest, etc.  It's hard to do anything nice with unspecs,
>>> [ ] lists do not format well.
>>>
>> +  "TARGET_NO_SF_SUBREG"
>> +  "#"
>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>
> Put this in the insn condition?  And since this is just a predicate,
> you can just use it instead of gpc_reg_operand.
>
> (The split condition becomes "&& 1" then, not "").

 OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
 movdi_from_sf_zero_ext all use it as condition...
>>>
>>> Since in your case you *always* split, the split condition should be
>>> "always".  There are no alternatives that do not split here.
>>>
 And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
 "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
 TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
>>>
>>> Good question.  I do not know.
>>>
>>> Well...  Since this define_insn* requires p8 *anyway*, we do not need
>>> any of these sf_subreg things?  We always know for each one if it should
>>> be true or false.
>>
>> Yes, removed the vsx_reg_sfsubreg_ok check.
>>
>>>
 +  "TARGET_NO_SF_SUBREG"
>>>
>>> But here we should require p8 some other way, then.
>>
>> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
>> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && 
>> TARGET_POWERPC64
>> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.
>>
>>>
 +  (set_attr "isa" "p8v")])
>>>
>>> (This isn't enough, unfortunately).
>>>
>>
>>
>> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:
>>
>>
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
>> PR67741, etc.
>>
>> srdi 3,3,32
>> sldi 9,3,32
>> mtvsrd 1,9
>> xscvspdpn 1,1
>>
>> =>
>>
>> rldicr 3,3,0,31
>> mtvsrd 1,3
>> xscvspdpn 1,1
>>
>> Bootstrap and regression tested pass on Power8-LE.
>>
>> gcc/ChangeLog:
>>
>> 2020-07-13  Xionghu Luo  
>>
>>  PR rtl-optimization/89310
>>  * config/rs6000/rs6000.md (movsf_from_si2): New
>>  define_insn_and_split.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2020-07-13  Xionghu Luo  
>>
>>  PR rtl-optimization/89310
>>  * gcc.target/powerpc/pr89310.c: New test.
>> ---
>>   gcc/config/rs6000/rs6000.md| 31 ++
>>   gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 
>>   2 files changed, 48 insertions(+)
>>   create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c
>>
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 4fcd6a94022..480385ed4d2 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
>>  "*,  *, p9v,   p8v,   *, *,
>>   p8v,p8v,   p8v,   *")])
>>
>> +;; For extracting high part element from DImode register like:
>> +;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift lef

Re: [Patch] [OpenMP, Fortran] Add structure/derived-type element mapping

2020-07-14 Thread Thomas Schwinge
Hi Julian, Tobias!

On 2020-06-24T19:32:09+0200, Tobias Burnus  wrote:
> (OpenMP 5 extends this a lot, but this is about OpenMP 4.5.
> It touches code which is also used by OpenACC's attach/detach.)
>
> @OpenACC/Julian: I think the character attach/detach for
> deferred-length strings does not work properly with OpenACC;
> I did not touch this code – but I think it needs some love.

Please file a PR.

> This code adds support for
>map(dt%comp, dt%comp2)
> where "comp" can be either a nonpointer, nonallocatable element
> scalar, array or array section. Or it can be a pointer - where
> character strings are one complication as for deferred-length
> ones, the length is stored in an extra DT component.
>
> While testing, I encountered two bugs, one relating to kind=4
> character string (patch pending review; PR95837)
> not part of testcase) and one related to deferred-length
> character strings (commented in the test case; larger issue;
> PR95868).
>
> Like always, some more tests/testcase probably would not harm.
>
> Regarding the patch:
>
> (a) openmp.c:
> This enabled component matching for 'map(' and
> piggybacks on the OpenACC code for the checks. I think that
> some additional checks might be useful – and I hope that no
> check is too strict.
> The "depend" clause was excluded as one otherwise gets a
> testsuite fails due to the is-contiguous check.
>
> (b) trans-openmp.c:
> - gfc_trans_omp_clauses now has a "bool openacc".
> - GOMP_MAP_ATTACH_DETACH is replaced by GOMP_MAP_ALWAYS_POINTER
> - For arrays, the mapping of the descriptor is squeezed before
>"node" which contains the data transfer (var.desc.data mapping
>followed by the always_pointer for the mapping).
>In this array case, the latter gets a pointless cast in order
>to prevent that for both var.desc and var.desc.data memory gets
>allocated in the struct.
>→ That's also the reason the big switch table is moved up.
> - For deferred-length strings, the string-length is in an extra
>struct element (derived-type component) and will be mapped in
>addition.
> - Bugs in the previous version:
>* gfc_trans_omp_array_section for "element == true", the size
>  of a pointer instead of the size of the element was mapped.
>* For string variables (with constant length) the kind=4 was
>  not properly handled.
>* Allocatable scalars were not handled – missing second clause
>  for the always_pointer (and attach_detach, I assume)

I understand correctly that your remark "Bugs in the previous version"
translates to "bugs still existing on releases/gcc-10 branch for OpenACC
'attach'/'detach'"?  Should we thus backport to releases/gcc-10 branch
this commit 102502e32ea4e8a75d6b252ba319d09d735d9aa7 "[OpenMP, Fortran]
Add structure/derived-type element mapping", and fixup commit
524862db444b6544c6dc87c5f06f351100ecf50d "Fix goacc/finalize-1.f tree
dump-scanning for -m32"?


Grüße
 Thomas


> Comments, remarks, suggestions?
> Otherwise: OK for the trunk?

> [OpenMP, Fortran] Add structure/derived-type element mapping
>
> gcc/fortran/ChangeLog:
>
>   * openmp.c (gfc_match_omp_clauses): Match also derived-type
>   component refs in OMP_CLAUSE_MAP.
>   (resolve_omp_clauses): Resolve those.
>   * trans-openmp.c (gfc_trans_omp_array_section, gfc_trans_omp_clauses):
>   Handle OpenMP structure-element mapping.
>   (gfc_trans_oacc_construct, gfc_trans_oacc_executable_directive,
>   (gfc_trans_oacc_combined_directive, gfc_trans_oacc_declare): Update
>   add openacc=true in gfc_trans_omp_clauses call.
>
> gcc/testsuite/ChangeLog:
>
>   * gfortran.dg/goacc/finalize-1.f: Update dump scan pattern.
>   * gfortran.dg/gomp/map-1.f90: Update dg-error.
>   * gfortran.dg/gomp/map-2.f90: New test.
>
>
> libgomp/ChangeLog:
>
>   * testsuite/libgomp.fortran/struct-elem-map-1.f90: New test.
>
>  gcc/fortran/openmp.c   |   5 +-
>  gcc/fortran/trans-openmp.c | 332 
> +++--
>  gcc/testsuite/gfortran.dg/goacc/finalize-1.f   |   4 +-
>  gcc/testsuite/gfortran.dg/gomp/map-1.f90   |  35 +--
>  gcc/testsuite/gfortran.dg/gomp/map-2.f90   |   6 +
>  .../libgomp.fortran/struct-elem-map-1.f90  | 331 
>  6 files changed, 595 insertions(+), 118 deletions(-)
>
> diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
> index e681903c7c2..7de2f6e1b1d 100644
> --- a/gcc/fortran/openmp.c
> +++ b/gcc/fortran/openmp.c
> @@ -1464,7 +1464,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
> omp_mask mask,
> head = NULL;
> if (gfc_match_omp_variable_list ("", &c->lists[OMP_LIST_MAP],
>  false, NULL, &head,
> -true) == MATCH_YES)
> +true, true) == MATCH_YES)
>   {
> gfc_omp_namelist *n;
> for