[PATCH] middle-end/96369 - fix missed short-circuiting during range folding

2020-07-31 Thread Richard Biener
This makes the special case of a constant-evaluated LHS of a
short-circuiting or/and explicit, rather than doing range
merging and eventually exposing a side effect that shouldn't be
evaluated.

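For reference, the kind of code affected (mirroring the new testcase below,
with a literal 1L standing in for the constant-folded LHS):

  long y = 0L;
  long x = (long) (1L || (y = 1L)) % 8L;
  /* The RHS of || must never be evaluated when the LHS is known true, so
     y has to stay 0; before the fix, range folding could merge the two
     operands and make the side effect unconditional.  */
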
Bootstrap and regtest running on x86_64-unknown-linux-gnu.

2020-07-31  Richard Biener  

PR middle-end/96369
* fold-const.c (fold_range_test): Special-case constant
LHS for short-circuiting operations.

* c-c++-common/pr96369.c: New testcase.
---
 gcc/fold-const.c |  7 +++
 gcc/testsuite/c-c++-common/pr96369.c | 12 
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/pr96369.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 300d959278b..1324a194995 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -5931,6 +5931,13 @@ fold_range_test (location_t loc, enum tree_code code, tree type,
     return 0;
 
   lhs = make_range (op0, &in0_p, &low0, &high0, &strict_overflow_p);
+  /* If op0 is known true or false and this is a short-circuiting
+ operation we must not merge with op1 since that makes side-effects
+ unconditional.  So special-case this.  */
+  if (!lhs
+      && ((code == TRUTH_ORIF_EXPR && in0_p)
+	  || (code == TRUTH_ANDIF_EXPR && !in0_p)))
+    return op0;
   rhs = make_range (op1, &in1_p, &low1, &high1, &strict_overflow_p);
 
   /* If this is an OR operation, invert both sides; we will invert
diff --git a/gcc/testsuite/c-c++-common/pr96369.c b/gcc/testsuite/c-c++-common/pr96369.c
new file mode 100644
index 000..8c468d9fec2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr96369.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int main()
+{
+  const long ONE = 1L;
+  long y = 0L;
+  long x = ((long) (ONE || (y = 1L)) % 8L);
+  if (y != 0)
+    __builtin_abort ();
+  return 0;
+}
-- 
2.26.2


[PATCH] CSKY: Add -mfloat-abi= option.

2020-07-31 Thread Jojo R
gcc/ChangeLog:

* config/csky/csky_opts.h (float_abi_type): New.
* config/csky/csky.h (TARGET_SOFT_FLOAT): New.
(TARGET_HARD_FLOAT): New.
(TARGET_HARD_FLOAT_ABI): New.
(OPTION_DEFAULT_SPECS): Use mfloat-abi.
* config/csky/csky.opt (mfloat-abi): New.
* doc/invoke.texi (C-SKY Options): Document -mfloat-abi=.

---
 gcc/config/csky/csky.h  |  9 -
 gcc/config/csky/csky.opt| 29 +
 gcc/config/csky/csky_opts.h |  7 +++
 gcc/doc/invoke.texi | 18 ++
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/gcc/config/csky/csky.h b/gcc/config/csky/csky.h
index 2d5a66ca187..8f4090b4b38 100644
--- a/gcc/config/csky/csky.h
+++ b/gcc/config/csky/csky.h
@@ -126,6 +126,13 @@
 #define TARGET_TLS \
   (CSKY_TARGET_ARCH (CK807) || CSKY_TARGET_ARCH (CK810))
 
+/* Run-time Target Specification.  */
+#define TARGET_SOFT_FLOAT   (csky_float_abi == CSKY_FLOAT_ABI_SOFT)
+/* Use hardware floating point instructions. */
+#define TARGET_HARD_FLOAT   (csky_float_abi != CSKY_FLOAT_ABI_SOFT)
+/* Use hardware floating point calling convention.  */
+#define TARGET_HARD_FLOAT_ABI   (csky_float_abi == CSKY_FLOAT_ABI_HARD)
+
 /* Number of loads/stores handled by ldm/stm.  */
 #define CSKY_MIN_MULTIPLE_STLD 3
 #define CSKY_MAX_MULTIPLE_STLD 12
@@ -818,7 +825,7 @@ while (0)
   {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
   {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
   {"endian", "%{!mbig-endian:%{!mlittle-endian:-m%(VALUE)-endian}}" }, \
-  {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" },
+  {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" },
 
 
 /**
diff --git a/gcc/config/csky/csky.opt b/gcc/config/csky/csky.opt
index 5846e50344f..60b51e5798f 100644
--- a/gcc/config/csky/csky.opt
+++ b/gcc/config/csky/csky.opt
Target RejectNegative Report Alias(mlittle-endian) Undocumented
 ;; assembly.
 
 mhard-float
-Target Report RejectNegative Mask(HARD_FLOAT)
-Enable hardware floating-point instructions.
+Target RejectNegative Alias(mfloat-abi=, hard) Undocumented
 
 msoft-float
-Target Report RejectNegative InverseMask(HARD_FLOAT)
-Use library calls to perform floating-point operations (default).
+Target RejectNegative Alias(mfloat-abi=, soft) Undocumented
+
+mfloat-abi=v2
+Target RejectNegative Alias(mfloat-abi=, hard) Undocumented
+
+mfloat-abi=v1
+Target RejectNegative Alias(mfloat-abi=, softfp) Undocumented
+
+mfloat-abi=
Target RejectNegative Joined Enum(float_abi_type) Var(csky_float_abi) Init(CSKY_FLOAT_ABI_SOFT)
+Specify if floating point hardware should be used.
+
+Enum
+Name(float_abi_type) Type(enum float_abi_type)
+Known floating-point ABIs (for use with the -mfloat-abi= option):
+
+EnumValue
+Enum(float_abi_type) String(soft) Value(CSKY_FLOAT_ABI_SOFT)
+
+EnumValue
+Enum(float_abi_type) String(softfp) Value(CSKY_FLOAT_ABI_SOFTFP)
+
+EnumValue
+Enum(float_abi_type) String(hard) Value(CSKY_FLOAT_ABI_HARD)
 
 mfpu=
Target RejectNegative Joined Enum(csky_fpu) Var(csky_fpu_index) Init(TARGET_FPU_auto) Save
diff --git a/gcc/config/csky/csky_opts.h b/gcc/config/csky/csky_opts.h
index a6dbf5ab6dd..7ee56be3e81 100644
--- a/gcc/config/csky/csky_opts.h
+++ b/gcc/config/csky/csky_opts.h
@@ -59,5 +59,12 @@ enum csky_fpu_type
 };
 #define CSKY_TARGET_FPU_GET(name) TARGET_FPU_ ## name
 
+enum float_abi_type
+{
+  CSKY_FLOAT_ABI_SOFT,
+  CSKY_FLOAT_ABI_SOFTFP,
+  CSKY_FLOAT_ABI_HARD
+};
+
 
 #endif /* CSKY_OPTS_H */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index eaaf6d06b65..a132e09c674 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -820,6 +820,7 @@ Objective-C and Objective-C++ Dialects}.
 @gccoptlist{-march=@var{arch}  -mcpu=@var{cpu} @gol
 -mbig-endian  -EB  -mlittle-endian  -EL @gol
 -mhard-float  -msoft-float  -mfpu=@var{fpu}  -mdouble-float  -mfdivdu @gol
+-mfloat-abi=@var{name} @gol
 -melrw  -mistack  -mmp  -mcp  -mcache  -msecurity  -mtrust @gol
 -mdsp  -medsp  -mvdsp @gol
 -mdiv  -msmart  -mhigh-registers  -manchor @gol
@@ -20644,6 +20645,23 @@ Specify the C-SKY target processor.  Valid values for @var{cpu} are:
 
 Select big- or little-endian code.  The default is little-endian.
 
+@item -mfloat-abi=@var{name}
+@opindex mfloat-abi
+Specifies which floating-point ABI to use.  Permissible values
+are: @samp{soft}, @samp{softfp} and @samp{hard}.
+
+Specifying @samp{soft} causes GCC to generate output containing
+library calls for floating-point operations.
+@samp{softfp} allows the generation of code using hardware floating-point
+instructions, but still uses the soft-float calling conventions.
+@samp{hard} allows generation of floating-point instructions
+and uses FPU-specific calling conventions.
+
+The default depends on the specific target configuration.  Note that
+the hard-float and soft-float ABIs are not link-compatible; you must
+compile your entire program with the same ABI, and link with a
+compatible set of libraries.
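
(An illustrative aside, not part of the patch: for a simple function

  double add (double a, double b) { return a + b; }

the three ABIs differ roughly as follows.  With -mfloat-abi=soft the
addition becomes a library call such as __adddf3 and the arguments are
passed in general-purpose registers; with -mfloat-abi=softfp the addition
uses a hardware FP instruction but the arguments still follow the
soft-float calling convention; with -mfloat-abi=hard both the hardware
instruction and the FPU-register calling convention are used.)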

[PATCH] Aarch64: Add missing clobber for fjcvtzs

2020-07-31 Thread Andrea Corallo
Hi all,

I'd like to submit the following patch that adds a missing CC reg
clobber for the FJCVTZS instruction [1].

The patch adds an executing testcase that fails without the clobber.

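To see why the clobber matters, here is the shape of the failure (an
illustrative sketch based on the testcase below, not actual compiler
output):

  b = b > c;       /* the comparison result may be kept in the NZCV flags  */
  x = __jcvt (a);  /* FJCVTZS also writes NZCV (its Z flag reports whether
                      the conversion was exact)  */
  return b;        /* without the clobber, GCC assumes the comparison flags
                      survive the conversion and may read clobbered flags  */
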
Bootstrapped on aarch64-linux-gnu, regression ongoing.

Okay for trunk when finished testing?

  Andrea

[1] 
https://developer.arm.com/docs/ddi0596/latest/simd-and-floating-point-instructions-alphabetic-order/fjcvtzs-floating-point-javascript-convert-to-signed-fixed-point-rounding-toward-zero

gcc/ChangeLog

2020-07-30  Andrea Corallo  

* config/aarch64/aarch64.md (aarch64_fjcvtzs): Add missing
clobber.

gcc/testsuite/ChangeLog

2020-07-30  Andrea Corallo  

* gcc.target/aarch64/acle/jcvt_2.c: New testcase.
* lib/target-supports.exp
(check_effective_target_aarch64_fjcvtzs_hw): Add new check for
FJCVTZS hw.
From 419436f9717a18545e5e89dff7f28e6958891f4c Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Wed, 29 Jul 2020 19:04:40 +0200
Subject: [PATCH] Add missing clobber for fjcvtzs

gcc/ChangeLog

2020-07-30  Andrea Corallo  

* config/aarch64/aarch64.md (aarch64_fjcvtzs): Add missing
clobber.

gcc/testsuite/ChangeLog

2020-07-30  Andrea Corallo  

* gcc.target/aarch64/acle/jcvt_2.c: New testcase.
* lib/target-supports.exp
(check_effective_target_aarch64_fjcvtzs_hw): Add new check for
FJCVTZS hw.
---
 gcc/config/aarch64/aarch64.md |  3 +-
 .../gcc.target/aarch64/acle/jcvt_2.c  | 33 +++
 gcc/testsuite/lib/target-supports.exp | 21 
 3 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/jcvt_2.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d5ca1898c02e..df780b863707 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7059,7 +7059,8 @@
 (define_insn "aarch64_fjcvtzs"
   [(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:DF 1 "register_operand" "w")]
-  UNSPEC_FJCVTZS))]
+  UNSPEC_FJCVTZS))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_JSCVT"
   "fjcvtzs\\t%w0, %d1"
   [(set_attr "type" "f_cvtf2i")]
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/jcvt_2.c b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_2.c
new file mode 100644
index ..ea2dfd14cf29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_2.c
@@ -0,0 +1,33 @@
+/* Test the __jcvt ACLE intrinsic.  */
+/* { dg-do run } */
+/* { dg-options "-O2 -march=armv8.3-a -save-temps" } */
+/* { dg-require-effective-target aarch64_fjcvtzs_hw } */
+
+#include <arm_acle.h>
+
+extern void abort (void);
+
+#ifdef __ARM_FEATURE_JCVT
+volatile int32_t x;
+
+int __attribute__((noinline))
+foo (double a, int b, int c)
+{
+  b = b > c;
+  x = __jcvt (a);
+  return b;
+}
+
+int
+main (void)
+{
+  int x = foo (1.1, 2, 3);
+  if (x)
+    abort ();
+
+  return 0;
+}
+
+#endif
+
+/* { dg-final { scan-assembler-times "fjcvtzs\tw\[0-9\]+, d\[0-9\]+\n" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 57eed3012b94..5973f7edb4b3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4848,6 +4848,27 @@ proc check_effective_target_aarch64_bti_hw { } {
 } "-O2" ]
 }
 
+# Return 1 if the target supports executing the armv8.3-a FJCVTZS
+# instruction.
+proc check_effective_target_aarch64_fjcvtzs_hw { } {
+if { ![istarget aarch64*-*-*] } {
+   return 0
+}
+return [check_runtime aarch64_fjcvtzs_hw_available {
+   int
+   main (void)
+   {
+ double in = 25.1;
+ int out;
+ asm volatile ("fjcvtzs %w0, %d1"
+   : "=r" (out)
+   : "w" (in)
+   : /* No clobbers.  */);
+ return out == 25;
+   }
+} "-march=armv8.3-a" ]
+}
+
 # Return 1 if GCC was configured with --enable-standard-branch-protection
 proc check_effective_target_default_branch_protection { } {
 return [check_configured_with "enable-standard-branch-protection"]
-- 
2.17.1



RE: [PATCH] AArch64: Fix hwasan failure in readline.

2020-07-31 Thread Kyrylo Tkachov
Hi Tamar

> -Original Message-
> From: Tamar Christina 
> Sent: 30 July 2020 09:28
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Kyrylo Tkachov
> ; Richard Sandiford
> 
> Subject: [PATCH] AArch64: Fix hwasan failure in readline.
> 
> Hi All,
> 
> My previous fix added an unchecked call to fgets in the new function readline.
> fgets can fail when there's an error reading the file in which case it returns
> NULL.  It also returns NULL when the next character is EOF.
> 
> The EOF case is already covered by the existing code but the error case isn't.
> This fixes it by returning the empty string on error.
> 
> Also I now use strnlen instead of strlen to make sure we never read outside
> the buffer.
> 
> This was flagged by Matthew Malcomson during his hwasan work.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master? And for backport with the other patches? (haven't done
> backport yet.)

Code looks ok, but I'm wondering what kind of input triggered this. Now that we 
can exercise this code in the testsuite (thanks!) perhaps a new test is in 
order?

Thanks,
Kyrill

> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * config/aarch64/driver-aarch64.c (readline): Check return value
> fgets.
> 
> --


Re: [PATCH] c++: Use error_at rather than warning_at for missing return in constexpr functions [PR96182]

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Wed, Jul 29, 2020 at 03:30:07PM -0400, Jason Merrill via Gcc-patches wrote:
> On 7/14/20 4:50 AM, Jakub Jelinek wrote:
> > For C++11 we already emit an error if a constexpr function doesn't contain
> > a return statement, because in C++11 that is the only thing it needs to
> > contain, but for C++14 we would normally issue a -Wreturn-type warning.
> > 
> > As mentioned by Jonathan, such constexpr functions are invalid, no
> > diagnostics required, because there doesn't exist any arguments for
> > which it would result in valid constant expression.
> > 
> > This raises it to an error in such cases.  The !LAMBDA_TYPE_P case
> > is to avoid error on g++.dg/pr81194.C where the user didn't write
> > constexpr anywhere and the operator() is compiler generated.
> 
> We set DECL_DECLARED_CONSTEXPR_P on lambdas earlier in finish_function if
> suitable; can we move this diagnostic up before that?

That works too.  Bootstrapped/regtested on x86_64-linux and i686-linux, ok
for trunk then?

2020-07-31  Jakub Jelinek  

PR c++/96182
* decl.c (finish_function): In constexpr functions use for C++14 and
later error instead of warning if no return statement is present and
diagnose it regardless of warn_return_type.  Move the warn_return_type
diagnostics earlier in the function.

* g++.dg/cpp1y/constexpr-96182.C: New test.
* g++.dg/other/error35.C (S::g()): Add return statement.
* g++.dg/cpp1y/pr63996.C (foo): Likewise.
* g++.dg/cpp1y/constexpr-return2.C (f): Likewise.
* g++.dg/cpp1y/var-templ44.C (make_array): Add throw 1.

--- gcc/cp/decl.c.jj2020-07-29 11:57:23.340517489 +0200
+++ gcc/cp/decl.c   2020-07-30 20:44:33.634951396 +0200
@@ -17112,6 +17112,51 @@ finish_function (bool inline_p)
  DECL_ATTRIBUTES (fndecl)))
   omp_declare_variant_finalize (fndecl, attr);
 
+  /* Complain if there's just no return statement.  */
+  if ((warn_return_type
+   || (cxx_dialect >= cxx14
+  && DECL_DECLARED_CONSTEXPR_P (fndecl)))
+  && !VOID_TYPE_P (TREE_TYPE (fntype))
+  && !dependent_type_p (TREE_TYPE (fntype))
+  && !current_function_returns_value && !current_function_returns_null
+  /* Don't complain if we abort or throw.  */
+  && !current_function_returns_abnormally
+  /* Don't complain if there's an infinite loop.  */
+  && !current_function_infinite_loop
+  /* Don't complain if we are declared noreturn.  */
+  && !TREE_THIS_VOLATILE (fndecl)
+  && !DECL_NAME (DECL_RESULT (fndecl))
+  && !TREE_NO_WARNING (fndecl)
+  /* Structor return values (if any) are set by the compiler.  */
+  && !DECL_CONSTRUCTOR_P (fndecl)
+  && !DECL_DESTRUCTOR_P (fndecl)
+  && targetm.warn_func_return (fndecl))
+{
+  gcc_rich_location richloc (input_location);
+  /* Potentially add a "return *this;" fix-it hint for
+assignment operators.  */
+  if (IDENTIFIER_ASSIGN_OP_P (DECL_NAME (fndecl)))
+   {
+ tree valtype = TREE_TYPE (DECL_RESULT (fndecl));
+ if (TREE_CODE (valtype) == REFERENCE_TYPE
+ && current_class_ref
+ && same_type_ignoring_top_level_qualifiers_p
+ (TREE_TYPE (valtype), TREE_TYPE (current_class_ref))
+ && global_dc->option_enabled (OPT_Wreturn_type,
+   global_dc->lang_mask,
+   global_dc->option_state))
+   add_return_star_this_fixit (&richloc, fndecl);
+   }
+  if (cxx_dialect >= cxx14
+ && DECL_DECLARED_CONSTEXPR_P (fndecl))
+   error_at (&richloc, "no return statement in % function "
+   "returning non-void");
+  else if (warning_at (&richloc, OPT_Wreturn_type,
+  "no return statement in function returning "
+  "non-void"))
+   TREE_NO_WARNING (fndecl) = 1;
+}
+
   /* Lambda closure members are implicitly constexpr if possible.  */
   if (cxx_dialect >= cxx17
   && LAMBDA_TYPE_P (CP_DECL_CONTEXT (fndecl)))
@@ -17163,44 +17208,6 @@ finish_function (bool inline_p)
  to the FUNCTION_DECL node itself.  */
   BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
 
-  /* Complain if there's just no return statement.  */
-  if (warn_return_type
-  && !VOID_TYPE_P (TREE_TYPE (fntype))
-  && !dependent_type_p (TREE_TYPE (fntype))
-  && !current_function_returns_value && !current_function_returns_null
-  /* Don't complain if we abort or throw.  */
-  && !current_function_returns_abnormally
-  /* Don't complain if there's an infinite loop.  */
-  && !current_function_infinite_loop
-  /* Don't complain if we are declared noreturn.  */
-  && !TREE_THIS_VOLATILE (fndecl)
-  && !DECL_NAME (DECL_RESULT (fndecl))
-  && !TREE_NO_WARNING (fndecl)
-  /* Structor return values (if any) are set by the compiler.  */
-  

Re: [stage1][PATCH] Make TOPN counter dynamically allocated.

2020-07-31 Thread Martin Liška

On 6/3/20 8:28 AM, Martin Liška wrote:

On 6/2/20 3:19 PM, Martin Liška wrote:

I'm suggesting to pre-allocate 16 gcov_kvp in the gcov run-time library.
Please take a look at the attached patch. I also added a test-case that
stresses that. I've just finished LTO PGO bootstrap of the GCC.

Ready for master?


There's V2 of the patch:

- guard __atomic_fetch_add in GCOV_SUPPORTS_ATOMIC

Martin


There's V3 of the patch:
- TLS is used for in_recursion variable
- when we run out of statically allocated buffers, do not bail out

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
From 55d6422a16782153abeca70d6e7c6c62eacfe8eb Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 2 Jun 2020 13:31:48 +0200
Subject: [PATCH] libgcov: support overloaded malloc

gcc/ChangeLog:

	* gcov-io.h (GCOV_PREALLOCATED_KVP): New.
	* gcov-tool.c: Add __gcov_kvp_pool
	and __gcov_kvp_pool_index variables.

libgcc/ChangeLog:

	* libgcov-driver.c: Add __gcov_kvp_pool
	and __gcov_kvp_pool_index variables.
	* libgcov.h (allocate_gcov_kvp): New.
	(gcov_topn_add_value): Use it.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-prof/indir-call-prof-malloc.c: New test.
---
 gcc/gcov-io.h |  3 ++
 .../gcc.dg/tree-prof/indir-call-prof-malloc.c | 49 +++
 libgcc/libgcov-driver.c   |  6 +++
 libgcc/libgcov.h  | 49 ++-
 4 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c

diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 940eb7d8561..4dba01c78ce 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -292,6 +292,9 @@ GCOV_COUNTERS
 /* Maximum number of tracked TOP N value profiles.  */
 #define GCOV_TOPN_MAXIMUM_TRACKED_VALUES 32
 
+/* Number of pre-allocated gcov_kvp structures.  */
+#define GCOV_PREALLOCATED_KVP 16
+
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)\
 	(GCOV_TAG_COUNTER_BASE + ((gcov_unsigned_t)(COUNT) << 17))
diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
new file mode 100644
index 000..454e224c95f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
@@ -0,0 +1,49 @@
+/* { dg-options "-O2 -ldl" } */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+int global;
+int global2;
+
+void report1 (size_t size)
+{
+  global++;
+}
+
+void report2 (size_t size)
+{
+  global2++;
+}
+
+typedef void (*tp) (size_t);
+static tp fns[] = {report1, report2};
+
+void* malloc(size_t size)
+{
+  static void* (*real_malloc)(size_t) = NULL;
+  if (!real_malloc)
+  real_malloc = dlsym(RTLD_NEXT, "malloc");
+
+  void *p = real_malloc (size);
+  fns[size % 2] (size);
+  // fprintf(stderr, "malloc(%d) = %p\n", size, p);
+  return p;
+}
+
+void *calloc (size_t n, size_t size)
+{
+  void *ptr = malloc (n * size);
+  __builtin_memset (ptr, 0, n * size);
+  return ptr;
+}
+
+void *ptr;
+
+int main()
+{
+  ptr = malloc (16);
+  return 0;
+}
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index 2590593a58a..58914268d4e 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -588,6 +588,12 @@ struct gcov_root __gcov_root;
 struct gcov_master __gcov_master = 
   {GCOV_VERSION, 0};
 
+/* Pool of pre-allocated gcov_kvp structures.  */
+struct gcov_kvp __gcov_kvp_pool[GCOV_PREALLOCATED_KVP];
+
+/* Index to first free gcov_kvp in the pool.  */
+unsigned __gcov_kvp_pool_index;
+
 void
 __gcov_exit (void)
 {
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index 81e18950a50..8be5bebcac0 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -250,6 +250,8 @@ struct indirect_call_tuple
   
 /* Exactly one of these will be active in the process.  */
 extern struct gcov_master __gcov_master;
+extern struct gcov_kvp __gcov_kvp_pool[GCOV_PREALLOCATED_KVP];
+extern unsigned __gcov_kvp_pool_index;
 
 /* Dump a set of gcov objects.  */
 extern void __gcov_dump_one (struct gcov_root *) ATTRIBUTE_HIDDEN;
@@ -402,6 +404,47 @@ gcov_counter_add (gcov_type *counter, gcov_type value,
 *counter += value;
 }
 
+/* Allocate gcov_kvp from heap.  If we are recursively called, then allocate
+   it from a list of pre-allocated pool.  */
+
+static inline struct gcov_kvp *
+allocate_gcov_kvp (void)
+{
+  struct gcov_kvp *new_node = NULL;
+
+  static
+#if defined(HAVE_CC_TLS)
+__thread
+#endif
+  volatile unsigned in_recursion ATTRIBUTE_UNUSED = 0;
+
+#if !defined(IN_GCOV_TOOL) && !defined(L_gcov_merge_topn)
+  if (__builtin_expect (in_recursion, 0))
+{
+  unsigned index;
+#if GCOV_SUPPORTS_ATOMIC
+  index
+	= __atomic_fetch_add (&__gcov_kvp_pool_index, 1, __ATOMIC_RELAXED);
+#else
+  index = __gcov_kvp_pool_index++;
+#endif
+  if (index < GCOV_PREALLOCATED_KVP)
+	new_node = &__gcov_kvp_pool[index];
+  else
+	/* Do not crash in the situation.  */
+	return NULL;
+}
+  else
+#endif
+{
+  in_recursion = 1;
+  new_node = (struct gcov_kvp *)xcalloc (1, sizeof (struct gcov_kvp));
+  in_recursion = 0;
+}
+
+  return new_node;
+}

RE: [PATCH] AArch64: Fix hwasan failure in readline.

2020-07-31 Thread Tamar Christina
Hi Kyrill,

> -Original Message-
> From: Kyrylo Tkachov 
> Sent: Friday, July 31, 2020 8:47 AM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Richard Sandiford
> 
> Subject: RE: [PATCH] AArch64: Fix hwasan failure in readline.
> 
> Hi Tamar
> 
> > -Original Message-
> > From: Tamar Christina 
> > Sent: 30 July 2020 09:28
> > To: gcc-patches@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Kyrylo Tkachov
> > ; Richard Sandiford
> > 
> > Subject: [PATCH] AArch64: Fix hwasan failure in readline.
> >
> > Hi All,
> >
> > My previous fix added an unchecked call to fgets in the new function
> readline.
> > fgets can fail when there's an error reading the file in which case it
> > returns NULL.  It also returns NULL when the next character is EOF.
> >
> > The EOF case is already covered by the existing code but the error case 
> > isn't.
> > This fixes it by returning the empty string on error.
> >
> > Also I now use strnlen instead of strlen to make sure we never read
> > outside the buffer.
> >
> > This was flagged by Matthew Malcomson during his hwasan work.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master? And for backport with the other patches? (haven't done
> > backport yet.)
> 
> Code looks ok, but I'm wondering what kind of input triggered this. Now that
> we can exercise this code in the testsuite (thanks!) perhaps a new test is in
> order?

No code triggered this; this was HWAsan saying that there is a code-path that
can potentially cause a failure.  When the system call fails (i.e. corrupt
file, file becomes unavailable after you opened the stream etc.) then the
fgets call returns NULL, but crucially the buffer is left in an indeterminate
state.

This means that the call to strlen can fail due to the buffer not having a \0
in it and so you will read out of bounds in buf[last].

On its own, the strlen -> strnlen change already covers this, but it would
return a partial string, which is not what we want on failure.  On failure we
want to abort and so returning the empty string makes it ignore the entire
block.

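In other words, the pattern is roughly the following (an illustrative
sketch, not the actual driver-aarch64.c code):

  #include <stdio.h>
  #include <string.h>

  static const char *
  readline_checked (FILE *f, char *buf, size_t size)
  {
    /* On error fgets leaves BUF indeterminate, so don't scan it:
       return "" and let the caller ignore the whole block.  */
    if (fgets (buf, size, f) == NULL)
      return "";
    /* strnlen never reads past BUF, even if the '\0' went missing.  */
    size_t len = strnlen (buf, size);
    if (len > 0 && buf[len - 1] == '\n')
      buf[len - 1] = '\0';
    return buf;
  }
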
If you prefer we can also hard abort.  But I don't know how to emulate this
behaviour in the testsuite.  I would need to be able to asynchronously make
the file invalid after the fopen call but during readline.

Thanks,
Tamar

> 
> Thanks,
> Kyrill
> 
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/driver-aarch64.c (readline): Check return value
> > fgets.
> >
> > --


RE: [PATCH] AArch64: Fix hwasan failure in readline.

2020-07-31 Thread Kyrylo Tkachov
Hi Tamar,

> -Original Message-
> From: Tamar Christina 
> Sent: 31 July 2020 09:07
> To: Kyrylo Tkachov ; gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Richard Sandiford
> 
> Subject: RE: [PATCH] AArch64: Fix hwasan failure in readline.
> 
> Hi Kyrill,
> 
> > -Original Message-
> > From: Kyrylo Tkachov 
> > Sent: Friday, July 31, 2020 8:47 AM
> > To: Tamar Christina ; gcc-
> patc...@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Richard Sandiford
> > 
> > Subject: RE: [PATCH] AArch64: Fix hwasan failure in readline.
> >
> > Hi Tamar
> >
> > > -Original Message-
> > > From: Tamar Christina 
> > > Sent: 30 July 2020 09:28
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: nd ; Richard Earnshaw
> ;
> > > Marcus Shawcroft ; Kyrylo Tkachov
> > > ; Richard Sandiford
> > > 
> > > Subject: [PATCH] AArch64: Fix hwasan failure in readline.
> > >
> > > Hi All,
> > >
> > > My previous fix added an unchecked call to fgets in the new function
> > readline.
> > > fgets can fail when there's an error reading the file in which case it
> > > returns NULL.  It also returns NULL when the next character is EOF.
> > >
> > > The EOF case is already covered by the existing code but the error case
> isn't.
> > > This fixes it by returning the empty string on error.
> > >
> > > Also I now use strnlen instead of strlen to make sure we never read
> > > outside the buffer.
> > >
> > > This was flagged by Matthew Malcomson during his hwasan work.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master? And for backport with the other patches? (haven't done
> > > backport yet.)
> >
> > Code looks ok, but I'm wondering what kind of input triggered this. Now that
> > we can exercise this code in the testsuite (thanks!) perhaps a new test is
> > in order?
> 
> No code triggered this; this was HWAsan saying that there is a code-path
> that can potentially cause a failure.  When the system call fails
> (i.e. corrupt file, file becomes unavailable after you opened the stream
> etc.) then the fgets call returns NULL, but crucially the buffer is left
> in an indeterminate state.
> 
> This means that the call to strlen can fail due to the buffer not having
> a \0 in it and so you will read out of bounds in buf[last].
> 
> On its own, the strlen -> strnlen change already covers this, but it
> would return a partial string, which is not what we want on failure.
> On failure we want to abort and so returning the empty string makes it
> ignore the entire block.

Ah, ok.

> 
> If you prefer we can also hard abort.  But I don't know how to emulate this
> behaviour
> In the testsuite. I would need to be able to asynchronously make the file
> invalid after
> the fopen call but during readline.
> 

No need, we should just bail out here.
This patch is okay.

Thanks,
Kyrill

> Thanks,
> Tamar
> 
> >
> > Thanks,
> > Kyrill
> >
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > >   * config/aarch64/driver-aarch64.c (readline): Check return value
> > > fgets.
> > >
> > > --


Re: [PATCH] c++: Add __builtin_bit_cast to implement std::bit_cast [PR93121]

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Thu, Jul 30, 2020 at 05:59:18PM -0400, Jason Merrill via Gcc-patches wrote:
> > Even if we are guaranteed that (what guarantees it?) when __builtin_bit_cast
> > is constexpr evaluated no initializer will be omitted if may be value 
> > initialized
> > to something other than all zeros,
> 
> This is established by digest_init/process_init_constructor_record, which
> replace implicit value-initialization with explicit values when it's not
> simple zero-initialization.

Ok, I see.

> > we still need to track what bits are well
> > defined and what are not (e.g. any padding in there).
> 
> > Thinking about it now, maybe the mask handling for !CONSTRUCTOR_NO_CLEARING
> > is incorrect though if there are missing initializers, because those omitted
> > initializers still could have paddings with unspecified content, right?
> 
> Zero-initialization (and thus trivial value-initialization) of a class also
> zeroes out padding bits, so when !CONSTRUCTOR_NO_CLEARING all bits should be
> well defined.

Does the standard require that somewhere?  Because that is not what the
compiler implements right now.
If I try:
extern "C" void *memcpy (void *, const void *, decltype (sizeof 0)) noexcept;
struct S { int a, b : 31, c; short d; signed char e; };
S a = S ();
constexpr S d = S ();

S
foo ()
{
  return S ();
}

void
bar (int *p)
{
  S b = foo ();
  S c = S ();
  memcpy (p, &a, sizeof (S));
  memcpy (p + 4, &b, sizeof (S));
  memcpy (p + 8, &c, sizeof (S));
  memcpy (p + 12, &d, sizeof (S));
}
then a will have the padding bit initialized iff it has a constant
initializer (as .data/.rodata initializers have all bits well defined),
but both foo, the copying of foo result to b and the initialization of c
all make the padding bit unspecified.

The gimplifier indeed has code that for !CONSTRUCTOR_NO_CLEARING
CONSTRUCTORs it will clear them fully first, but that only triggers if
the CONSTRUCTOR is characterized as incomplete, which is checked by whether
all the initializable fields have a corresponding initializer element,
so it doesn't count padding bits, in the middle or at the end of structures.

So, shall std::bit_cast be well defined if reading the padding bits
from these cases even when at runtime they would be undefined?  Or do we
need to change the gimplifier (and expansion etc.) for C++?

Talking about the cases where the types in the destination aren't unsigned
char/std::byte, that has some special rules and Marek has filed a PR for
those.

Jakub



Re: [stage1][PATCH] Make TOPN counter dynamically allocated.

2020-07-31 Thread Jan Hubicka
> On 6/3/20 8:28 AM, Martin Liška wrote:
> > On 6/2/20 3:19 PM, Martin Liška wrote:
> > > I'm suggesting to pre-allocate 16 gcov_kvp in the gcov run-time library.
> > > Please take a look at the attached patch. I also added a test-case that
> > > stresses that. I've just finished LTO PGO bootstrap of the GCC.
> > > 
> > > Ready for master?
> > 
> > There's V2 of the patch:
> > 
> > - guard __atomic_fetch_add in GCOV_SUPPORTS_ATOMIC
> > 
> > Martin
> 
> There's V3 of the patch:
> - TLS is used for in_recursion variable
> - when we run out of statically allocated buffers, do not bail out
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
> new file mode 100644
> index 000..454e224c95f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
> @@ -0,0 +1,49 @@
> +/* { dg-options "-O2 -ldl" } */

I think this needs to be restricted to targets that have the dl library.
Otherwise the patch looks good to me.
We may try to add a testcase doing division by all possible integers
in two threads or so to stress the race conditions in the linked-list
management.

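A rough sketch of what such a stress test might look like (hypothetical,
not part of this patch; my_div and worker are made-up names):

  /* { dg-options "-O2 -pthread" } */
  #include <pthread.h>

  int sink;

  __attribute__ ((noinline)) int
  my_div (int a, int b)
  {
    /* With -fprofile-generate a TOPN value counter tracks the divisor.  */
    return a / b;
  }

  static void *
  worker (void *arg)
  {
    /* Many distinct divisor values, updated from two threads at once,
       to stress the linked-list management.  */
    for (int i = 1; i < 100000; i++)
      sink = my_div (100000, i);
    return (void *) 0;
  }

  int
  main (void)
  {
    pthread_t t1, t2;
    pthread_create (&t1, 0, worker, 0);
    pthread_create (&t2, 0, worker, 0);
    pthread_join (t1, 0);
    pthread_join (t2, 0);
    return 0;
  }
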
Thank you,
Honza
> +
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dlfcn.h>
> +
> +int global;
> +int global2;
> +
> +void report1 (size_t size)
> +{
> +  global++;
> +}
> +
> +void report2 (size_t size)
> +{
> +  global2++;
> +}
> +
> +typedef void (*tp) (size_t);
> +static tp fns[] = {report1, report2};
> +
> +void* malloc(size_t size)
> +{
> +  static void* (*real_malloc)(size_t) = NULL;
> +  if (!real_malloc)
> +  real_malloc = dlsym(RTLD_NEXT, "malloc");
> +
> +  void *p = real_malloc (size);
> +  fns[size % 2] (size);
> +  // fprintf(stderr, "malloc(%d) = %p\n", size, p);
> +  return p;
> +}
> +
> +void *calloc (size_t n, size_t size)
> +{
> +  void *ptr = malloc (n * size);
> +  __builtin_memset (ptr, 0, n * size);
> +  return ptr;
> +}
> +
> +void *ptr;
> +
> +int main()
> +{
> +  ptr = malloc (16);
> +  return 0;
> +}
> diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
> index 2590593a58a..58914268d4e 100644
> --- a/libgcc/libgcov-driver.c
> +++ b/libgcc/libgcov-driver.c
> @@ -588,6 +588,12 @@ struct gcov_root __gcov_root;
>  struct gcov_master __gcov_master = 
>{GCOV_VERSION, 0};
>  
> +/* Pool of pre-allocated gcov_kvp structures.  */
> +struct gcov_kvp __gcov_kvp_pool[GCOV_PREALLOCATED_KVP];
> +
> +/* Index to first free gcov_kvp in the pool.  */
> +unsigned __gcov_kvp_pool_index;
> +
>  void
>  __gcov_exit (void)
>  {
> diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
> index 81e18950a50..8be5bebcac0 100644
> --- a/libgcc/libgcov.h
> +++ b/libgcc/libgcov.h
> @@ -250,6 +250,8 @@ struct indirect_call_tuple
>
>  /* Exactly one of these will be active in the process.  */
>  extern struct gcov_master __gcov_master;
> +extern struct gcov_kvp __gcov_kvp_pool[GCOV_PREALLOCATED_KVP];
> +extern unsigned __gcov_kvp_pool_index;
>  
>  /* Dump a set of gcov objects.  */
>  extern void __gcov_dump_one (struct gcov_root *) ATTRIBUTE_HIDDEN;
> @@ -402,6 +404,47 @@ gcov_counter_add (gcov_type *counter, gcov_type value,
>  *counter += value;
>  }
>  
> +/* Allocate gcov_kvp from heap.  If we are recursively called, then allocate
> +   it from a list of pre-allocated pool.  */
> +
> +static inline struct gcov_kvp *
> +allocate_gcov_kvp (void)
> +{
> +  struct gcov_kvp *new_node = NULL;
> +
> +  static
> +#if defined(HAVE_CC_TLS)
> +__thread
> +#endif
> +  volatile unsigned in_recursion ATTRIBUTE_UNUSED = 0;
> +
> +#if !defined(IN_GCOV_TOOL) && !defined(L_gcov_merge_topn)
> +  if (__builtin_expect (in_recursion, 0))
> +{
> +  unsigned index;
> +#if GCOV_SUPPORTS_ATOMIC
> +  index
> + = __atomic_fetch_add (&__gcov_kvp_pool_index, 1, __ATOMIC_RELAXED);
> +#else
> +  index = __gcov_kvp_pool_index++;
> +#endif
> +  if (index < GCOV_PREALLOCATED_KVP)
> + new_node = &__gcov_kvp_pool[index];
> +  else
> + /* Do not crash in the situation.  */
> + return NULL;
> +}
> +  else
> +#endif
> +{
> +  in_recursion = 1;
> +  new_node = (struct gcov_kvp *)xcalloc (1, sizeof (struct gcov_kvp));
> +  in_recursion = 0;
> +}
> +
> +  return new_node;
> +}
> +
>  /* Add key value pair VALUE:COUNT to a top N COUNTERS.  When INCREMENT_TOTAL
> is true, add COUNT to total of the TOP counter.  If USE_ATOMIC is true,
> do it in atomic way.  */
> @@ -443,8 +486,10 @@ gcov_topn_add_value (gcov_type *counters, gcov_type 
> value, gcov_type count,
>  }
>else
>  {
> -  struct gcov_kvp *new_node
> - = (struct gcov_kvp *)xcalloc (1, sizeof (struct gcov_kvp));
> +  struct gcov_kvp *new_node = allocate_gcov_kvp ();
> +  if (new_node == NULL)
> + return;
> +
>new_node->value = value;
>new_node->count = count;
>  
> -- 
> 2.27.0
> 



Re: [stage1][PATCH] Make TOPN counter dynamically allocated.

2020-07-31 Thread Martin Liška

On 7/31/20 10:47 AM, Jan Hubicka wrote:

I think this needs to be restricted to targets that have dl library


Hm, I can't find a dg-require that is used for similar test-cases.

Martin


Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions emitted at -O3

2020-07-31 Thread Richard Sandiford
xiezhiheng  writes:
>> -Original Message-
>> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
>> Sent: Friday, July 17, 2020 5:04 PM
>> To: xiezhiheng 
>> Cc: Richard Biener ; gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions
>> emitted at -O3
>>
>
> Cut...
>
>> 
>> Thanks, pushed to master.
>> 
>> Richard
>
> And I have finished the second part.
>
> In function aarch64_general_add_builtin, I add an argument ATTRS to
> pass attributes for each built-in function.
>
> And some new functions are added:
> aarch64_call_properties: return flags for each built-in function based
> on command-line options.  When the built-in function handles
> floating-points, add FLAG_FP flag.
>
> aarch64_modifies_global_state_p: True if the function would modify
> global states.
>
> aarch64_reads_global_state_p: True if the function would read
> global states.
>
> aarch64_could_trap_p: True if the function would raise a signal.
>
> aarch64_add_attribute: Add attributes in ATTRS.
>
> aarch64_get_attributes: return attributes for each built-in functons
> based on flags and command-line options.
>
> In function aarch64_init_simd_builtins, attributes are get by flags
> and pass them to function aarch64_general_add_builtin.
>
>
> Bootstrap is tested OK on aarch64 Linux platform, but regression
> FAIL one test case  pr93423.f90.
> However, I found that this test case would fail randomly in trunk.
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93423
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96041
> Some PRs have tracked it.  After my patch, this test case would
> always fail.  I guess the syntax errors in fortran crash some structures
> result in illegal memory access but I can't find what exactly it is.
> But I think my patch should have no influence on it.

Yeah, I agree.  And FWIW, I didn't see this in my testing.

I've pushed the patch with one trivial change: to remove the “and”
before “CODE” in:

>  /* Wrapper around add_builtin_function.  NAME is the name of the built-in
> function, TYPE is the function type, and CODE is the function subcode
> -   (relative to AARCH64_BUILTIN_GENERAL).  */
> +   (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
> +   attributes.  */

BTW, one thing to be careful of in future is that not all FP intrinsics
raise FP exceptions.  So while:

> +  switch (d->mode)
> +{
> +/* Floating-point.  */
> +case E_BFmode:
> +case E_V4BFmode:
> +case E_V8BFmode:
> +case E_HFmode:
> +case E_V4HFmode:
> +case E_V8HFmode:
> +case E_SFmode:
> +case E_V2SFmode:
> +case E_V4SFmode:
> +case E_DFmode:
> +case E_V1DFmode:
> +case E_V2DFmode:
> +  flags |= FLAG_FP;
> +  break;
> +
> +default:
> +  break;
> +}

is a good, conservatively-correct default, we might need an additional
flag to suppress it for certain intrinsics.

I've just realised that the code above could have used FLOAT_MODE_P,
but I didn't think of that before pushing the patch :-)

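That is, assuming FLOAT_MODE_P covers the same set of modes, the whole
switch could presumably have been just:

  if (FLOAT_MODE_P (d->mode))
    flags |= FLAG_FP;
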
Thanks,
Richard


Re: [PATCH PR95696] regrename creates overlapping register allocations for vliw

2020-07-31 Thread Richard Sandiford
Thanks for the update, looks good.  Could you post a changelog too
so that I can use it when committing?

The changelog was the only reason I didn't just push the patch,
but FWIW, a couple of very minor things…

Zhongyunde  writes:
> diff --git a/gcc/regrename.c b/gcc/regrename.c
> old mode 100644
> new mode 100755
> index 637b3cbe6d7..815ed22805d
> --- a/gcc/regrename.c
> +++ b/gcc/regrename.c
> @@ -684,10 +684,12 @@ merge_chains (du_head_p c1, du_head_p c2)
>c1->cannot_rename |= c2->cannot_rename;
>  }
>  
> -/* Analyze the current function and build chains for renaming.  */
> +/* Analyze the current function and build chains for renaming.
> +   If INCLUDE_ALL_BLOCKS_P is set to true, should process all blocks,
> +   ignoring BB_DISABLE_SCHEDULE.  The default value is true.  */

I think s/should// here, since GCC comments usually use an imperative
style.

> @@ -737,6 +739,14 @@ regrename_analyze (bitmap bb_mask)
>if (dump_file)
>   fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
>  
> +  if (!include_all_block_p && (bb1->flags & BB_DISABLE_SCHEDULE) != 0)
> + {
> +   if (dump_file)
> + fprintf (dump_file, "avoid disrupting the sms schedule of bb %d\n",
> +  bb1->index);

bb1->index should be indented below “dump_file”.

Richard


Re: [PATCH] Do not allocate huge array in output_in_order.

2020-07-31 Thread Jan Hubicka
> We noticed that when analyzing LTRANS memory peak that happens right
> after the start:
> https://gist.github.com/marxin/223890df4d8d8e490b6b2918b77dacad
> 
> In case of chrome, we have symtab->order == 200M, so we allocate
> 16B * 200M = 3.2GB.

200M is very large number even for overall number of symbols (and close
to overflow), so I guess we want to start adding overflow checks and
turn some stuff to 64bit...

There are many unnecessary orders assigned, so I will take care of this
first.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
>   * cgraph.h: Remove leading empty lines.
>   * cgraphunit.c (enum cgraph_order_sort_kind): Remove
>   ORDER_UNDEFINED.
>   (struct cgraph_order_sort): Add constructors.
>   (cgraph_order_sort::process): New.
>   (cgraph_order_cmp): New.
>   (output_in_order): Simplify and push nodes to vector.

OK

Honza


Re: [PATCH] Aarch64: Add missing clobber for fjcvtzs

2020-07-31 Thread Richard Sandiford
Andrea Corallo  writes:
> Hi all,
>
> I'd like to submit the following patch that adds a missing CC reg
> clobber for the FJCVTZS instruction [1].
>
> The patch adds an executing testcase that fails without the clobber.
>
> Bootstrapped on aarch64-linux-gnu, regression ongoing.
>
> Okay for trunk when finished testing?
>
>   Andrea
>
> [1] 
> https://developer.arm.com/docs/ddi0596/latest/simd-and-floating-point-instructions-alphabetic-order/fjcvtzs-floating-point-javascript-convert-to-signed-fixed-point-rounding-toward-zero
>
> gcc/ChangeLog
>
> 2020-07-30  Andrea Corallo  
>
>   * config/aarch64/aarch64.md (aarch64_fjcvtzs): Add missing
>   clobber.
>
> gcc/testsuite/ChangeLog
>
> 2020-07-30  Andrea Corallo  
>
>   * gcc.target/aarch64/acle/jcvt_2.c: New testcase.
>   * lib/target-supports.exp
>   (check_effective_target_aarch64_fjcvtzs_hw): Add new check for
>   FJCVTZS hw.

OK, thanks.  Also OK for any branches that need it.

As a follow-up, it might be nice at some point to have a pattern that
takes advantage of the Z flag result, but that's obviously much less
important than fixing the bug.

Richard


Re: [PATCH] c++: Add __builtin_bit_cast to implement std::bit_cast [PR93121]

2020-07-31 Thread Jonathan Wakely via Gcc-patches

On 31/07/20 10:19 +0200, Jakub Jelinek wrote:

> On Thu, Jul 30, 2020 at 05:59:18PM -0400, Jason Merrill via Gcc-patches wrote:
> > > Even if we are guaranteed that (what guarantees it?) when __builtin_bit_cast
> > > is constexpr evaluated no initializer will be omitted if may be value
> > > initialized to something other than all zeros,
> > 
> > This is established by digest_init/process_init_constructor_record, which
> > replace implicit value-initialization with explicit values when it's not
> > simple zero-initialization.
> 
> Ok, I see.
> 
> > > we still need to track what bits are well
> > > defined and what are not (e.g. any padding in there).
> > 
> > > Thinking about it now, maybe the mask handling for !CONSTRUCTOR_NO_CLEARING
> > > is incorrect though if there are missing initializers, because those omitted
> > > initializers still could have paddings with unspecified content, right?
> > 
> > Zero-initialization (and thus trivial value-initialization) of a class also
> > zeroes out padding bits, so when !CONSTRUCTOR_NO_CLEARING all bits should be
> > well defined.
> 
> Does the standard require that somewhere?  Because that is not what the
> compiler implements right now.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78620

> If I try:
> extern "C" void *memcpy (void *, const void *, decltype (sizeof 0)) noexcept;
> struct S { int a, b : 31, c; short d; signed char e; };
> S a = S ();
> constexpr S d = S ();
> 
> S
> foo ()
> {
>   return S ();
> }
> 
> void
> bar (int *p)
> {
>   S b = foo ();
>   S c = S ();
>   memcpy (p, &a, sizeof (S));
>   memcpy (p + 4, &b, sizeof (S));
>   memcpy (p + 8, &c, sizeof (S));
>   memcpy (p + 12, &d, sizeof (S));
> }
> then a will have the padding bit initialized iff it has a constant
> initializer (as .data/.rodata initializers have all bits well defined),
> but both foo, the copying of foo result to b and the initialization of c
> all make the padding bit unspecified.
> 
> The gimplifier indeed has code that for !CONSTRUCTOR_NO_CLEARING
> CONSTRUCTORs it will clear them fully first, but that only triggers if
> the CONSTRUCTOR is characterized as incomplete, which is checked by whether
> all the initializable fields have a corresponding initializer element,
> so it doesn't count padding bits, in the middle or at the end of structures.
> 
> So, shall std::bit_cast be well defined if reading the padding bits
> from these cases even when at runtime they would be undefined?  Or do we
> need to change the gimplifier (and expansion etc.) for C++?
> 
> Talking about the cases where the types in the destination aren't unsigned
> char/std::byte, that has some special rules and Marek has filed a PR for
> those.
> 
> Jakub




Re: [PATCH] c++: Add __builtin_bit_cast to implement std::bit_cast [PR93121]

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Fri, Jul 31, 2020 at 10:54:46AM +0100, Jonathan Wakely wrote:
> > Does the standard require that somewhere?  Because that is not what the
> > compiler implements right now.
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78620

Ah, ok.
But does that imply that all CONSTRUCTORs without CONSTRUCTOR_NO_CLEARING
need to be treated that way?  I mean, aren't such CONSTRUCTORs used also for
other initializations?
And, are the default copy constructors or assignment operators supposed to
also copy the padding bits, or do they become unspecified again through
that?

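For instance (a C++ illustration of the question, not an answer):

  struct S { char c; int i; };  // padding bytes between c and i

  S a = S ();  // zero-initialization: padding bits zeroed, per PR 78620
  S b = a;     // implicitly-defined copy: are the padding bytes copied,
               // or do they become unspecified again in b?
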
Jakub



[PATCH] SLP: support entire BB.

2020-07-31 Thread Martin Liška

Hey.

The motivation of the patch is to allow vectorization of an entire BB.
So far we bail out to a sub-BB region when we reach a stmt for which
vect_find_stmt_data_reference returns false.  That's replaced with
recording of groups of the data references.

We can now vectorize code like:

void foo();
void bar (int i, double *a, double *b)
{
  double tem1 = a[2*i];
  double tem2 = 2*a[2*i+1];
  foo ();
  b[2*i] = 2*tem1;
  b[2*i+1] = tem2;
}

into:
  <bb 2> [local count: 1073741824]:
  _1 = i_12(D) * 2;
  _2 = (long unsigned int) _1;
  _3 = _2 * 8;
  _4 = a_13(D) + _3;
  vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4];
  vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 };
  foo ();
  _9 = b_18(D) + _3;
  MEM <vector(2) double> [(double *)_9] = vect__10.6_25;
  return;

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Thoughts?
Martin

gcc/ChangeLog:

* tree-vect-data-refs.c (dr_group_sort_cmp): Work on
data_ref_pair.
(vect_analyze_data_ref_accesses): Work on groups.
(vect_find_stmt_data_reference): Add group_id argument and fill
up dataref_groups vector.
* tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new
arguments.
(vect_analyze_loop_2): Likewise.
* tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument.
(vect_slp_bb_region): Likewise.
(vect_slp_region): Likewise.
(vect_slp_bb):Work on the entire BB.
* tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new
argument.
(vect_find_stmt_data_reference): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process
a single vectorization and not 2 partial.
* gcc.dg/vect/bb-slp-45.c: New test.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-38.c |  2 +-
 gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 
 gcc/tree-vect-data-refs.c | 63 +---
 gcc/tree-vect-loop.c  |  5 +-
 gcc/tree-vect-slp.c   | 82 ++-
 gcc/tree-vectorizer.h |  5 +-
 6 files changed, 118 insertions(+), 75 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
index 59aec54fffd..9c57ea3c2c1 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
@@ -41,4 +41,4 @@ int main()
 }
 
 /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */

-/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
new file mode 100644
index 000..4107a34cb93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_double } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+double a[8], b[8];
+int x;
+
+void __attribute__((noinline,noclone))
+bar (void)
+{
+  x = 1;
+}
+
+void __attribute__((noinline,noclone))
+foo(int i)
+{
+  double tem1 = a[2*i];
+  double tem2 = 2*a[2*i+1];
+  bar ();
+  b[2*i] = 2*tem1;
+  b[2*i+1] = tem2;
+}
+
+int main()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < 8; ++i)
+    b[i] = i;
+  foo (2);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index a78ae61d1b0..36e10ff3619 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
   return vect_analyze_group_access (vinfo, dr_info);
 }
 
+typedef std::pair<data_reference_p, int> data_ref_pair;

+
 /* Compare two data-references DRA and DRB to group them into chunks
suitable for grouping.  */
 
 static int
 dr_group_sort_cmp (const void *dra_, const void *drb_)
 {
-  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
-  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
+  data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_);
+  data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_);
+  data_reference_p dra = dra_pair.first;
+  data_reference_p drb = drb_pair.first;
   int cmp;
 
   /* Stabilize sort.  */

@@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)
 return 0;
 
   /* DRs in different loops never belong to the same group.  */

-  loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
-  loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
-  if (loopa != loopb)
-    return loopa->num < loopb->num ? -1 : 1;
+  int bb_index1 = gimple_bb (DR_STMT (dra))->index;
+  int bb_index2 = gimple_bb (DR_STMT (drb))->index;
+  if (bb_index1 != bb_index2)
+    return bb_index1 < bb_index2 ? -1 : 1;
+
+  if (dra_pair.second != drb_pair.second)
+    return dra_pair.second < drb_pair.second ? -1 : 1;

[PATCH] c: Fix bogus vector initialisation error [PR96377]

2020-07-31 Thread Richard Sandiford
One of the problems in this PR was that if we had:

  vector_type1 array[] = { vector_value1 };

process_init_element would only treat vector_value1 as initialising
a vector_type1 if they had the same TYPE_MAIN_VARIANT.  This has
several problems:

(1) It gives confusing error messages if the vector types are
incompatible.  (Tested by gcc.dg/pr96377-1.c.)

(2) It means that we reject code that should be valid with
-flax-vector-conversions.  (Tested by gcc.dg/pr96377-2.c.)

(3) On arm and aarch64 targets, it means that we reject some
initializers that mix Advanced SIMD and standard GNU vectors.
These vectors have traditionally had different TYPE_MAIN_VARIANTs
because they have different mangling schemes.  (Tested by
gcc.dg/pr96377-[3-6].c.)

(4) It means that we reject SVE initializers that should be valid.
(Tested by gcc.target/aarch64/sve/gnu_vectors_[34].c.)

(5) After r11-1741-g:31427b974ed7b7dd54e2 we reject:

  arm_neon_type1 array[] = { k ^ arm_neon_value1 };

because applying the binary operator to arm_neon_value1 strips
the "Advanced SIMD type" attributes that were added in that patch.
Stripping the attributes is problematic for other reasons though,
so that still needs to be fixed separately.

g++.target/aarch64/sve/gnu_vectors_[34].C already pass.

Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to instal?

Richard


gcc/c/
PR c/96377
* c-typeck.c (process_init_element): Split test for whether to
recurse into a record, union or array into...
(initialize_elementwise_p): ...this new function.  Don't recurse
into a vector type if the initialization value is also a vector.

gcc/testsuite/
PR c/96377
* gcc.dg/pr96377-1.c: New test.
* gcc.dg/pr96377-2.c: Likewise.
* gcc.dg/pr96377-3.c: Likewise.
* gcc.dg/pr96377-4.c: Likewise.
* gcc.dg/pr96377-5.c: Likewise.
* gcc.dg/pr96377-6.c: Likewise.
* gcc.target/aarch64/pr96377-1.c: Likewise.
* gcc.target/aarch64/sve/acle/general-c/gnu_vectors_3.c: Likewise.
* gcc.target/aarch64/sve/acle/general-c/gnu_vectors_4.c: Likewise.
* g++.target/aarch64/sve/acle/general-c++/gnu_vectors_3.C: Likewise.
* g++.target/aarch64/sve/acle/general-c++/gnu_vectors_4.C: Likewise.
---
 gcc/c/c-typeck.c  | 59 ++-
 .../sve/acle/general-c++/gnu_vectors_3.C  | 15 +
 .../sve/acle/general-c++/gnu_vectors_4.C  | 15 +
 gcc/testsuite/gcc.dg/pr96377-1.c  | 32 ++
 gcc/testsuite/gcc.dg/pr96377-2.c  | 31 ++
 gcc/testsuite/gcc.dg/pr96377-3.c  | 33 +++
 gcc/testsuite/gcc.dg/pr96377-4.c  | 32 ++
 gcc/testsuite/gcc.dg/pr96377-5.c  | 33 +++
 gcc/testsuite/gcc.dg/pr96377-6.c  | 32 ++
 gcc/testsuite/gcc.target/aarch64/pr96377-1.c  | 20 +++
 .../sve/acle/general-c/gnu_vectors_3.c| 15 +
 .../sve/acle/general-c/gnu_vectors_4.c| 15 +
 12 files changed, 317 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/gnu_vectors_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/gnu_vectors_4.C
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-3.c
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-4.c
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-5.c
 create mode 100644 gcc/testsuite/gcc.dg/pr96377-6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr96377-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_4.c

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index fb5c288b549..0d639b60ea3 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -9956,6 +9956,47 @@ output_pending_init_elements (int all, struct obstack * braced_init_obstack)
   goto retry;
 }
 
+/* Expression VALUE coincides with the start of type TYPE in a braced
+   initializer.  Return true if we should treat VALUE as initializing
+   the first element of TYPE, false if we should treat it as initializing
+   TYPE as a whole.
+
+   If the initializer is clearly invalid, the question becomes:
+   which choice gives the best error message?  */
+
+static bool
+initialize_elementwise_p (tree type, tree value)
+{
+  if (type == error_mark_node || value == error_mark_node)
+return false;
+
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
+
+  tree value_type = TREE_TYPE (value);
+  if (value_type == error_mark_node)
+return false;
+
+  /* GNU vectors can be initialized elementwise.  However, treat any
+ kind of vector value as initializing the vector type as a whole,
+ regardless of whether the value is a GNU

Re: Refactor peel_iters_{pro,epi}logue cost model handlings

2020-07-31 Thread Richard Sandiford
"Kewen.Lin"  writes:
> Hi Richard,
>
> on 2020/7/27 9:10 PM, Richard Sandiford wrote:
>> "Kewen.Lin"  writes:
>>> Hi,
>>>
>>> As Richard S. suggested in the thread:
>>>
>>> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550633.html
>>>
>>> this patch is separated from the one of that thread, mainly to refactor the
>>> existing peel_iters_{pro,epi}logue cost model handlings.
>>>
>>> I've addressed Richard S.'s review comments there, moreover because of one
>>> failure of aarch64 testing, I updated it a bit more to keep the logic 
>>> unchanged
>>> as before first (refactor_cost.diff).
>> 
>> Heh, nice when a clean-up exposes an existing bug. ;-)  I agree the
>> updates look correct.  E.g. if vf is 1, we should assume that there
>> are no peeled iterations even if the number of iterations is unknown.
>> 
>> Both patches are OK with some very minor nits (sorry):
>
> Thanks for the comments!  I've addressed them and commit refactor_cost.diff
> via r11-2378.
>
> For adjust.diff, it works well on powerpc64le-linux-gnu (bootstrap/regtest),
> but got one failure on aarch64 as below:
>
> PASS->FAIL: gcc.target/aarch64/sve/cost_model_2.c -march=armv8.2-a+sve  
> scan-assembler-times \\tst1w\\tz 1
>
> I spent some time investigating it and thought it's expected.  As you 
> mentioned
> above, the patch can fix the cases with VF 1, the VF of this case is exactly 
> one.  :)
>
> Without the proposed adjustment, the cost for V16QI looks like:
>
>   Vector inside of loop cost: 1
>   Vector prologue cost: 4
>   Vector epilogue cost: 3
>   Scalar iteration cost: 4
>   Scalar outside cost: 6
>   Vector outside cost: 7
>   prologue iterations: 0
>   epilogue iterations: 0
>
> After the change, the cost becomes to:
>
> Vector inside of loop cost: 1
> Vector prologue cost: 1
> Vector epilogue cost: 0
> Scalar iteration cost: 4
> Scalar outside cost: 6
> Vector outside cost: 1
> prologue iterations: 0
> epilogue iterations: 0
>
> which outperforms VNx16QI with partial vectors, which isn't affected by
> this patch.

Hmm.  V16QI outperforming VNx16QI is kind-of bogus in this case,
since the store is a full-vector store for both Advanced SIMD and
128-bit SVE.  But we do currently still generate the SVE loop in
a more general form (since doing otherwise introduces other problems)
so I guess the result is self-consistent.

More importantly, even if the costs were equal, V16QI would still
win for 128-bit SVE.

So yeah, while I had my doubts at first, I agree this is the right fix.
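
For reference, the adjustment amounts to guarding the branch-taken costs
on there actually being peeled iterations, roughly like this (a sketch
against vect_get_known_peeling_cost; the exact record_stmt_cost arguments
may differ):

  /* Only charge the branch taken around the peeled loop when peeled
     iterations actually exist (e.g. not when VF is 1).  */
  if (peel_iters_prologue)
    retval += record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
				NULL, 0, vect_prologue);
  if (peel_iters_epilogue)
    retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
				NULL, 0, vect_epilogue);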

> gcc/ChangeLog:
>
>   * tree-vect-loop.c (vect_get_known_peeling_cost): Don't consider branch
>   taken costs for prologue and epilogue if they don't exist.
>   (vect_estimate_min_profitable_iters): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/sve/cost_model_2.c: Adjust due to cost model
>   change.

OK, thanks.

Richard


Re: [PATCH v4] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Richard Sandiford
"Kewen.Lin"  writes:
>>> +  bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
>>> +  bool need_iterate_p
>>> +   = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
>>> +  && !vect_known_niters_smaller_than_vf (loop_vinfo));
>>> +
>>> +  /* Init min/max, shift and minus cost relative to single
>>> +scalar_stmt. For now we only use length-based partial vectors on
>>> +Power, target specific cost tweaking may be needed for other
>>> +ports in future.  */
>>> +  unsigned int min_max_cost = 2;
>>> +  unsigned int shift_cost = 1, minus_cost = 1;
>> 
>> Please instead add a scalar_min_max to vect_cost_for_stmt, and use
>> scalar_stmt for shift and minus.  There shouldn't be any Power things
>> hard-coded into target-independent code.
>> 
>
> Agree!  It's not good to leave them there.  I thought to wait and see
> if other targets which support vector with length can reuse this, or
> move it to target specific codes then if not sharable.  But anyway
> it looks not good, let's fix it.
>
> I had some concerns about the vect_cost_for_stmt way, since it seems to allow
> more computations similar to min/max to be added like this; in the long
> term it probably leads to a situation like: scalar_min_max,
> scalar_div_expr, scalar_mul_expr ... an enum (cost types) bloat, which
> seems not good to maintain.

I guess doing that doesn't seem so bad to me :-)  I think it's been
a recurring problem that the current classification isn't fine-grained
enough for some cases.

> I noticed that the i386 port's ix86_add_stmt_cost will check stmt_info->stmt,
> whether it is an assignment, and the subcode of the expression; it provides
> the chance to check the statement in a more fine-grained way, not just as a
> normal scalar_stmt/vector_stmt.
>
> For the case here, we don't have the stmt_info, but we know the type
> of computation (expression).  How about extending the hook add_stmt_cost
> with one extra tree_code argument?  By default it can be some
> meaningless code; for needs like here, we specify the tree_code
> as the code of the computation, like {MIN,MAX}_EXPR, then target-specific
> add_stmt_cost can check this tree_code and adjust the cost accordingly.

If we do that, I guess we should “promote” code_helper out of
gimple-match.h and use that instead, so that we can handle
internal and built-in functions too.

Would like to hear Richard's opinion on the best way forward here.

Thanks,
Richard


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Sandiford
Marc Glisse  writes:
> +/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
> + (simplify
> +  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
> +  (with
> +   {
> + tree rhs1, rhs2 = NULL;
> + rhs1 = fold_binary (op, type, @1, @3);
> + if (rhs1 && is_gimple_val (rhs1))
> +   rhs2 = fold_binary (op, type, @2, @4);
> +   }
> +   (if (rhs2 && is_gimple_val (rhs2))
> +(vec_cond @0 { rhs1; } { rhs2; })
> +#endif

This one looks dangerous for potentially-trapping ops.

> +/* (v ? w : 0) ? a : b is just (v & w) ? a : b  */
> +(simplify
> + (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
> + (vec_cond (bit_and @0 @3) @1 @2))

Does something check automatically that @0 and @3 have compatible types?
Same question for the later folds.

Thanks,
Richard


Re: [PATCH v4] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 1:03 PM Richard Sandiford wrote:
>
> "Kewen.Lin"  writes:
> >>> +  bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
> >>> +  bool need_iterate_p
> >>> +   = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> >>> +  && !vect_known_niters_smaller_than_vf (loop_vinfo));
> >>> +
> >>> +  /* Init min/max, shift and minus cost relative to single
> >>> +scalar_stmt. For now we only use length-based partial vectors on
> >>> +Power, target specific cost tweaking may be needed for other
> >>> +ports in future.  */
> >>> +  unsigned int min_max_cost = 2;
> >>> +  unsigned int shift_cost = 1, minus_cost = 1;
> >>
> >> Please instead add a scalar_min_max to vect_cost_for_stmt, and use
> >> scalar_stmt for shift and minus.  There shouldn't be any Power things
> >> hard-coded into target-independent code.
> >>
> >
> > Agree!  It's not good to leave them there.  I thought I'd wait and see
> > if other targets which support vectors with length can reuse this, or
> > move it to target-specific code if not sharable.  But anyway
> > it doesn't look good; let's fix it.

In other generic places like this we simply use three generic scalar_stmt
costs.  At least I don't see how min_max should be different from it
when shift can be the same as minus.  Note this is also how we treat
vectorization of MAX_EXPR - scalar cost is one scalar_stmt and
vector cost is one vector_stmt.  As you say below the add_stmt_cost
hook can adjust based on the actual GIMPLE stmt -- if one is
available (which indeed it isn't here).

I'm somewhat lacking context here as well - we actually GIMPLE
code-generate the min/max/shift/minus and only the eventual
AND is deferred to the target, right?

> > I had some concerns about the vect_cost_for_stmt way, since it seems to allow
> > more computations similar to min/max to be added like this; in the long
> > term it probably leads to a situation like: scalar_min_max,
> > scalar_div_expr, scalar_mul_expr ... an enum (cost types) bloat, which
> > seems not good to maintain.
>
> I guess doing that doesn't seem so bad to me :-)  I think it's been
> a recurring problem that the current classification isn't fine-grained
> enough for some cases.

But we eventually want to get rid of this classification enum in favor
of the add_stmt_cost hook ...

> > I noticed that the i386 port's ix86_add_stmt_cost will check stmt_info->stmt,
> > whether it is an assignment, and the subcode of the expression; it provides
> > the chance to check the statement in a more fine-grained way, not just as a
> > normal scalar_stmt/vector_stmt.
> >
> > For the case here, we don't have the stmt_info, but we know the type
> > of computation (expression).  How about extending the hook add_stmt_cost
> > with one extra tree_code argument?  By default it can be some
> > meaningless code; for needs like here, we specify the tree_code
> > as the code of the computation, like {MIN,MAX}_EXPR, then target-specific
> > add_stmt_cost can check this tree_code and adjust the cost accordingly.
>
> If we do that, I guess we should “promote” code_helper out of
> gimple-match.h and use that instead, so that we can handle
> internal and built-in functions too.
>
> Would like to hear Richard's opinion on the best way forward here.

I'd say defer this to a later patch and for now simply cost one scalar
stmt for MIN/MAX.  I agree that if we add a tree_code we want a
code_helper instead.  Note that I want to eventually have a
full SLP tree for the final code generation where all info should be
there (including SLP nodes for those min/max ops) and which the
target could traverse.  But I'm not sure if I can make enough progress
on that SLP-only thing for GCC 11 even...

Richard.

> Thanks,
> Richard


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Thu, Jul 30, 2020 at 9:49 AM Marc Glisse  wrote:
>
> When vector comparisons were forced to use vec_cond_expr, we lost a number
> of optimizations (my fault for not adding enough testcases to prevent
> that). This patch tries to unwrap vec_cond_expr a bit so some
> optimizations can still happen.
>
> I wasn't planning to add all those transformations together, but adding
> one caused a regression, whose fix introduced a second regression, etc.
>
> Using a simple fold_binary internally looks like an ok compromise to me.
> It remains cheap enough (not recursive, and vector instructions are not
> that frequent), while still allowing more than const_binop (X|0 or X&X for
> instance). The transformations are quite conservative with :s and folding
> only if everything simplifies, we may want to relax this later. And of
> course we are going to miss things like a?b:c + a?c:b -> b+c.
>
> In terms of number of operations, some transformations turning 2
> VEC_COND_EXPR into VEC_COND_EXPR + BIT_IOR_EXPR + BIT_NOT_EXPR might not
> look like a gain... I expect the bit_not disappears in most cases, and
> VEC_COND_EXPR looks more costly than a simpler BIT_IOR_EXPR.
>
> I am a bit confused that with avx512 we get types like "vector(4)
> <signed-boolean:2>" with :2 and not :1 (is it a hack so true is 1 and not
> -1?), but that doesn't matter for this patch.
>
> Regtest+bootstrap on x86_64-pc-linux-gnu

+  (with
+   {
+ tree rhs1, rhs2 = NULL;
+ rhs1 = fold_binary (op, type, @1, @3);
+ if (rhs1 && is_gimple_val (rhs1))
+   rhs2 = fold_binary (op, type, @2, @3);

ICK.  I guess a more match-and-simplify way would be

   (with
{
  tree rhs1, rhs2;
  gimple_match_op op (gimple_match_cond::UNCOND, op,
  type, @1, @3);
  if (op.resimplify (NULL, valueize)
  && gimple_simplified_result_is_gimple_val (op))
   {
 rhs1 = op.ops[0];
 ... other operand ...
   }

now in theory we could invent some new syntax for this, like

 (simplify
  (op (vec_cond:s @0 @1 @2) @3)
  (vec_cond @0 (op:x @1 @3) (op:x @2 @3)))

and pick something better instead of :x (:s is taken,
would be 'simplified', :c is taken would be 'constexpr', ...).

_Maybe_ just

 (simplify
  (op (vec_cond:s @0 @1 @2) @3)
  (vec_cond:x @0 (op @1 @3) (op @2 @3)))

which would have the same practical meaning as passing
NULL for the seq argument to simplification - do not allow
any intermediate stmt to be generated.

The other "simple" patterns look good, you can commit
them separately if you like.

Richard.

> 2020-07-30  Marc Glisse  
>
> PR tree-optimization/95906
> PR target/70314
> * match.pd ((c ? a : b) op d, (c ? a : b) op (c ? d : e),
> (v ? w : 0) ? a : b, c1 ? c2 ? a : b : b): New transformations.
>
> * gcc.dg/tree-ssa/andnot-2.c: New file.
> * gcc.dg/tree-ssa/pr95906.c: Likewise.
> * gcc.target/i386/pr70314.c: Likewise.
>
> --
> Marc Glisse


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Sandiford wrote:

> Marc Glisse  writes:
>> +/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
>> + (simplify
>> +  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
>> +  (with
>> +   {
>> + tree rhs1, rhs2 = NULL;
>> + rhs1 = fold_binary (op, type, @1, @3);
>> + if (rhs1 && is_gimple_val (rhs1))
>> +   rhs2 = fold_binary (op, type, @2, @4);
>> +   }
>> +   (if (rhs2 && is_gimple_val (rhs2))
>> +(vec_cond @0 { rhs1; } { rhs2; })
>> +#endif
>
> This one looks dangerous for potentially-trapping ops.

I would expect the operation not to be folded if it can trap. Is that too
optimistic?

>> +/* (v ? w : 0) ? a : b is just (v & w) ? a : b  */
>> +(simplify
>> + (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
>> + (vec_cond (bit_and @0 @3) @1 @2))
>
> Does something check automatically that @0 and @3 have compatible types?

My memory of this dates from before the avx512-related changes, but at
least at the time, the type of the condition in vec_cond_expr had to have
the same size and number of elements as the result, and have signed
integral elements. Now the size constraint has changed, but it still looks
like for a given number of elements and size (this last one ignored for
avx512), there is essentially a single type that can appear as condition.
Is this wrong for x86? For SVE?

I could add some types_match conditions if that seems too unsafe...

--
Marc Glisse


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 1:35 PM Richard Biener wrote:
>
> On Thu, Jul 30, 2020 at 9:49 AM Marc Glisse  wrote:
> >
> > When vector comparisons were forced to use vec_cond_expr, we lost a number
> > of optimizations (my fault for not adding enough testcases to prevent
> > that). This patch tries to unwrap vec_cond_expr a bit so some
> > optimizations can still happen.
> >
> > I wasn't planning to add all those transformations together, but adding
> > one caused a regression, whose fix introduced a second regression, etc.
> >
> > Using a simple fold_binary internally looks like an ok compromise to me.
> > It remains cheap enough (not recursive, and vector instructions are not
> > that frequent), while still allowing more than const_binop (X|0 or X&X for
> > instance). The transformations are quite conservative with :s and folding
> > only if everything simplifies, we may want to relax this later. And of
> > course we are going to miss things like a?b:c + a?c:b -> b+c.
> >
> > In terms of number of operations, some transformations turning 2
> > VEC_COND_EXPR into VEC_COND_EXPR + BIT_IOR_EXPR + BIT_NOT_EXPR might not
> > look like a gain... I expect the bit_not disappears in most cases, and
> > VEC_COND_EXPR looks more costly than a simpler BIT_IOR_EXPR.
> >
> > I am a bit confused that with avx512 we get types like "vector(4)
> > <signed-boolean:2>" with :2 and not :1 (is it a hack so true is 1 and not
> > -1?), but that doesn't matter for this patch.
> >
> > Regtest+bootstrap on x86_64-pc-linux-gnu
>
> +  (with
> +   {
> + tree rhs1, rhs2 = NULL;
> + rhs1 = fold_binary (op, type, @1, @3);
> + if (rhs1 && is_gimple_val (rhs1))
> +   rhs2 = fold_binary (op, type, @2, @3);
>
> ICK.  I guess a more match-and-simplify way would be
>
>(with
> {
>   tree rhs1, rhs2;
>   gimple_match_op op (gimple_match_cond::UNCOND, op,
>   type, @1, @3);
>   if (op.resimplify (NULL, valueize)
>   && gimple_simplified_result_is_gimple_val (op))
>{
>  rhs1 = op.ops[0];
>  ... other operand ...
>}
>
> now in theory we could invent some new syntax for this, like
>
>  (simplify
>   (op (vec_cond:s @0 @1 @2) @3)
>   (vec_cond @0 (op:x @1 @3) (op:x @2 @3)))
>
> and pick something better instead of :x (:s is taken,
> would be 'simplified', :c is taken would be 'constexpr', ...).
>
> _Maybe_ just
>
>  (simplify
>   (op (vec_cond:s @0 @1 @2) @3)
>   (vec_cond:x @0 (op @1 @3) (op @2 @3)))
>
> which would have the same practical meaning as passing
> NULL for the seq argument to simplification - do not allow
> any intermediate stmt to be generated.

Note I specifically do not like those if (it-simplifies) checks
because we already would code-generate those anyway.  For

(simplify
  (plus (vec_cond:s @0 @1 @2) @3)
  (vec_cond @0 (plus @1 @3) (plus @2 @3)))

we get

res_op->set_op (VEC_COND_EXPR, type, 3);
res_op->ops[0] = captures[1];
res_op->ops[0] = unshare_expr (res_op->ops[0]);
{
  tree _o1[2], _r1;
  _o1[0] = captures[2];
  _o1[1] = captures[4];
  gimple_match_op tem_op (res_op->cond.any_else (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
  tem_op.resimplify (lseq, valueize);
  _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
  if (!_r1) return false;
  res_op->ops[1] = _r1;
}
{
  tree _o1[2], _r1;
  _o1[0] = captures[3];
  _o1[1] = captures[4];
  gimple_match_op tem_op (res_op->cond.any_else (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
  tem_op.resimplify (lseq, valueize);
  _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
  if (!_r1) return false;
  res_op->ops[2] = _r1;
}
res_op->resimplify (lseq, valueize);
return true;

and the only change required would be to pass NULL to maybe_push_res_to_seq
here instead of lseq at the (***) marked points.

Richard.

> The other "simple" patterns look good, you can commit
> them separately if you like.
>
> Richard.
>
> > 2020-07-30  Marc Glisse  
> >
> > PR tree-optimization/95906
> > PR target/70314
> > * match.pd ((c ? a : b) op d, (c ? a : b) op (c ? d : e),
> > (v ? w : 0) ? a : b, c1 ? c2 ? a : b : b): New transformations.
> >
> > * gcc.dg/tree-ssa/andnot-2.c: New file.
> > * gcc.dg/tree-ssa/pr95906.c: Likewise.
> > * gcc.target/i386/pr70314.c: Likewise.
> >
> > --
> > Marc Glisse


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 1:39 PM Marc Glisse  wrote:
>
> On Fri, 31 Jul 2020, Richard Sandiford wrote:
>
> > Marc Glisse  writes:
> >> +/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
> >> + (simplify
> >> +  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
> >> +  (with
> >> +   {
> >> + tree rhs1, rhs2 = NULL;
> >> + rhs1 = fold_binary (op, type, @1, @3);
> >> + if (rhs1 && is_gimple_val (rhs1))
> >> +   rhs2 = fold_binary (op, type, @2, @4);
> >> +   }
> >> +   (if (rhs2 && is_gimple_val (rhs2))
> >> +(vec_cond @0 { rhs1; } { rhs2; })
> >> +#endif
> >
> > This one looks dangerous for potentially-trapping ops.
>
> I would expect the operation not to be folded if it can trap. Is that too
> optimistic?
>
> >> +/* (v ? w : 0) ? a : b is just (v & w) ? a : b  */
> >> +(simplify
> >> + (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
> >> + (vec_cond (bit_and @0 @3) @1 @2))
> >
> > Does something check automatically that @0 and @3 have compatible types?

@0 should always have a vector boolean type and thus will not be generally
compatible with @3.  But OTOH then when you see (vec_cond (vec_cond ...
then @3 must be vector boolean as well...

But in theory with AVX512 the inner vec_cond could have a SImode
condition @0 producing a regular V4SImode vector mask for an outer
AVX512 SSE-style vec-cond and you then would get a mismatch.

So indeed better add a type compatibility check.

> My memory of this dates from before the avx512-related changes, but at
> least at the time, the type of the condition in vec_cond_expr had to have
> the same size and number of elements as the result, and have signed
> integral elements. Now the size constraint has changed, but it still looks
> like for a given number of elements and size (this last one ignored for
> avx512), there is essentially a single type that can appear as condition.
> Is this wrong for x86? For SVE?
>
> I could add some types_match conditions if that seems too unsafe...
>
> --
> Marc Glisse


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 1:39 PM Richard Biener wrote:
>
> On Fri, Jul 31, 2020 at 1:35 PM Richard Biener wrote:
> >
> > On Thu, Jul 30, 2020 at 9:49 AM Marc Glisse  wrote:
> > >
> > > When vector comparisons were forced to use vec_cond_expr, we lost a number
> > > of optimizations (my fault for not adding enough testcases to prevent
> > > that). This patch tries to unwrap vec_cond_expr a bit so some
> > > optimizations can still happen.
> > >
> > > I wasn't planning to add all those transformations together, but adding
> > > one caused a regression, whose fix introduced a second regression, etc.
> > >
> > > Using a simple fold_binary internally looks like an ok compromise to me.
> > > It remains cheap enough (not recursive, and vector instructions are not
> > > that frequent), while still allowing more than const_binop (X|0 or X&X for
> > > instance). The transformations are quite conservative with :s and folding
> > > only if everything simplifies, we may want to relax this later. And of
> > > course we are going to miss things like a?b:c + a?c:b -> b+c.
> > >
> > > In terms of number of operations, some transformations turning 2
> > > VEC_COND_EXPR into VEC_COND_EXPR + BIT_IOR_EXPR + BIT_NOT_EXPR might not
> > > look like a gain... I expect the bit_not disappears in most cases, and
> > > VEC_COND_EXPR looks more costly than a simpler BIT_IOR_EXPR.
> > >
> > > I am a bit confused that with avx512 we get types like "vector(4)
> > > <signed-boolean:2>" with :2 and not :1 (is it a hack so true is 1 and
> > > not -1?), but that doesn't matter for this patch.
> > > Regtest+bootstrap on x86_64-pc-linux-gnu
> >
> > +  (with
> > +   {
> > + tree rhs1, rhs2 = NULL;
> > + rhs1 = fold_binary (op, type, @1, @3);
> > + if (rhs1 && is_gimple_val (rhs1))
> > +   rhs2 = fold_binary (op, type, @2, @3);
> >
> > ICK.  I guess a more match-and-simplify way would be
> >
> >(with
> > {
> >   tree rhs1, rhs2;
> >   gimple_match_op op (gimple_match_cond::UNCOND, op,
> >   type, @1, @3);
> >   if (op.resimplify (NULL, valueize)
> >   && gimple_simplified_result_is_gimple_val (op))
> >{
> >  rhs1 = op.ops[0];
> >  ... other operand ...
> >}
> >
> > now in theory we could invent some new syntax for this, like
> >
> >  (simplify
> >   (op (vec_cond:s @0 @1 @2) @3)
> >   (vec_cond @0 (op:x @1 @3) (op:x @2 @3)))
> >
> > and pick something better instead of :x (:s is taken,
> > would be 'simplified', :c is taken would be 'constexpr', ...).
> >
> > _Maybe_ just
> >
> >  (simplify
> >   (op (vec_cond:s @0 @1 @2) @3)
> >   (vec_cond:x @0 (op @1 @3) (op @2 @3)))
> >
> > which would have the same practical meaning as passing
> > NULL for the seq argument to simplification - do not allow
> > any intermediate stmt to be generated.
>
> Note I specifically do not like those if (it-simplifies) checks
> because we already would code-generate those anyway.  For
>
> (simplify
>   (plus (vec_cond:s @0 @1 @2) @3)
>   (vec_cond @0 (plus @1 @3) (plus @2 @3)))
>
> we get
>
> res_op->set_op (VEC_COND_EXPR, type, 3);
> res_op->ops[0] = captures[1];
> res_op->ops[0] = unshare_expr (res_op->ops[0]);
> {
>   tree _o1[2], _r1;
>   _o1[0] = captures[2];
>   _o1[1] = captures[4];
>   gimple_match_op tem_op (res_op->cond.any_else
> (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
>   tem_op.resimplify (lseq, valueize);
>   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
>   if (!_r1) return false;
>   res_op->ops[1] = _r1;
> }
> {
>   tree _o1[2], _r1;
>   _o1[0] = captures[3];
>   _o1[1] = captures[4];
>   gimple_match_op tem_op (res_op->cond.any_else
> (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
>   tem_op.resimplify (lseq, valueize);
>   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
>   if (!_r1) return false;
>   res_op->ops[2] = _r1;
> }
> res_op->resimplify (lseq, valueize);
> return true;
>
> and the only change required would be to pass NULL to maybe_push_res_to_seq
> here instead of lseq at the (***) marked points.

(simplify
  (plus (vec_cond:s @0 @1 @2) @3)
  (vec_cond:l @0 (plus @1 @3) (plus @2 @3)))

'l' for 'force leaf'.  I'll see if I can quickly come up with a patch.

Richard.



> Richard.
>
> > The other "simple" patterns look good, you can commit
> > them separately if you like.
> >
> > Richard.
> >
> > > 2020-07-30  Marc Glisse  
> > >
> > > PR tree-optimization/95906
> > > PR target/70314
> > > * match.pd

Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Biener wrote:

>>>> +/* (v ? w : 0) ? a : b is just (v & w) ? a : b  */
>>>> +(simplify
>>>> + (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
>>>> + (vec_cond (bit_and @0 @3) @1 @2))
>>>
>>> Does something check automatically that @0 and @3 have compatible types?
>
> @0 should always have a vector boolean type and thus will not be generally
> compatible with @3.  But OTOH then when you see (vec_cond (vec_cond ...
> then @3 must be vector boolean as well...
>
> But in theory with AVX512 the inner vec_cond could have a SImode
> condition @0 producing a regular V4SImode vector mask for an outer
> AVX512 SSE-style vec-cond and you then would get a mismatch.

Ah, I thought the SSE-style vec_cond was impossible in AVX512 mode, at
least I couldn't generate one in a few tests, but I didn't try very hard.

> So indeed better add a type compatibility check.

Ok, it can't hurt.
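
A minimal sketch of what that could look like in match.pd (assuming
types_match on the two condition types is the right predicate):

(simplify
 (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
 (if (types_match (TREE_TYPE (@0), TREE_TYPE (@3)))
  (vec_cond (bit_and @0 @3) @1 @2)))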

--
Marc Glisse


Re: [PATCH] c: Fix bogus vector initialisation error [PR96377]

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Fri, Jul 31, 2020 at 11:41:59AM +0100, Richard Sandiford wrote:
> @@ -10135,11 +10176,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit,
> /* Otherwise, if we have come to a subaggregate,
>and we don't have an element of its type, push into it.  */
> else if (value.value != NULL_TREE
> -&& value.value != error_mark_node
> -&& TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype
> -&& (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE
> -|| fieldcode == UNION_TYPE
> -|| gnu_vector_type_p (fieldtype)))
> +&& initialize_elementwise_p (fieldtype, value.value))

I wonder if it wouldn't be better to do the value.value != NULL_TREE
checking inside of initialize_elementwise_p too, simply because it is done
the same way in all the 3 callers.
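
I.e. something like this (just a sketch; the rest of the function stays as
in the patch):

static bool
initialize_elementwise_p (tree type, tree value)
{
  /* Subsume the check that all 3 callers currently repeat.  */
  if (value == NULL_TREE)
    return false;

  if (type == error_mark_node || value == error_mark_node)
    return false;

  /* ... as in the patch ... */
}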

Other than that LGTM, but as I'm not a C FE maintainer, please give them a day
to express their opinions (though Joseph commented in the PR, so I assume he
is ok with the general idea).

>   {
> push_init_level (loc, 1, braced_init_obstack);
> continue;
> @@ -10227,11 +10264,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit,
> /* Otherwise, if we have come to a subaggregate,
>and we don't have an element of its type, push into it.  */
> else if (value.value != NULL_TREE
> -&& value.value != error_mark_node
> -&& TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype
> -&& (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE
> -|| fieldcode == UNION_TYPE
> -|| gnu_vector_type_p (fieldtype)))
> +&& initialize_elementwise_p (fieldtype, value.value))
>   {
> push_init_level (loc, 1, braced_init_obstack);
> continue;

Jakub



Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 1:47 PM Richard Biener wrote:
>
> On Fri, Jul 31, 2020 at 1:39 PM Richard Biener wrote:
> >
> > On Fri, Jul 31, 2020 at 1:35 PM Richard Biener wrote:
> > >
> > > On Thu, Jul 30, 2020 at 9:49 AM Marc Glisse  wrote:
> > > >
> > > > When vector comparisons were forced to use vec_cond_expr, we lost a 
> > > > number
> > > > of optimizations (my fault for not adding enough testcases to prevent
> > > > that). This patch tries to unwrap vec_cond_expr a bit so some
> > > > optimizations can still happen.
> > > >
> > > > I wasn't planning to add all those transformations together, but adding
> > > > one caused a regression, whose fix introduced a second regression, etc.
> > > >
> > > > Using a simple fold_binary internally looks like an ok compromise to me.
> > > > It remains cheap enough (not recursive, and vector instructions are not
> > > > that frequent), while still allowing more than const_binop (X|0 or X&X 
> > > > for
> > > > instance). The transformations are quite conservative with :s and 
> > > > folding
> > > > only if everything simplifies, we may want to relax this later. And of
> > > > course we are going to miss things like a?b:c + a?c:b -> b+c.
> > > >
> > > > In terms of number of operations, some transformations turning 2
> > > > VEC_COND_EXPR into VEC_COND_EXPR + BIT_IOR_EXPR + BIT_NOT_EXPR might not
> > > > look like a gain... I expect the bit_not disappears in most cases, and
> > > > VEC_COND_EXPR looks more costly than a simpler BIT_IOR_EXPR.
> > > >
> > > > I am a bit confused that with avx512 we get types like "vector(4)
> > > > <signed-boolean:2>" with :2 and not :1 (is it a hack so true is 1 and
> > > > not -1?), but that doesn't matter for this patch.
> > > >
> > > > Regtest+bootstrap on x86_64-pc-linux-gnu
> > >
> > > +  (with
> > > +   {
> > > + tree rhs1, rhs2 = NULL;
> > > + rhs1 = fold_binary (op, type, @1, @3);
> > > + if (rhs1 && is_gimple_val (rhs1))
> > > +   rhs2 = fold_binary (op, type, @2, @3);
> > >
> > > ICK.  I guess a more match-and-simplify way would be
> > >
> > >(with
> > > {
> > >   tree rhs1, rhs2;
> > >   gimple_match_op op (gimple_match_cond::UNCOND, op,
> > >   type, @1, @3);
> > >   if (op.resimplify (NULL, valueize)
> > >   && gimple_simplified_result_is_gimple_val (op))
> > >{
> > >  rhs1 = op.ops[0];
> > >  ... other operand ...
> > >}
> > >
> > > now in theory we could invent some new syntax for this, like
> > >
> > >  (simplify
> > >   (op (vec_cond:s @0 @1 @2) @3)
> > >   (vec_cond @0 (op:x @1 @3) (op:x @2 @3)))
> > >
> > > and pick something better instead of :x (:s is taken,
> > > would be 'simplified', :c is taken would be 'constexpr', ...).
> > >
> > > _Maybe_ just
> > >
> > >  (simplify
> > >   (op (vec_cond:s @0 @1 @2) @3)
> > >   (vec_cond:x @0 (op @1 @3) (op @2 @3)))
> > >
> > > which would have the same practical meaning as passing
> > > NULL for the seq argument to simplification - do not allow
> > > any intermediate stmt to be generated.
> >
> > Note I specifically do not like those if (it-simplifies) checks
> > because we already would code-generate those anyway.  For
> >
> > (simplify
> >   (plus (vec_cond:s @0 @1 @2) @3)
> >   (vec_cond @0 (plus @1 @3) (plus @2 @3)))
> >
> > we get
> >
> > res_op->set_op (VEC_COND_EXPR, type, 3);
> > res_op->ops[0] = captures[1];
> > res_op->ops[0] = unshare_expr (res_op->ops[0]);
> > {
> >   tree _o1[2], _r1;
> >   _o1[0] = captures[2];
> >   _o1[1] = captures[4];
> >   gimple_match_op tem_op (res_op->cond.any_else
> > (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
> >   tem_op.resimplify (lseq, valueize);
> >   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
> >   if (!_r1) return false;
> >   res_op->ops[1] = _r1;
> > }
> > {
> >   tree _o1[2], _r1;
> >   _o1[0] = captures[3];
> >   _o1[1] = captures[4];
> >   gimple_match_op tem_op (res_op->cond.any_else
> > (), PLUS_EXPR, TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
> >   tem_op.resimplify (lseq, valueize);
> >   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
> >   if (!_r1) return false;
> >   res_op->ops[2] = _r1;
> > }
> > res_op->resimplify (lseq, valueize);
> > return true;
> >
> > and the only change required would be to pass NULL to maybe_push_res_to_seq
> > here instead of lseq at the (***) marked points.
>
> (simplify
>   (plus (vec_cond:s @0 @1 @2) @3)
>   (vec_cond:l @0 (plus @1 @3) (plus @2 @3)))
>
> 'l' for 'for

Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Biener wrote:

> On Fri, Jul 31, 2020 at 1:39 PM Richard Biener wrote:
>> On Fri, Jul 31, 2020 at 1:35 PM Richard Biener wrote:
>>> On Thu, Jul 30, 2020 at 9:49 AM Marc Glisse wrote:
>>>>
>>>> When vector comparisons were forced to use vec_cond_expr, we lost a number
>>>> of optimizations (my fault for not adding enough testcases to prevent
>>>> that). This patch tries to unwrap vec_cond_expr a bit so some
>>>> optimizations can still happen.
>>>>
>>>> I wasn't planning to add all those transformations together, but adding
>>>> one caused a regression, whose fix introduced a second regression, etc.
>>>>
>>>> Using a simple fold_binary internally looks like an ok compromise to me.
>>>> It remains cheap enough (not recursive, and vector instructions are not
>>>> that frequent), while still allowing more than const_binop (X|0 or X&X for
>>>> instance). The transformations are quite conservative with :s and folding
>>>> only if everything simplifies, we may want to relax this later. And of
>>>> course we are going to miss things like a?b:c + a?c:b -> b+c.
>>>>
>>>> In terms of number of operations, some transformations turning 2
>>>> VEC_COND_EXPR into VEC_COND_EXPR + BIT_IOR_EXPR + BIT_NOT_EXPR might not
>>>> look like a gain... I expect the bit_not disappears in most cases, and
>>>> VEC_COND_EXPR looks more costly than a simpler BIT_IOR_EXPR.
>>>>
>>>> I am a bit confused that with avx512 we get types like "vector(4)
>>>> <signed-boolean:2>" with :2 and not :1 (is it a hack so true is 1 and not
>>>> -1?), but that doesn't matter for this patch.
>>>>
>>>> Regtest+bootstrap on x86_64-pc-linux-gnu
>>>
>>> +  (with
>>> +   {
>>> + tree rhs1, rhs2 = NULL;
>>> + rhs1 = fold_binary (op, type, @1, @3);
>>> + if (rhs1 && is_gimple_val (rhs1))
>>> +   rhs2 = fold_binary (op, type, @2, @3);
>>>
>>> ICK.  I guess a more match-and-simplify way would be

I was focused on avoiding recursion, but indeed that's independent, I
could have set a trivial valueize function for that.

>>>    (with
>>> {
>>>   tree rhs1, rhs2;
>>>   gimple_match_op op (gimple_match_cond::UNCOND, op,
>>>   type, @1, @3);
>>>   if (op.resimplify (NULL, valueize)

Oh, so you would be ok with something that recurses without limit? I
thought we were avoiding this because it may allow some compile time
explosion with pathological examples.

>>>   && gimple_simplified_result_is_gimple_val (op))
>>>    {
>>>  rhs1 = op.ops[0];
>>>  ... other operand ...
>>>    }
>>>
>>> now in theory we could invent some new syntax for this, like
>>>
>>>  (simplify
>>>   (op (vec_cond:s @0 @1 @2) @3)
>>>   (vec_cond @0 (op:x @1 @3) (op:x @2 @3)))
>>>
>>> and pick something better instead of :x (:s is taken,
>>> would be 'simplified', :c is taken would be 'constexpr', ...).
>>>
>>> _Maybe_ just
>>>
>>>  (simplify
>>>   (op (vec_cond:s @0 @1 @2) @3)
>>>   (vec_cond:x @0 (op @1 @3) (op @2 @3)))
>>>
>>> which would have the same practical meaning as passing
>>> NULL for the seq argument to simplification - do not allow
>>> any intermediate stmt to be generated.
>>
>> Note I specifically do not like those if (it-simplifies) checks
>> because we already would code-generate those anyway.  For
>>
>> (simplify
>>   (plus (vec_cond:s @0 @1 @2) @3)
>>   (vec_cond @0 (plus @1 @3) (plus @2 @3)))
>>
>> we get
>>
>> res_op->set_op (VEC_COND_EXPR, type, 3);
>> res_op->ops[0] = captures[1];
>> res_op->ops[0] = unshare_expr (res_op->ops[0]);
>> {
>>   tree _o1[2], _r1;
>>   _o1[0] = captures[2];
>>   _o1[1] = captures[4];
>>   gimple_match_op tem_op (res_op->cond.any_else (), PLUS_EXPR,
>> TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
>>   tem_op.resimplify (lseq, valueize);
>>   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
>>   if (!_r1) return false;
>>   res_op->ops[1] = _r1;
>> }
>> {
>>   tree _o1[2], _r1;
>>   _o1[0] = captures[3];
>>   _o1[1] = captures[4];
>>   gimple_match_op tem_op (res_op->cond.any_else (), PLUS_EXPR,
>> TREE_TYPE (_o1[0]), _o1[0], _o1[1]);
>>   tem_op.resimplify (lseq, valueize);
>>   _r1 = maybe_push_res_to_seq (&tem_op, lseq);  (***)
>>   if (!_r1) return false;
>>   res_op->ops[2] = _r1;
>> }
>> res_op->resimplify (lseq, valueize);
>> return true;
>>
>> and the only change required would be to pass NULL to maybe_push_res_to_seq
>> here instead of lseq at the (***) marked points.
>
> (simplify
>  (plus (vec_cond:s @0 @1 @2) @3)
>  (vec_cond:l @0 (plus @1 @3) (plus @2 @3)))
>
> 'l' for 'force leaf'.  I'll see if I can quickly come up with a patch.

Does it have a clear meaning for GENERIC? I guess that's probably not such
a big problem.

There are a number of transformations that we would like to perform "if
<something> simplifies", but I don't know if it will always have exactly
this form of "if this part of the output pattern doesn't simplify,

[PATCH] debug/96383 - emit debug info for used external functions

2020-07-31 Thread Richard Biener
This makes sure to emit full declaration DIEs including
formal parameters for used external functions.  This helps
debugging when debug information of the external entity is
not available and also helps external tools cross-checking
ABI compatibility which was the bug reporters use case.

For cc1 this affects debug information size as follows:

 VM SIZE FILE SIZE
 ++ GROWING   ++
  [ = ]   0 .debug_info   +1.63Mi  +1.3%
  [ = ]   0 .debug_str +263Ki  +3.4%
  [ = ]   0 .debug_abbrev  +101Ki  +4.9%
  [ = ]   0 .debug_line   +5.71Ki  +0.0%
   +44% +16 [Unmapped]+48  +1.2%

 -- SHRINKING --
  [ = ]   0 .debug_loc   -213  -0.0%
  -0.0% -48 .text -48  -0.0%
  [ = ]   0 .debug_ranges -16  -0.0%

  -0.0% -32 TOTAL +1.99Mi  +0.6%

and DWARF compression via DWZ can only shave off minor bits
here.

Previously we emitted no DIEs for external functions at all
unless they were referenced via DW_TAG_GNU_call_site which
for some GCC revs caused a regular DIE to appear and since
GCC 4.9 only a stub without formal parameters.  This means
at -O0 we did not emit any DIE for external functions
but with optimization we emitted stubs.
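
To illustrate (this only sketches the shape of the new tests; the actual
pr96383-1.c may differ):

/* { dg-do compile } */
/* { dg-options "-g -dA" } */

extern int foo (int, int);

int bar (int x)
{
  /* foo is used, so it should now get a full declaration DIE
     including DW_TAG_formal_parameter children.  */
  return foo (x, x);
}

/* One formal parameter DIE for bar plus two for the foo declaration.  */
/* { dg-final { scan-assembler-times "DW_TAG_formal_parameter" 3 } } */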

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
What about branch(es)?

Thanks,
Richard.

2020-07-30  Richard Biener  

PR debug/96383
c-family/
* c-common.h (c_common_finalize_early_debug): Declare.
* c-common.c: Include debug.h.
(c_common_finalize_early_debug): finalize_early_debug langhook
implementation generating debug for extern declarations.

c/
* c-objc-common.h (LANG_HOOKS_FINALIZE_EARLY_DEBUG):
Define to c_common_finalize_early_debug.

cp/
* cp-objcp-common.h (LANG_HOOKS_FINALIZE_EARLY_DEBUG):
Define to c_common_finalize_early_debug.

* langhooks-def.h (lhd_finalize_early_debug): Declare.
(LANG_HOOKS_FINALIZE_EARLY_DEBUG): Define.
(LANG_HOOKS_INITIALIZER): Amend.
* langhooks.c: Include cgraph.h and debug.h.
(lhd_finalize_early_debug): Default implementation from
former code in finalize_compilation_unit.
* langhooks.h (lang_hooks::finalize_early_debug): Add.

gcc/
* cgraphunit.c (symbol_table::finalize_compilation_unit):
Call the finalize_early_debug langhook.

* gcc.dg/debug/dwarf2/pr96383-1.c: New testcase.
* gcc.dg/debug/dwarf2/pr96383-2.c: Likewise.

libstdc++/
* testsuite/20_util/assume_aligned/3.cc: Use -g0.
---
 gcc/c-family/c-common.c | 17 +
 gcc/c-family/c-common.h |  2 ++
 gcc/c/c-objc-common.h   |  2 ++
 gcc/cgraphunit.c|  8 +++-
 gcc/cp/cp-objcp-common.h|  2 ++
 gcc/langhooks-def.h |  5 -
 gcc/langhooks.c | 14 ++
 gcc/langhooks.h |  3 +++
 gcc/testsuite/gcc.dg/debug/dwarf2/pr96383-1.c   | 17 +
 gcc/testsuite/gcc.dg/debug/dwarf2/pr96383-2.c   | 17 +
 .../testsuite/20_util/assume_aligned/3.cc   |  2 +-
 11 files changed, 82 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/pr96383-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/pr96383-2.c

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 116867a4513..b97539c0c2a 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "spellcheck.h"
 #include "c-spellcheck.h"
 #include "selftest.h"
+#include "debug.h"
 
 cpp_reader *parse_in;  /* Declared in c-pragma.h.  */
 
@@ -9086,4 +9087,20 @@ braced_lists_to_strings (tree type, tree ctor)
   return braced_lists_to_strings (type, ctor, false);
 }
 
+
+/* Emit debug for functions before finalizing early debug.  */
+
+void
+c_common_finalize_early_debug (void)
+{
+  /* Emit early debug for reachable functions, and by consequence,
+ locally scoped symbols.  Also emit debug for extern declared
+ functions that are still reachable at this point.  */
+  struct cgraph_node *cnode;
+  FOR_EACH_FUNCTION (cnode)
+if (!cnode->alias && !cnode->thunk.thunk_p
+   && (cnode->has_gimple_body_p () || !DECL_IS_BUILTIN (cnode->decl)))
+  (*debug_hooks->early_global_decl) (cnode->decl);
+}
+
 #include "gt-c-family-c-common.h"
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 61627264e1e..4fc64bc4aa6 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -885,6 +885,8 @@ extern bool bool_promoted_to_int_p (tree);
 extern tree fold_for_warn (tree);
 extern tree c_common_get_narrower (tree, int *);
 extern bool get_attribute_operand (tree,

Re: [PATCH] debug/96383 - emit debug info for used external functions

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Fri, Jul 31, 2020 at 02:24:08PM +0200, Richard Biener wrote:
> 2020-07-30  Richard Biener  
> 
>   PR debug/96383
>   c-family/
>   * c-common.h (c_common_finalize_early_debug): Declare.
>   * c-common.c: Include debug.h.
>   (c_common_finalize_early_debug): finalize_early_debug langhook
>   implementation generating debug for extern declarations.
> 
>   c/
>   * c-objc-common.h (LANG_HOOKS_FINALIZE_EARLY_DEBUG):
>   Define to c_common_finalize_early_debug.
> 
>   cp/
>   * cp-objcp-common.h (LANG_HOOKS_FINALIZE_EARLY_DEBUG):
>   Define to c_common_finalize_early_debug.
> 
>   * langhooks-def.h (lhd_finalize_early_debug): Declare.
>   (LANG_HOOKS_FINALIZE_EARLY_DEBUG): Define.
>   (LANG_HOOKS_INITIALIZER): Amend.
>   * langhooks.c: Include cgraph.h and debug.h.
>   (lhd_finalize_early_debug): Default implementation from
>   former code in finalize_compilation_unit.
>   * langhooks.h (lang_hooks::finalize_early_debug): Add.
> 
>   gcc/
>   * cgraphunit.c (symbol_table::finalize_compilation_unit):
>   Call the finalize_early_debug langhook.
> 
>   * gcc.dg/debug/dwarf2/pr96383-1.c: New testcase.
>   * gcc.dg/debug/dwarf2/pr96383-2.c: Likewise.
> 
>   libstdc++/
>   * testsuite/20_util/assume_aligned/3.cc: Use -g0.

LGTM for trunk right now, for 10.3 perhaps after 2 weeks.  Not sure about
older branches, either not at all, or after a much longer settlement period.

Jakub



[PATCH] Amend match.pd syntax with force-simplified results

2020-07-31 Thread Richard Biener
This adds a ! marker to result expressions that should simplify
(and if not fail the simplification).  This can for example be
used like

(simplify
  (plus (vec_cond:s @0 @1 @2) @3)
  (vec_cond @0 (plus! @1 @3) (plus! @2 @3)))

to make the simplification only apply in case both plus operations
in the result end up simplified to a simple operand.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

2020-07-31  Richard Biener  

* genmatch.c (expr::force_leaf): Add and initialize.
(expr::gen_transform): Honor force_leaf by passing
NULL as sequence argument to maybe_push_res_to_seq.
(parser::parse_expr): Allow ! marker on result expression
operations.
* doc/match-and-simplify.texi: Amend.
---
 gcc/doc/match-and-simplify.texi | 15 +++
 gcc/genmatch.c  | 19 ---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index 1df8b90e7c4..41980acbfe9 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -361,6 +361,21 @@ Usually the types of the generated result expressions are
 determined from the context, but sometimes like in the above case
 it is required that you specify them explicitely.
 
+Another modifier for generated expressions is @code{!} which
+tells the machinery to only consider the simplification in case
+the marked expression simplified to a simple operand.  Consider
+for example
+
+@smallexample
+(simplify
+  (plus (vec_cond:s @@0 @@1 @@2) @@3)
+  (vec_cond @@0 (plus! @@1 @@3) (plus! @@2 @@3)))
+@end smallexample
+
+which moves the outer @code{plus} operation to the inner arms
+of the @code{vec_cond} expression but only if the actual plus
+operations both simplify.
+
 As intermediate conversions are often optional there is a way to
 avoid the need to repeat patterns both with and without such
 conversions.  Namely you can mark a conversion as being optional
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 0a8cba62e0c..88459d9686e 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -697,12 +697,13 @@ public:
   expr (id_base *operation_, location_t loc, bool is_commutative_ = false)
 : operand (OP_EXPR, loc), operation (operation_),
   ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
-  is_generic (false), force_single_use (false), opt_grp (0) {}
+  is_generic (false), force_single_use (false), force_leaf (false),
+  opt_grp (0) {}
   expr (expr *e)
 : operand (OP_EXPR, e->location), operation (e->operation),
    ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
   is_generic (e->is_generic), force_single_use (e->force_single_use),
-  opt_grp (e->opt_grp) {}
+  force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
   void append_op (operand *op) { ops.safe_push (op); }
   /* The operator and its operands.  */
   id_base *operation;
@@ -717,6 +718,9 @@ public:
   /* Whether pushing any stmt to the sequence should be conditional
  on this expression having a single-use.  */
   bool force_single_use;
+  /* Whether in the result expression this should be a leaf node
+ with any children simplified down to simple operands.  */
+  bool force_leaf;
   /* If non-zero, the group for optional handling.  */
   unsigned char opt_grp;
   virtual void gen_transform (FILE *f, int, const char *, bool, int,
@@ -2520,7 +2524,8 @@ expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple,
   fprintf (f, ");\n");
   fprintf_indent (f, indent, "tem_op.resimplify (lseq, valueize);\n");
   fprintf_indent (f, indent,
- "_r%d = maybe_push_res_to_seq (&tem_op, lseq);\n", depth);
+ "_r%d = maybe_push_res_to_seq (&tem_op, %s);\n", depth,
+ !force_leaf ? "lseq" : "NULL");
   fprintf_indent (f, indent,
  "if (!_r%d) return false;\n",
  depth);
@@ -4240,6 +4245,14 @@ parser::parse_expr ()
   bool force_capture = false;
   const char *expr_type = NULL;
 
+  if (!parsing_match_operand
+  && token->type == CPP_NOT
+  && !(token->flags & PREV_WHITE))
+{
+  eat_token (CPP_NOT);
+  e->force_leaf = true;
+}
+
   if (token->type == CPP_COLON
   && !(token->flags & PREV_WHITE))
 {
-- 
2.26.2
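
For illustration, with this in place the fold_binary-based VEC_COND_EXPR
patterns discussed earlier could presumably be written without open-coded
folding, along the lines of (a sketch; the operator list is assumed):

(for op (plus minus mult bit_and bit_ior bit_xor)
 (simplify
  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
  (vec_cond @0 (op! @1 @3) (op! @2 @4))))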


Re: [PATCH v4] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Kewen.Lin via Gcc-patches
Hi Richards,

on 2020/7/31 7:20 PM, Richard Biener wrote:
> On Fri, Jul 31, 2020 at 1:03 PM Richard Sandiford wrote:
>>
>> "Kewen.Lin"  writes:
> +  bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
> +  bool need_iterate_p
> +   = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> +  && !vect_known_niters_smaller_than_vf (loop_vinfo));
> +
> +  /* Init min/max, shift and minus cost relative to single
> +scalar_stmt. For now we only use length-based partial vectors on
> +Power, target specific cost tweaking may be needed for other
> +ports in future.  */
> +  unsigned int min_max_cost = 2;
> +  unsigned int shift_cost = 1, minus_cost = 1;

 Please instead add a scalar_min_max to vect_cost_for_stmt, and use
 scalar_stmt for shift and minus.  There shouldn't be any Power things
 hard-coded into target-independent code.

>>>
>>> Agree!  It's not good to leave them there.  I thought I'd wait and see
>>> if other targets which support vectors with length can reuse this, or
>>> move it to target-specific code if not sharable.  But anyway
>>> it doesn't look good; let's fix it.
> 
> In other generic places like this we simply use three generic scalar_stmt
> costs.  At least I don't see how min_max should be different from it
> when shift can be the same as minus.  Note this is also how we treat

Yeah, normally they (min/max/minus/shift) are taken as scalar_stmt; except
for fine-grained tuning like the i386 port, they will use the same cost.  On
Power9, implementing min/max takes double the cycles of normal scalar
operations like add/shift, so I was trying to model it in a more fine-grained
way since we probably generate a few min/max operations here; if the loop
body cost is small, I was worried the decision wouldn't be good enough.  But
yeah, in other generic places the small loop could also suffer from the cost
being similarly off; they are essentially the same.

> vectorization of MAX_EXPR - scalar cost is one scalar_stmt and
> vector cost is one vector_stmt.  As you say below the add_stmt_cost
> hook can adjust based on the actual GIMPLE stmt -- if one is
> available (which indeed it isn't here).
> 
> I'm somewhat lacking context here as well - we actually GIMPLE
> code-generate the min/max/shift/minus and only the eventual
> AND is deferred to the target, right?
> 

Yes, min/max/shift/minus are all GIMPLE code; targets like Power
will have their own target-specific cost for the shift which moves
the length to high bits 0:7.

One typical case is as below:

   <bb 3> [local count: 105119324]:
  _26 = n_11(D) * 4;
  _37 = MAX_EXPR <_26, 16>;
  _38 = _37 + 18446744073709551600;
  _40 = MIN_EXPR <_26, 16>;

   <bb 4> [local count: 630715945]:
  # ivtmp_35 = PHI <0(3), ivtmp_36(4)>
  # loop_len_30 = PHI <_40(3), _44(4)>
  _19 = &MEM[base: a_12(D), index: ivtmp_35, offset: 0B];
  vect_24 = .LEN_LOAD (_19, 4B, loop_len_30);
  vect__3.7_23 = VIEW_CONVERT_EXPR(vect_24);
  _1 = &MEM[base: b_13(D), index: ivtmp_35, offset: 0B];
  vect_17 = .LEN_LOAD (_1, 4B, loop_len_30);
  vect__5.10_9 = VIEW_CONVERT_EXPR(vect_17);
  vect__7.11_8 = vect__5.10_9 + vect__3.7_23;
  vect_28 = VIEW_CONVERT_EXPR(vect__7.11_8);
  _2 = &MEM[base: c_14(D), index: ivtmp_35, offset: 0B];
  .LEN_STORE (_2, 4B, loop_len_30, vect_28);
  _42 = MIN_EXPR ;
  _43 = _38 - _42;
  _44 = MIN_EXPR <_43, 16>;
  ivtmp_36 = ivtmp_35 + 16;
  if (ivtmp_35 < _38)
goto <bb 4>; [83.33%]
  else
goto <bb 5>; [16.67%]


>>> I had some concerns about the vect_cost_for_stmt way, since it seems to allow
>>> more computations similar to min/max to be added like this; in the long
>>> term it probably leads to a situation like: scalar_min_max,
>>> scalar_div_expr, scalar_mul_expr ... an enum (cost types) bloat, which
>>> seems not good to maintain.
>>
>> I guess doing that doesn't seem so bad to me :-)  I think it's been
>> a recurring problem that the current classification isn't fine-grained
>> enough for some cases.
> 
> But we eventually want to get rid of this classification enum in favor
> of the add_stmt_cost hook ...
> 

Nice, sounds like each target has to handle it in a fine-grained way.  :)
IIUC, the current modeling doesn't consider instruction dependencies,
execution resources, etc. the way scheduling does, so even if all costs are
fine-grained enough, the decision could be sub-optimal.

>>> I noticed that the i386 port's ix86_add_stmt_cost will check stmt_info->stmt,
>>> whether it is an assignment, and the subcode of the expression; it provides
>>> the chance to check the statement in a more fine-grained way, not just as a
>>> normal scalar_stmt/vector_stmt.
>>>
>>> For the case here, we don't have the stmt_info, but we know the type
>>> of computation (expression).  How about extending the hook add_stmt_cost
>>> with one extra tree_code argument?  By default it can be some
>>> meaningless code; for needs like here, we specify the tree_code
>>> as the code of the computation, like {MIN,MAX}_EXPR, then target-specific
>>> add_stmt_cost can check this tree_code and adjust the cost accord

Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Sandiford
Marc Glisse  writes:
> On Fri, 31 Jul 2020, Richard Sandiford wrote:
>
>> Marc Glisse  writes:
>>> +/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
>>> + (simplify
>>> +  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
>>> +  (with
>>> +   {
>>> + tree rhs1, rhs2 = NULL;
>>> + rhs1 = fold_binary (op, type, @1, @3);
>>> + if (rhs1 && is_gimple_val (rhs1))
>>> +   rhs2 = fold_binary (op, type, @2, @4);
>>> +   }
>>> +   (if (rhs2 && is_gimple_val (rhs2))
>>> +(vec_cond @0 { rhs1; } { rhs2; })
>>> +#endif
>>
>> This one looks dangerous for potentially-trapping ops.
>
> I would expect the operation not to be folded if it can trap. Is that too 
> optimistic?

Not sure TBH.  I was thinking of “trapping” in the sense of raising
an IEEE exception, rather than in the could-throw/must-end-bb sense.
I thought match.pd applied to things like FP addition as normal and
it was up to individual patterns to check the appropriate properties.

Thanks,
Richard


Re: [PATCH] Removal of HSA offloading from gcc and libgomp

2020-07-31 Thread Jakub Jelinek via Gcc-patches
On Mon, Jul 27, 2020 at 12:37:17PM +0200, Martin Jambor wrote:
> the email with the patch did not make it to the mailing list because the
> patch was too big, so I'm trying again, this time with the patch
> compressed and attached.
> 
> It is actually not exactly the same one but rather one I re-based on
> today's master.  I am still only in the process of testing it, but
> sending it anyway, hoping nothing broke in those few days.
> 
> It is also available at the GCC git server as branch
> refs/users/jamborm/heads/remove-hsa-200727, also viewable at
> 
> https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=log;h=refs/users/jamborm/heads/remove-hsa-200727

I've skimmed through it and didn't find anything I'd do differently, so
LGTM.  Ok for trunk whenever you're ready.

Thanks

Jakub



Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 2:50 PM Richard Sandiford wrote:
>
> Marc Glisse  writes:
> > On Fri, 31 Jul 2020, Richard Sandiford wrote:
> >
> >> Marc Glisse  writes:
> >>> +/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
> >>> + (simplify
> >>> +  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
> >>> +  (with
> >>> +   {
> >>> + tree rhs1, rhs2 = NULL;
> >>> + rhs1 = fold_binary (op, type, @1, @3);
> >>> + if (rhs1 && is_gimple_val (rhs1))
> >>> +   rhs2 = fold_binary (op, type, @2, @4);
> >>> +   }
> >>> +   (if (rhs2 && is_gimple_val (rhs2))
> >>> +(vec_cond @0 { rhs1; } { rhs2; })
> >>> +#endif
> >>
> >> This one looks dangerous for potentially-trapping ops.
> >
> > I would expect the operation not to be folded if it can trap. Is that too
> > optimistic?
>
> Not sure TBH.  I was thinking of “trapping” in the sense of raising
> an IEEE exception, rather than in the could-throw/must-end-bb sense.
> I thought match.pd applied to things like FP addition as normal and
> it was up to individual patterns to check the appropriate properties.

I think it can indeed be deferred to the simplification of (op @0 @2)
because that would be invalid if it removed an IEEE exception.
The VEC_COND_EXPR itself cannot trap.

Richard.

> Thanks,
> Richard


Re: [PATCH v4] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Richard Biener via Gcc-patches
On Fri, Jul 31, 2020 at 2:37 PM Kewen.Lin  wrote:
>
> Hi Richards,
>
> on 2020/7/31 7:20 PM, Richard Biener wrote:
> > On Fri, Jul 31, 2020 at 1:03 PM Richard Sandiford wrote:
> >>
> >> "Kewen.Lin"  writes:
> > +  bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
> > +  bool need_iterate_p
> > +   = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> > +  && !vect_known_niters_smaller_than_vf (loop_vinfo));
> > +
> > +  /* Init min/max, shift and minus cost relative to single
> > +scalar_stmt. For now we only use length-based partial vectors on
> > +Power, target specific cost tweaking may be needed for other
> > +ports in future.  */
> > +  unsigned int min_max_cost = 2;
> > +  unsigned int shift_cost = 1, minus_cost = 1;
> 
>  Please instead add a scalar_min_max to vect_cost_for_stmt, and use
>  scalar_stmt for shift and minus.  There shouldn't be any Power things
>  hard-coded into target-independent code.
> 
> >>>
> >>> Agree!  It's not good to leave them there.  I thought I'd wait and see
> >>> if other targets which support vectors with length can reuse this, or
> >>> move it to target-specific code if not sharable.  But anyway
> >>> it doesn't look good; let's fix it.
> >
> > In other generic places like this we simply use three generic scalar_stmt
> > costs.  At least I don't see how min_max should be different from it
> > when shift can be the same as minus.  Note this is also how we treat
>
> Yeah, normally they (min/max/minus/shift) are taken as scalar_stmt; except
> for fine-grained tuning like the i386 port, they will use the same cost.  On
> Power9, implementing min/max takes double the cycles of normal scalar
> operations like add/shift, so I was trying to model it in a more fine-grained
> way since we probably generate a few min/max operations here; if the loop
> body cost is small, I was worried the decision wouldn't be good enough.  But
> yeah, in other generic places the small loop could also suffer from the cost
> being similarly off; they are essentially the same.
>
> > vectorization of MAX_EXPR - scalar cost is one scalar_stmt and
> > vector cost is one vector_stmt.  As you say below the add_stmt_cost
> > hook can adjust based on the actual GIMPLE stmt -- if one is
> > available (which indeed it isn't here).
> >
> > I'm somewhat lacking context here as well - we actually GIMPLE
> > code-generate the min/max/shift/minus and only the eventual
> > AND is deferred to the target, right?
> >
>
> Yes, min/max/shift/minus are all GIMPLE code; targets like Power
> will have their own target-specific cost for the shift which moves
> the length to high bits 0:7.
>
> One typical case is as below:
>
>    <bb 3> [local count: 105119324]:
>   _26 = n_11(D) * 4;
>   _37 = MAX_EXPR <_26, 16>;
>   _38 = _37 + 18446744073709551600;
>   _40 = MIN_EXPR <_26, 16>;
>
>    <bb 4> [local count: 630715945]:
>   # ivtmp_35 = PHI <0(3), ivtmp_36(4)>
>   # loop_len_30 = PHI <_40(3), _44(4)>
>   _19 = &MEM[base: a_12(D), index: ivtmp_35, offset: 0B];
>   vect_24 = .LEN_LOAD (_19, 4B, loop_len_30);
>   vect__3.7_23 = VIEW_CONVERT_EXPR(vect_24);
>   _1 = &MEM[base: b_13(D), index: ivtmp_35, offset: 0B];
>   vect_17 = .LEN_LOAD (_1, 4B, loop_len_30);
>   vect__5.10_9 = VIEW_CONVERT_EXPR(vect_17);
>   vect__7.11_8 = vect__5.10_9 + vect__3.7_23;
>   vect_28 = VIEW_CONVERT_EXPR(vect__7.11_8);
>   _2 = &MEM[base: c_14(D), index: ivtmp_35, offset: 0B];
>   .LEN_STORE (_2, 4B, loop_len_30, vect_28);
>   _42 = MIN_EXPR ;
>   _43 = _38 - _42;
>   _44 = MIN_EXPR <_43, 16>;
>   ivtmp_36 = ivtmp_35 + 16;
>   if (ivtmp_35 < _38)
> goto <bb 4>; [83.33%]
>   else
> goto <bb 5>; [16.67%]
>
>
> >>> I had some concerns on vect_cost_for_stmt way, since it seems to allow
> >>> more computations similar to min/max to be added like this; in the long
> >>> term it probably leads to a situation like scalar_min_max,
> >>> scalar_div_expr, scalar_mul_expr ... an enum (cost types) bloat, which
> >>> seems not good to maintain.
> >>
> >> I guess doing that doesn't seem so bad to me :-)  I think it's been
> >> a recurring problem that the current classification isn't fine-grained
> >> enough for some cases.
> >
> > But we eventually want to get rid of this classification enum in favor
> > of the add_stmt_cost hook ...
> >
>
> Nice, sounds like each target has to handle it fine-grain.  :)
> IIUC, the current modeling doesn't consider instruction dependencies,
> execution resources etc. the way scheduling does, so even if all costs are
> fine-grained enough, the decision could be sub-optimal.

That's what the finish_cost hook is for - the target can collect
all stmts during add_stmt and then apply adjustments at the end
(IIRC power does already for shift operation resource constraints).
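
For illustration, here is a tiny standalone C++ sketch of that
add_stmt/finish_cost split: record statements as they are costed, then
apply a resource adjustment once the whole loop body is known.  The
numbers and the shift-unit rule here are invented, and the real hooks
take GCC-internal types; this only models the pattern:

#include <cstdio>

struct cost_data {
  unsigned body_cost = 0;
  unsigned shift_stmts = 0;  // stmts competing for a hypothetical shift unit
};

// analogue of the add_stmt_cost hook: just accumulate
void add_stmt_cost (cost_data &d, unsigned count, bool is_shift)
{
  d.body_cost += count;
  if (is_shift)
    d.shift_stmts += count;
}

// analogue of the finish_cost hook: adjust once the full body is known
unsigned finish_cost (cost_data &d)
{
  if (d.shift_stmts > 4)               // assumed resource limit
    d.body_cost += d.shift_stmts - 4;  // penalize the excess shifts
  return d.body_cost;
}

int main ()
{
  cost_data d;
  add_stmt_cost (d, 6, true);   // six shift statements
  add_stmt_cost (d, 3, false);  // three other statements
  std::printf ("adjusted body cost: %u\n", finish_cost (d));  // prints 11
}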

> >>> I noticed that the i386 port's ix86_add_stmt_cost will check stmt_info->stmt,
> >>> whether it is an assignment, and the subcode of the expression; it provides
> >>> the chance to check the statement more fine-grained, not just as a normal
> >>> scalar_stmt/ve

Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Sandiford wrote:


Marc Glisse  writes:

On Fri, 31 Jul 2020, Richard Sandiford wrote:


Marc Glisse  writes:

+/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
+ (simplify
+  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
+  (with
+   {
+ tree rhs1, rhs2 = NULL;
+ rhs1 = fold_binary (op, type, @1, @3);
+ if (rhs1 && is_gimple_val (rhs1))
+   rhs2 = fold_binary (op, type, @2, @4);
+   }
+   (if (rhs2 && is_gimple_val (rhs2))
+(vec_cond @0 { rhs1; } { rhs2; })
+#endif


This one looks dangerous for potentially-trapping ops.


I would expect the operation not to be folded if it can trap. Is that too
optimistic?


Not sure TBH.  I was thinking of “trapping” in the sense of raising
an IEEE exception, rather than in the could-throw/must-end-bb sense.


That's what I understood from your message :-)


I thought match.pd applied to things like FP addition as normal and
it was up to individual patterns to check the appropriate properties.


Yes, and in this case I am delegating that to fold_binary, which already 
performs this check.


I tried with this C++ program

typedef double vecf __attribute__((vector_size(16)));
typedef long long veci __attribute__((vector_size(16)));
vecf f(veci c){
  return (c?1.:2.)/(c?3.:7.);
}

the folding happens by default, but not with -frounding-math, which seems 
like exactly what we want.


--
Marc Glisse


Re: VEC_COND_EXPR optimizations

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Marc Glisse wrote:


On Fri, 31 Jul 2020, Richard Sandiford wrote:


Marc Glisse  writes:

On Fri, 31 Jul 2020, Richard Sandiford wrote:


Marc Glisse  writes:

+/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
+ (simplify
+  (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4))
+  (with
+   {
+ tree rhs1, rhs2 = NULL;
+ rhs1 = fold_binary (op, type, @1, @3);
+ if (rhs1 && is_gimple_val (rhs1))
+   rhs2 = fold_binary (op, type, @2, @4);
+   }
+   (if (rhs2 && is_gimple_val (rhs2))
+(vec_cond @0 { rhs1; } { rhs2; })
+#endif


This one looks dangerous for potentially-trapping ops.


I would expect the operation not to be folded if it can trap. Is that too
optimistic?


Not sure TBH.  I was thinking of “trapping” in the sense of raising
an IEEE exception, rather than in the could-throw/must-end-bb sense.


That's what I understood from your message :-)


I thought match.pd applied to things like FP addition as normal and
it was up to individual patterns to check the appropriate properties.


Yes, and in this case I am delegating that to fold_binary, which already 
performs this check.


I tried with this C++ program

typedef double vecf __attribute__((vector_size(16)));
typedef long long veci __attribute__((vector_size(16)));
vecf f(veci c){
 return (c?1.:2.)/(c?3.:7.);
}

the folding happens by default, but not with -frounding-math, which seems 
like exactly what we want.


That was for rounding. -ftrapping-math does not disable folding of

typedef double vecf __attribute__((vector_size(16)));
typedef long long veci __attribute__((vector_size(16)));
vecf f(veci c){
vecf z={};
  return (z+1)/(z+3);
}

despite a possible inexact flag, so it doesn't disable vec_cond_expr 
folding either.


(not sure we want to fix this unless -fno-trapping-math becomes the 
default)


--
Marc Glisse


Re: [PATCH v4] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Kewen.Lin via Gcc-patches
on 2020/7/31 9:01 PM, Richard Biener wrote:
> On Fri, Jul 31, 2020 at 2:37 PM Kewen.Lin  wrote:
>>
>> Hi Richards,
>>
>> on 2020/7/31 7:20 PM, Richard Biener wrote:
>>> On Fri, Jul 31, 2020 at 1:03 PM Richard Sandiford
>>>  wrote:

 "Kewen.Lin"  writes:
>>> +  bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
>>> +  bool need_iterate_p
>>> +   = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
>>> +  && !vect_known_niters_smaller_than_vf (loop_vinfo));
>>> +
>>> +  /* Init min/max, shift and minus cost relative to single
>>> +scalar_stmt. For now we only use length-based partial vectors on
>>> +Power, target specific cost tweaking may be needed for other
>>> +ports in future.  */
>>> +  unsigned int min_max_cost = 2;
>>> +  unsigned int shift_cost = 1, minus_cost = 1;
>>
>> Please instead add a scalar_min_max to vect_cost_for_stmt, and use
>> scalar_stmt for shift and minus.  There shouldn't be any Power things
>> hard-coded into target-independent code.
>>
>
> Agree!  It's not good to leave them there.  I thought to wait and see
> if other targets which support vector with length can reuse this, or
> move it to target specific codes then if not sharable.  But anyway
> it looks not good, let's fix it.
>>>
>>> In other generic places like this we simply use three generic scalar_stmt
>>> costs.  At least I don't see how min_max should be different from it
>>> when shift can be the same as minus.  Note this is also how we treat
>>
>> Yeah, normally they (min/max/minus/shift) are all costed as scalar_stmt and
>> use the same cost, except for fine-grained tuning like in the i386 port.  On
>> Power9, min/max takes twice the cycles of normal scalar operations like
>> add/shift, so I was trying to model it more fine-grained: we probably
>> generate a few min/max here, and if the loop body cost is small I was worried
>> the decision wouldn't be good enough.  But yeah, other generic places suffer
>> the same imprecision for small loops, so they are essentially the same.
>>
>>> vectorization of MAX_EXPR - scalar cost is one scalar_stmt and
>>> vector cost is one vector_stmt.  As you say below the add_stmt_cost
>>> hook can adjust based on the actual GIMPLE stmt -- if one is
>>> available (which indeed it isn't here).
>>>
>>> I'm somewhat lacking context here as well - we actually GIMPLE
>>> code-generate the min/max/shift/minus and only the eventual
>>> AND is deferred to the target, right?
>>>
>>
>> Yes, min/max/shift/minus are all GIMPLE code; targets like Power
>> will have their own target-specific cost for the shift which moves
>> the length to high bits 0:7.
>>
>> One typical case is as below:
>>
>>[local count: 105119324]:
>>   _26 = n_11(D) * 4;
>>   _37 = MAX_EXPR <_26, 16>;
>>   _38 = _37 + 18446744073709551600;
>>   _40 = MIN_EXPR <_26, 16>;
>>
>>[local count: 630715945]:
>>   # ivtmp_35 = PHI <0(3), ivtmp_36(4)>
>>   # loop_len_30 = PHI <_40(3), _44(4)>
>>   _19 = &MEM[base: a_12(D), index: ivtmp_35, offset: 0B];
>>   vect_24 = .LEN_LOAD (_19, 4B, loop_len_30);
>>   vect__3.7_23 = VIEW_CONVERT_EXPR(vect_24);
>>   _1 = &MEM[base: b_13(D), index: ivtmp_35, offset: 0B];
>>   vect_17 = .LEN_LOAD (_1, 4B, loop_len_30);
>>   vect__5.10_9 = VIEW_CONVERT_EXPR(vect_17);
>>   vect__7.11_8 = vect__5.10_9 + vect__3.7_23;
>>   vect_28 = VIEW_CONVERT_EXPR(vect__7.11_8);
>>   _2 = &MEM[base: c_14(D), index: ivtmp_35, offset: 0B];
>>   .LEN_STORE (_2, 4B, loop_len_30, vect_28);
>>   _42 = MIN_EXPR ;
>>   _43 = _38 - _42;
>>   _44 = MIN_EXPR <_43, 16>;
>>   ivtmp_36 = ivtmp_35 + 16;
>>   if (ivtmp_35 < _38)
>> goto ; [83.33%]
>>   else
>> goto ; [16.67%]
>>
>>
> I had some concerns on vect_cost_for_stmt way, since it seems to allow
> more computations similar to min/max to be added like this; in the long
> term it probably leads to a situation like scalar_min_max,
> scalar_div_expr, scalar_mul_expr ... an enum (cost types) bloat, which
> seems not good to maintain.

 I guess doing that doesn't seem so bad to me :-)  I think it's been
 a recurring problem that the current classification isn't fine-grained
 enough for some cases.
>>>
>>> But we eventually want to get rid of this classification enum in favor
>>> of the add_stmt_cost hook ...
>>>
>>
>> Nice, sounds like each target has to handle it fine-grain.  :)
>> IIUC, the current modeling doesn't consider instruction dependencies,
>> execution resources etc. the way scheduling does, so even if all costs are
>> fine-grained enough, the decision could be sub-optimal.
> 
> That's what the finish_cost hook is for - the target can collect
> all stmts during add_stmt and then apply adjustments at the end
> (IIRC power does already for shift operation resource constraints).
> 

Oh, I forgot that we have that hook, though I had imagined some generic code
would do this.  Good to know we have a way to do it.  Ni

Re: [PATCH] Amend match.pd syntax with force-simplified results

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Biener wrote:


This adds a ! marker to result expressions that should simplify
(and if not fail the simplification).  This can for example be
used like

(simplify
 (plus (vec_cond:s @0 @1 @2) @3)
 (vec_cond @0 (plus! @1 @3) (plus! @2 @3)))

to make the simplification only apply in case both plus operations
in the result end up simplified to a simple operand.


Thanks.

The ! syntax seems fine. If we run out, we can always introduce an 
attribute-like syntax: (plus[force_leaf] @1 @2), but we aren't there yet. 
And either this feature will see a lot of use and deserve its short 
syntax, or it won't and it will be easy to reclaim '!' for something else.


I am wondering about GENERIC. IIUC, '!' is ignored for GENERIC. Should we 
protect some/most uses of this syntax with #if GIMPLE? In my case, I think 
resimplify might simply return non-0 because it swapped the operands, 
which should not be sufficient to enable the transformation.


--
Marc Glisse


Re: Refactor peel_iters_{pro,epi}logue cost model handlings

2020-07-31 Thread Kewen.Lin via Gcc-patches
on 2020/7/31 6:57 PM, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> Hi Richard,
>>
>> on 2020/7/27 9:10 PM, Richard Sandiford wrote:
>>> "Kewen.Lin"  writes:
 Hi,

 As Richard S. suggested in the thread:

 https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550633.html

 this patch is separated from the one of that thread, mainly to refactor the
 existing peel_iters_{pro,epi}logue cost model handlings.

 I've addressed Richard S.'s review comments there, moreover because of one
 failure of aarch64 testing, I updated it a bit more to keep the logic 
 unchanged
 as before first (refactor_cost.diff).
>>>
>>> Heh, nice when a clean-up exposes an existing bug. ;-)  I agree the
>>> updates look correct.  E.g. if vf is 1, we should assume that there
>>> are no peeled iterations even if the number of iterations is unknown.
>>>
>>> Both patches are OK with some very minor nits (sorry):
>>
>> Thanks for the comments!  I've addressed them and commit refactor_cost.diff
>> via r11-2378.
>>
>> For adjust.diff, it works well on powerpc64le-linux-gnu (bootstrap/regtest),
>> but got one failure on aarch64 as below:
>>
>> PASS->FAIL: gcc.target/aarch64/sve/cost_model_2.c -march=armv8.2-a+sve  
>> scan-assembler-times \\tst1w\\tz 1
>>
>> I spent some time investigating it and think it's expected.  As you
>> mentioned above, the patch can fix the cases with VF 1, and the VF of this
>> case is exactly one.  :)
>>
>> Without the proposed adjustment, the cost for V16QI looks like:
>>
>>   Vector inside of loop cost: 1
>>   Vector prologue cost: 4
>>   Vector epilogue cost: 3
>>   Scalar iteration cost: 4
>>   Scalar outside cost: 6
>>   Vector outside cost: 7
>>   prologue iterations: 0
>>   epilogue iterations: 0
>>
>> After the change, the cost becomes to:
>>
>> Vector inside of loop cost: 1
>> Vector prologue cost: 1
>> Vector epilogue cost: 0
>> Scalar iteration cost: 4
>> Scalar outside cost: 6
>> Vector outside cost: 1
>> prologue iterations: 0
>> epilogue iterations: 0
>>
>> which outperforms VNx16QI that isn't affected by this patch with partial 
>> vectors.
> 
> Hmm.  V16QI outperforming VNx16QI is kind-of bogus in this case,
> since the store is a full-vector store for both Advanced SIMD and
> 128-bit SVE.  But we do currently still generate the SVE loop in
> a more general form (since doing otherwise introduces other problems)
> so I guess the result is self-consistent.
> 
> More importantly, even if the costs were equal, V16QI would still
> win for 128-bit SVE.
> 
> So yeah, while I had my doubts at first, I agree this is the right fix.
> 

Thanks for the explanation!  Committed in r11-2453.

BR,
Kewen


Re: [PATCH] Amend match.pd syntax with force-simplified results

2020-07-31 Thread Richard Biener
On Fri, 31 Jul 2020, Marc Glisse wrote:

> On Fri, 31 Jul 2020, Richard Biener wrote:
> 
> > This adds a ! marker to result expressions that should simplify
> > (and if not fail the simplification).  This can for example be
> > used like
> >
> > (simplify
> >  (plus (vec_cond:s @0 @1 @2) @3)
> >  (vec_cond @0 (plus! @1 @3) (plus! @2 @3)))
> >
> > to make the simplification only apply in case both plus operations
> > in the result end up simplified to a simple operand.
> 
> Thanks.
> 
> The ! syntax seems fine. If we run out, we can always introduce an
> attribute-like syntax: (plus[force_leaf] @1 @2), but we aren't there yet. And
> either this feature will see a lot of use and deserve its short syntax, or it
> won't and it will be easy to reclaim '!' for something else.
> 
> I am wondering about GENERIC. IIUC, '!' is ignored for GENERIC. Should we
> protect some/most uses of this syntax with #if GIMPLE? In my case, I think
> resimplify might simply return non-0 because it swapped the operands, which
> should not be sufficient to enable the transformation.

I think the resimplify logic also only works on GIMPLE.  But you are
right - I forgot about GENERIC.  But it should be straightforward
to adjust its code generation from

 {
tree _o1[2], _r1;
_o1[0] = captures[2];
_o1[1] = unshare_expr (captures[4]);
_r1 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (_o1[0]), 
_o1[0], _o1[1]);
res_op1 = _r1;
  }
  tree res_op2;
  {
tree _o1[2], _r1;
_o1[0] = captures[3];
_o1[1] = captures[4];
_r1 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (_o1[0]), 
_o1[0], _o1[1]);
res_op2 = _r1;
  }
  tree _r;
  _r = fold_build3_loc (loc, VEC_COND_EXPR, type, res_op0, 
res_op1, res_op2);

to add a

  if (!CONSTANT_CLASS_P (_r1) || DECL_P (_r1))
return NULL_TREE;

or maybe if (EXPR_P (_r1))?  On GENERIC it's a bit more difficult
since (plus! @0 @1) should be OK for say @0 == a + b and @1 == 0
simplifying to @0 itself.  Likewise @0 == a + 1 and @1 == 1
simplifying to a + 2 should be OK (we've changed a leaf).  Or
maybe that's not OK - who knows...

Of course we'd better change fold_build.. to fold_binary as well.

Or we simply automatically disable those patterns for GENERIC
(though that would probably be unexpected).

Sorry for not thinking about GENERIC - seems like a (small) mess
there ;)

Richard.


[committed] libstdc++: Fix use of newlocale in std::from_chars

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* src/c++17/floating_from_chars.cc (from_chars_impl): Use
LC_ALL_MASK not LC_ALL.

Tested x86_64-linux and powerpc-aix. Committed to trunk.


commit 4143efc1eed44050201b20c78d0206bc266e30c4
Author: Jonathan Wakely 
Date:   Fri Jul 31 14:36:56 2020

libstdc++: Fix use of newlocale in std::from_chars

libstdc++-v3/ChangeLog:

* src/c++17/floating_from_chars.cc (from_chars_impl): Use
LC_ALL_MASK not LC_ALL.

diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc 
b/libstdc++-v3/src/c++17/floating_from_chars.cc
index 26b69a38521..d52c0a937b9 100644
--- a/libstdc++-v3/src/c++17/floating_from_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
@@ -286,7 +286,7 @@ namespace
   ptrdiff_t
   from_chars_impl(const char* str, T& value, errc& ec) noexcept
   {
-if (locale_t loc = ::newlocale(LC_ALL, "C", (locale_t)0)) [[likely]]
+if (locale_t loc = ::newlocale(LC_ALL_MASK, "C", (locale_t)0)) [[likely]]
   {
locale_t orig = ::uselocale(loc);
 
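
For reference, a minimal standalone sketch of the distinction being fixed
(POSIX newlocale takes a *category mask* such as LC_ALL_MASK; LC_ALL is a
setlocale category index, so passing it here was wrong):

#include <locale.h>
#include <stdio.h>

int main ()
{
  // correct: pass the mask of categories to replace, not LC_ALL
  if (locale_t loc = newlocale (LC_ALL_MASK, "C", (locale_t) 0))
    {
      locale_t orig = uselocale (loc);  // switch this thread to "C"
      /* ... locale-sensitive parsing, as in from_chars_impl ... */
      uselocale (orig);                 // restore the caller's locale
      freelocale (loc);
      puts ("ok");
    }
  return 0;
}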


Re: [PATCH] Amend match.pd syntax with force-simplified results

2020-07-31 Thread Marc Glisse

On Fri, 31 Jul 2020, Richard Biener wrote:


Or we simply automatically disable those patterns for GENERIC
(though that would probably be unexpected).


Since the definition is not clear, whatever we do will be unexpected at 
least in some cases. Disabling it for GENERIC for now seems ok to me, we 
can always extend it later...


--
Marc Glisse


[PATCH v5] vect/rs6000: Support vector with length cost modeling

2020-07-31 Thread Kewen.Lin via Gcc-patches
Hi Richard,

New version v5 is attached.

v5 main changes against v4:
  1) use _stmt instead of _cnt to avoid confusion
  2) factor out function vect_rgroup_iv_might_wrap_p
  3) use generic scalar_stmt for min/max stmt

Does this look better?  Thanks in advance!

BR,
Kewen
-

gcc/ChangeLog:

* config/rs6000/rs6000.c (rs6000_adjust_vect_cost_per_loop): New
function.
(rs6000_finish_cost): Call rs6000_adjust_vect_cost_per_loop.
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Add cost
modeling for vector with length.
(vect_rgroup_iv_might_wrap_p): New function, factored out from...
* tree-vect-loop-manip.c (vect_set_loop_controls_directly): ...this.
Update function comment.
* tree-vect-stmts.c (vect_gen_len): Update function comment.
* tree-vectorizer.h (vect_rgroup_iv_might_wrap_p): New declare.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 009afc5f894..86ef584e09b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5177,6 +5177,34 @@ rs6000_add_stmt_cost (class vec_info *vinfo, void *data, 
int count,
   return retval;
 }
 
+/* For some target specific vectorization cost which can't be handled per stmt,
+   we check the requisite conditions and adjust the vectorization cost
+   accordingly if satisfied.  One typical example is to model shift cost for
+   vector with length by counting number of required lengths under condition
+   LOOP_VINFO_FULLY_WITH_LENGTH_P.  */
+
+static void
+rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
+{
+  struct loop *loop = data->loop_info;
+  gcc_assert (loop);
+  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
+
+  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+{
+  rgroup_controls *rgc;
+  unsigned int num_vectors_m1;
+  unsigned int shift_cnt = 0;
+  FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+   if (rgc->type)
+ /* Each length needs one shift to fill into bits 0-7.  */
+ shift_cnt += num_vectors_m1 + 1;
+
+  rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
+   NULL, NULL_TREE, 0, vect_body);
+}
+}
+
 /* Implement targetm.vectorize.finish_cost.  */
 
 static void
@@ -5186,7 +5214,10 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
   rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
 
   if (cost_data->loop_info)
-rs6000_density_test (cost_data);
+{
+  rs6000_adjust_vect_cost_per_loop (cost_data);
+  rs6000_density_test (cost_data);
+}
 
   /* Don't vectorize minimum-vectorization-factor, simple copy loops
  that require versioning for any reason.  The vectorization is at
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 490e7befea3..2a0d3b1840d 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -412,7 +412,10 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, 
rgroup_controls *dest_rgm,
 
This means that we cannot guarantee that such an induction variable
would ever hit a value that produces a set of all-false masks or zero
-   lengths for RGC.  */
+   lengths for RGC.
+
+   Note that please check cost modeling whether needed to be updated in
+   function vect_estimate_min_profitable_iters if any changes.  */
 
 static tree
 vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
@@ -711,8 +714,6 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
   else
 niters = gimple_convert (&preheader_seq, compare_type, niters);
 
-  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
-
   /* Iterate over all the rgroups and fill in their controls.  We could use
  the first control from any rgroup for the loop condition; here we
  arbitrarily pick the last.  */
@@ -739,11 +740,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
 
/* See whether zero-based IV would ever generate all-false masks
   or zero length before wrapping around.  */
-   unsigned nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
-   bool might_wrap_p
- = (iv_limit == -1
-|| (wi::min_precision (iv_limit * nitems_per_iter, UNSIGNED)
-> compare_precision));
+   bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
 
/* Set up all controls for this group.  */
test_ctrl = vect_set_loop_controls_directly (loop, loop_vinfo,
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 43ec4fb04d5..dea9ca6778f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3798,6 +3798,58 @@ vect_estimate_min_profitable_iters (loop_vec_info 
loop_vinfo,
   (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1,
vector_stmt, NULL, NULL_TREE, 0, vect_body);
 }
+  else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))

Re: [PATCH V2] aarch64: Use Q-reg loads/stores in movmem expansion

2020-07-31 Thread Richard Sandiford
Sudakshina Das  writes:
> Hi
>
> This is my attempt at reviving the old patch 
> https://gcc.gnu.org/pipermail/gcc-patches/2019-January/514632.html
>
> I have followed up on Kyrill's comment upstream in the link above and I am using
> the recommended option iii that he mentioned.
> "1) Adjust the copy_limit to 256 bits after checking 
> AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS in the tuning.
>  2) Adjust aarch64_copy_one_block_and_progress_pointers to handle 256-bit 
> moves, by:
>iii) Emit explicit V4SI (or any other 128-bit vector mode) pairs ldp/stps. 
> This wouldn't need any adjustments to
> MD patterns, but would make 
> aarch64_copy_one_block_and_progress_pointers more complex as it would now have
> two paths, where one handles two adjacent memory addresses in one 
> calls."
>
> With this patch the following test
>
> #define N 8
> extern int src[N], dst[N];
>
> void
> foo (void)
> {
>   __builtin_memcpy (dst, src, N * sizeof (int));
> }
>
> which was originally giving
> foo:
> adrp x1, src
> add x1, x1, :lo12:src
> ldp x4, x5, [x1]
> adrp x0, dst
> add x0, x0, :lo12:dst
> ldp x2, x3, [x1, 16]
> stp x4, x5, [x0]
> stp x2, x3, [x0, 16]
> ret
>
>
> changes to the following
> foo:
> adrp x1, src
> add x1, x1, :lo12:src
> adrp x0, dst
> add x0, x0, :lo12:dst
> ldp q1, q0, [x1]
> stp q1, q0, [x0]
> ret
>
> This gives about 1.3% improvement on 523.xalancbmk_r in SPEC2017 and an 
> overall code size reduction on most
> SPEC2017 Int benchmarks on Neoverse N1 due to more LDP/STP Q pair registers.

Sorry for the slow review.  LGTM with a very minor nit (sorry)…

> @@ -21150,9 +21177,12 @@ aarch64_expand_cpymem (rtx *operands)
>/* Convert n to bits to make the rest of the code simpler.  */
>n = n * BITS_PER_UNIT;
>  
> -  /* Maximum amount to copy in one go.  The AArch64 back-end has integer 
> modes
> - larger than TImode, but we should not use them for loads/stores here.  
> */
> -  const int copy_limit = GET_MODE_BITSIZE (TImode);
> +  /* Maximum amount to copy in one go.  We allow 256-bit chunks based on the
> + AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter and TARGET_SIMD.  
> */
> +  const int copy_limit = ((aarch64_tune_params.extra_tuning_flags
> +& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
> +   || !TARGET_SIMD)
> +  ? GET_MODE_BITSIZE (TImode) :  256;

Should only be one space before “256”.

I guess at some point we should consider handling fixed-length SVE too,
but that's only worth it for -msve-vector-bits=512 and higher.

Thanks,
Richard


[Patch, fortran] PR96320 - gfortran 8-10 shape mismatch in assumed-length dummy argument character array

2020-07-31 Thread Paul Richard Thomas via Gcc-patches
Hi All,

This is my first foray into gfortran for quite a while, so I am going
cautiously on this 'obvious' patch. The comment in the patch and the
ChangeLog are clear enough that no further explanation is needed.

Regtests on FC31.x86_64 - OK for trunk?

I am a bit reluctant to get into backporting just yet because I am still
groping my way round git. However, I will do it when I feel a bit braver!

Regards

Paul

2020-07-31  Paul Thomas  

PR fortran/96320
* interface.c (gfc_check_dummy_characteristics): If a module
procedure arrives with assumed shape in the interface and
deferred shape in the procedure itself, update the latter and
copy the lower bounds.

2020-07-31  Paul Thomas  

PR fortran/96320
* gfortran.dg/module_procedure_4.f90 : New test.
diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index e51820918b8..7985fc70fd4 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -1466,6 +1466,19 @@ gfc_check_dummy_characteristics (gfc_symbol *s1, gfc_symbol *s2,
   int i, compval;
   gfc_expr *shape1, *shape2;
 
+  /* Sometimes the ambiguity between deferred shape and assumed shape
+	 does not get resolved in module procedures, where the only explicit
+	 declaration of the dummy is in the interface.  */
+  if (s1->ns->proc_name && s1->ns->proc_name->attr.module_procedure
+	  && s1->as->type == AS_ASSUMED_SHAPE
+	  && s2->as->type == AS_DEFERRED)
+	{
+	  s2->as->type = AS_ASSUMED_SHAPE;
+	  for (i = 0; i < s2->as->rank; i++)
+	if (s1->as->lower[i] != NULL)
+	  s2->as->lower[i] = gfc_copy_expr (s1->as->lower[i]);
+	}
+
   if (s1->as->type != s2->as->type)
 	{
 	  snprintf (errmsg, err_len, "Shape mismatch in argument '%s'",
@@ -2617,7 +2630,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual,
   || (actual->rank == 0 && formal->attr.dimension
 	  && gfc_is_coindexed (actual)))
 {
-  if (where 
+  if (where
 	  && (!formal->attr.artificial || (!formal->maybe_array
 	   && !maybe_dummy_array_arg (actual
 	{
@@ -2708,7 +2721,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual,
 
   if (ref == NULL && actual->expr_type != EXPR_NULL)
 {
-  if (where 
+  if (where
 	  && (!formal->attr.artificial || (!formal->maybe_array
 	   && !maybe_dummy_array_arg (actual
 	{
@@ -3965,7 +3978,7 @@ gfc_procedure_use (gfc_symbol *sym, gfc_actual_arglist **ap, locus *where)
   if (!gfc_compare_actual_formal (ap, dummy_args, 0, sym->attr.elemental,
   sym->attr.proc == PROC_ST_FUNCTION, where))
 return false;
- 
+
   if (!check_intents (dummy_args, *ap))
 return false;
 
! { dg-do run }
!
! Test the fix for PR96320 in which the assumed shape of 'arg' in the
! interface for 'bar' was mirrored by the 'arg' in the module procedure
! incorrectly have deferred shape.
!
! Contributed by Damian Rouson  
!
module foobar
  type foo
  contains
procedure, nopass :: bar1
procedure, nopass :: bar2
procedure, nopass :: bar3
  end type

  interface

module subroutine bar1(arg)
  character(len=*) arg(:)
end subroutine

module subroutine bar2(arg)
  character(len=*) arg(3:)
end subroutine

module subroutine bar3(arg)
  character(len=*) arg(2)
end subroutine

  end interface
contains

  module procedure bar1
if (lbound(arg, 1) .ne. 1) stop 1
if (arg(3) .ne. 'hijk') stop 2
  end procedure

! Make sure that the lower bound of an assumed shape array dummy,
! if defined, is passed to the module procedure.

  module procedure bar2
if (lbound(arg, 1) .ne. 3) stop 3
if (arg(3) .ne. 'abcd') stop 4
  end procedure

! This makes sure that a dummy with explicit shape has the upper
! bound correctly set in the module procedure.

  module procedure bar3
if (lbound(arg, 1) .ne. 1) stop 5
if (arg(3) .ne. 'hijk') stop 6   ! { dg-warning "is out of bounds" }
  end procedure

end module

  use foobar
  character(4) :: list(3) = ['abcd', 'efgh' , 'hijk']
  type(foo) :: f
  call f%bar1(list)
  call f%bar2(list)
  call f%bar3(list)
end


Re: [stage1][PATCH] Make TOPN counter dynamically allocated.

2020-07-31 Thread Jan Hubicka
> On 7/31/20 10:47 AM, Jan Hubicka wrote:
> > I think this needs to be restricted to targets that have dl library
> 
> Hm, I can't find a dg-require that is used for similar test-cases.
OK, let's get it in and see if it breaks :)

Honza
> 
> Martin


Re: [PATCH v4] dse: Remove partial load after full store for high part access[PR71309]

2020-07-31 Thread Richard Sandiford
Sorry for the slow reply.

luoxhu  writes:
> Hi Richard,
>
> This is the updated version that could pass all regression tests on
> Power9-LE.
> Just need another "maybe_lt (GET_MODE_SIZE (new_mode), access_size)"
> before generating the shift for store_info->const_rhs to ensure the
> correct constant is generated; take testsuite/gfortran1/equiv_2.x for example:
>
> equiv_2.x-equiv_2.f90.264r.cse2:
> 5: r119:DI=0x6867666564636261
> 6: [sfp:DI+0x20]=r119:DI
> 18: r123:SI=0x65646362
> 19: r124:SI=[sfp:DI+0x21]
> 20: r125:CC=cmp(r124:SI,r123:SI)
>
> Bad code: taking the SImode subreg0 and shifting right 8 bits yields
> 0x646362 in insn #32, equiv_2.x-equiv_2.f90.265r.dse1:
> 5:  r119:DI=0x6867666564636261
> 32: r126:SI=0x646362
> 6:  [sfp:DI+0x20]=r119:DI
> 18: r123:SI=0x65646362
> 19: r124:SI=r126:SI
> 20: r125:CC=cmp(r124:SI,r123:SI)
>
> Good code, yielding 0x65646362 in insn #32, equiv_2.x-equiv_2.f90.265r.dse1:
> 5:  r119:DI=0x6867666564636261
> 32: r126:SI=0x65646362
> 6:  [sfp:DI+0x20]=r119:DI
> 18: r123:SI=0x65646362
> 19: r124:SI=r126:SI
> 20: r125:CC=cmp(r124:SI,r123:SI)
>
> So the new_mode size should also be greater than or equal to access_size
> for the const_rhs shift.
> It seems a bit more complicated now...

Yeah.  The structure of this code makes it hard to make changes to it. :-/

Since the const_rhs code is only trying to fold the operation at compile
time, without regard for what the target supports at runtime, I think it
*does* make sense to use smallest_int_mode_for_size there.  I.e. we should
be able to hoist the code out of the loop and do:

  if (store_info->const_rhs)
{
  auto new_mode = smallest_int_mode_for_size (access_size * BITS_PER_UNIT);
  auto byte = subreg_lowpart_offset (new_mode, store_mode);
  rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
 store_mode, byte);
  if (ret && CONSTANT_P (ret))
…
}

  if (require_cst)
return NULL_RTX;

  …the current loop…

I don't see any reason to restrict the const_rhs case to BITS_PER_WORD.
And if the simplify_subreg above fails for new_mode, I can't think it
would succeed for a wider mode (which would have to provide *more*
constant data than the failed subreg).

> @@ -1728,8 +1728,7 @@ find_shift_sequence (poly_int64 access_size,
>   the machine.  */
>  
>opt_scalar_int_mode new_mode_iter;
> -  FOR_EACH_MODE_FROM (new_mode_iter,
> -   smallest_int_mode_for_size (access_size * BITS_PER_UNIT))
> +  FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)

Sorry, I now realise it would be better to start at:

smallest_int_mode_for_size (GET_MODE_BITSIZE (read_mode))

Thanks,
Richard


Re: [PATCH] avoid -Wnonnull on synthesized condition in static_cast (PR 96003)

2020-07-31 Thread Martin Sebor via Gcc-patches

On 7/30/20 1:47 PM, Jason Merrill wrote:

On 7/30/20 12:25 PM, Martin Sebor wrote:

On 7/29/20 2:27 PM, Jason Merrill wrote:

On 7/23/20 3:08 PM, Martin Sebor wrote:

Another test case added to the bug since I posted the patch shows
that the no-warning bit set by the C++ front end is easily lost
during early folding, triggering the -Wnonull again despite
the suppression.  The attached revision tweaks the folder in just
this one case to let the suppression take effect in this situation.

In light of how pervasive this potential for stripping looks (none
of the other folders preserves the bit) the tweak feels like a
band-aid rather than a general solution.  But implementing something
better (and mainly developing tests to verify that it works in
general) would probably be quite the project.  So I leave it for
some other time.


How about in check_function_arguments_recurse COND_EXPR handling, 
checking for TREE_NO_WARNING on the condition?  Then we wouldn't need 
to deal with setting and copying it on the COND_EXPR itself.


The condition is already checked at the beginning of the function.


I mean:


   if (TREE_CODE (param) == COND_EXPR)
 {
+  if (TREE_NO_WARNING (TREE_OPERAND (param, 0)))
+   return;
+
   /* Simplify to avoid warning for an impossible case.  */


But your patch is OK as is.


It only occurred to me after I replied that that's what you meant.
I had considered it, but relying on the no-warning bit on one expression
to decide whether to warn for another seemed a bit fragile.  I checked
in the original and final patch in r11-2457.

Martin




But the latest trunk doesn't seem to need the fold-const.c change
anymore to suppress the warning and the original patch is good
enough.  The updated test should catch the regression if
the COND_EXPR should again lose the no warning bit.

Martin





On 7/17/20 1:00 PM, Martin Sebor wrote:

The recent enhancement to treat the implicit this pointer argument
as nonnull in member functions triggers spurious -Wnonnull for
the synthesized conditional expression the C++ front end replaces
the pointer with in some static_cast expressions.  The front end
already sets the no-warning bit for the test but not for the whole
conditional expression, so the attached fix extends the same solution
to it.

The consequence of this fix is that user-written code like this:

   static_cast(p ? p : 0)->f ();
or
   static_cast(p ? p : nullptr)->f ();

don't trigger the warning because they are both transformed into
the same expression as:

   static_cast(p)->f ();

What still does trigger it is this:

   static_cast(p ? p : (T*)0)->f ();

because here it's the inner COND_EXPR's no-warning bit that's set
(the outer one is clear), whereas in the former expressions it's
the other way around.  It would be nice if this worked consistently
but I didn't see an easy way to do that and more than a quick fix
seems outside the scope for this bug.

Another case reported by someone else in the same bug involves
a dynamic_cast.  A simplified test case goes something like this:

   if (dynamic_cast(p))
 dynamic_cast(p)->f ();

The root cause is the same: the front end emitting the COND_EXPR

   ((p != 0) ? ((T*)__dynamic_cast(p, (& _ZTI1B), (& _ZTI1C), 0)) : 0)

I decided not to suppress the warning in this case because doing
so would also suppress it in unconditional calls with the result
of the cast:

   dynamic_cast(p)->f ();

and that doesn't seem helpful.  Instead, I'd suggest to make
the second cast in the if statement to reference to T&:

   if (dynamic_cast(p))
 dynamic_cast(*p).f ();

Martin











[committed] d: Fix regression, all 32-bit execution tests FAIL: internal error printing module cycle

2020-07-31 Thread Iain Buclaw via Gcc-patches
Hi,

This patch fixes a regression caused by 6ee874f1353.

For 32-bit btr(), BIT_NOT_EXPR was being generated twice, something that
was not seen with the 64-bit variant.  Removed the second call to fix
the generated code.
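
As a reduced C++ model of the intended btr semantics (my reading of the
fix, not the actual tree-building code): the mask must be complemented
exactly once, since a second BIT_NOT cancels the first and the bit would
never be cleared.

#include <cstdio>

// model of btr: test bit BITNUM in *P, then reset it
unsigned btr (unsigned *p, unsigned bitnum)
{
  unsigned mask = 1u << bitnum;
  unsigned old = *p & mask;  // the condition result
  *p &= ~mask;               // a single complement resets the bit
  return old;
}

int main ()
{
  unsigned v = 0xff;
  unsigned old = btr (&v, 3);
  std::printf ("old=%u v=%#x\n", (unsigned) (old != 0), v);  // old=1 v=0xf7
}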

Bootstrapped on x86_64-linux-gnu and checked for regressions on
-m32 and -mx32 multilib configurations.  Committed to mainline.

Regards
Iain.

---
gcc/d/ChangeLog:

PR d/96393
* intrinsics.cc (expand_intrinsic_bt): Don't generate BIT_NOT_EXPR for
btr32 intrinsic.
---
 gcc/d/intrinsics.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 8eec0af60ee..486db127747 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -351,9 +351,6 @@ expand_intrinsic_bt (intrinsic_code intrinsic, tree callexp)
 }
 
   /* ptr[bitnum / size] op= mask;  */
-  if (intrinsic == INTRINSIC_BTR)
-bitnum = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (bitnum), bitnum);
-
   ptr = modify_expr (ptr, fold_build2 (code, TREE_TYPE (ptr), ptr, bitnum));
 
   /* Store the condition result in a temporary, and return expressions in
-- 
2.25.1



[committed] d: Split up the grouped compilable and runnable tests.

2020-07-31 Thread Iain Buclaw via Gcc-patches
Hi,

This patch breaks up the big monolithic tests in the gdc.dg test directory.

The majority of tests in runnable are really compilable/ICE tests, and
have dg-do adjusted where necessary.  Tests that had a dependency
on Phobos have also been reproduced and reduced with all imports
stripped from the test.

The end result is a collection of tests that only check the compiler bug
that was being fixed, rather than the library, and a reduction in time
spent running all tests.

Regression tested on x86_64-linux-gnu with -m32 and -mx32 multilib
configurations.  Committed to mainline.

Regards
Iain.

---
gcc/testsuite/ChangeLog:

* gdc.dg/compilable.d: Removed.
* gdc.dg/gdc108.d: New test.
* gdc.dg/gdc115.d: New test.
* gdc.dg/gdc121.d: New test.
* gdc.dg/gdc122.d: New test.
* gdc.dg/gdc127.d: New test.
* gdc.dg/gdc131.d: New test.
* gdc.dg/gdc133.d: New test.
* gdc.dg/gdc141.d: New test.
* gdc.dg/gdc142.d: New test.
* gdc.dg/gdc15.d: New test.
* gdc.dg/gdc17.d: New test.
* gdc.dg/gdc170.d: New test.
* gdc.dg/gdc171.d: New test.
* gdc.dg/gdc179.d: New test.
* gdc.dg/gdc183.d: New test.
* gdc.dg/gdc186.d: New test.
* gdc.dg/gdc187.d: New test.
* gdc.dg/gdc19.d: New test.
* gdc.dg/gdc191.d: New test.
* gdc.dg/gdc194.d: New test.
* gdc.dg/gdc196.d: New test.
* gdc.dg/gdc198.d: New test.
* gdc.dg/gdc200.d: New test.
* gdc.dg/gdc204.d: New test.
* gdc.dg/gdc210.d: New test.
* gdc.dg/gdc212.d: New test.
* gdc.dg/gdc213.d: New test.
* gdc.dg/gdc218.d: New test.
* gdc.dg/gdc223.d: New test.
* gdc.dg/gdc231.d: New test.
* gdc.dg/gdc239.d: New test.
* gdc.dg/gdc24.d: New test.
* gdc.dg/gdc240.d: New test.
* gdc.dg/gdc241.d: New test.
* gdc.dg/gdc242a.d: New test.
* gdc.dg/gdc242b.d: New test.
* gdc.dg/gdc248.d: New test.
* gdc.dg/gdc250.d: New test.
* gdc.dg/gdc251.d: New test.
* gdc.dg/gdc253a.d: New test.
* gdc.dg/gdc253b.d: New test.
* gdc.dg/gdc255.d: New test.
* gdc.dg/gdc256.d: New test.
* gdc.dg/gdc261.d: New test.
* gdc.dg/gdc27.d: New test.
* gdc.dg/gdc273.d: New test.
* gdc.dg/gdc280.d: New test.
* gdc.dg/gdc284.d: New test.
* gdc.dg/gdc285.d: New test.
* gdc.dg/gdc286.d: New test.
* gdc.dg/gdc300.d: New test.
* gdc.dg/gdc309.d: New test.
* gdc.dg/gdc31.d: New test.
* gdc.dg/gdc35.d: New test.
* gdc.dg/gdc36.d: New test.
* gdc.dg/gdc37.d: New test.
* gdc.dg/gdc4.d: New test.
* gdc.dg/gdc43.d: New test.
* gdc.dg/gdc47.d: New test.
* gdc.dg/gdc51.d: New test.
* gdc.dg/gdc57.d: New test.
* gdc.dg/gdc66.d: New test.
* gdc.dg/gdc67.d: New test.
* gdc.dg/gdc71.d: New test.
* gdc.dg/gdc77.d: New test.
* gdc.dg/imports/gdc239.d: Remove phobos dependency.
* gdc.dg/imports/gdc241a.d: Updated imports.
* gdc.dg/imports/gdc241b.d: Likewise.
* gdc.dg/imports/gdc251a.d: Likewise.
* gdc.dg/imports/gdc253.d: Rename to...
* gdc.dg/imports/gdc253a.d: ...this.
* gdc.dg/imports/gdc253b.d: New.
* gdc.dg/imports/gdc36.d: New.
* gdc.dg/imports/runnable.d: Removed.
* gdc.dg/link.d: Removed.
* gdc.dg/runnable.d: Removed.
* gdc.dg/runnable2.d: Removed.
* gdc.dg/simd.d: Remove phobos dependency.
---
 gcc/testsuite/gdc.dg/compilable.d |  444 --
 gcc/testsuite/gdc.dg/gdc108.d |   19 +
 gcc/testsuite/gdc.dg/gdc115.d |   16 +
 gcc/testsuite/gdc.dg/gdc121.d |4 +
 gcc/testsuite/gdc.dg/gdc122.d |   36 +
 gcc/testsuite/gdc.dg/gdc127.d |6 +
 gcc/testsuite/gdc.dg/gdc131.d |   15 +
 gcc/testsuite/gdc.dg/gdc133.d |   16 +
 gcc/testsuite/gdc.dg/gdc141.d |   14 +
 gcc/testsuite/gdc.dg/gdc142.d |   15 +
 gcc/testsuite/gdc.dg/gdc15.d  |   35 +
 gcc/testsuite/gdc.dg/gdc17.d  |   37 +
 gcc/testsuite/gdc.dg/gdc170.d |   21 +
 gcc/testsuite/gdc.dg/gdc171.d |   38 +
 gcc/testsuite/gdc.dg/gdc179.d |   32 +
 gcc/testsuite/gdc.dg/gdc183.d |   60 +
 gcc/testsuite/gdc.dg/gdc186.d |   60 +
 gcc/testsuite/gdc.dg/gdc187.d |   40 +
 gcc/testsuite/gdc.dg/gdc19.d  |8 +
 gcc/testsuite/gdc.dg/gdc191.d |  201 +++
 gcc/testsuite/gdc.dg/gdc194.d |9 +
 gcc/testsuite/gdc.dg/gdc196.d |   21 +
 gcc/testsuite/gdc.dg/gdc198.d |   71 +
 gcc/testsuite/gdc.dg/gdc200.d |  

[committed] libstdc++: Remove duplicate dg-do directive

2020-07-31 Thread Jonathan Wakely via Gcc-patches
Also add an effective target to clarify it should only run for C++17 and
later.

libstdc++-v3/ChangeLog:

* testsuite/20_util/time_point_cast/rounding.cc: Remove
duplicate dg-do directive and add c++17 effective target.

Tested x86_64-linux, committed to trunk.


commit 351f60794c116d25a78da5220f3da74b4268cb8d
Author: Jonathan Wakely 
Date:   Fri Jul 31 17:51:00 2020

libstdc++: Remove duplicate dg-do directive

Also add an effective target to clarify it should only run for C++17 and
later.

libstdc++-v3/ChangeLog:

* testsuite/20_util/time_point_cast/rounding.cc: Remove
duplicate dg-do directive and add c++17 effective target.

diff --git a/libstdc++-v3/testsuite/20_util/time_point_cast/rounding.cc 
b/libstdc++-v3/testsuite/20_util/time_point_cast/rounding.cc
index 7cf4931645e..9918b83bac1 100644
--- a/libstdc++-v3/testsuite/20_util/time_point_cast/rounding.cc
+++ b/libstdc++-v3/testsuite/20_util/time_point_cast/rounding.cc
@@ -15,10 +15,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do run { target c++11 } }
-
 // { dg-options "-std=gnu++17" }
-// { dg-do compile }
+// { dg-do compile { target c++17 } }
 
 #include 
 


[committed] libstdc++: Add -Wno-deprecated for tests that warn in C++20

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/20_util/tuple/78939.cc: Suppress warnings about
deprecation of volatile-qualified structured bindings in C++20.
* testsuite/20_util/variable_templates_for_traits.cc: Likewise
for deprecation of is_pod in C++20

Tested x86_64-linux, committed to trunk.


commit 95edead9aab7a70636f87ea041f05469dc41d9a9
Author: Jonathan Wakely 
Date:   Fri Jul 31 17:51:00 2020

libstdc++: Add -Wno-deprecated for tests that warn in C++20

libstdc++-v3/ChangeLog:

* testsuite/20_util/tuple/78939.cc: Suppress warnings about
deprecation of volatile-qualified structured bindings in C++20.
* testsuite/20_util/variable_templates_for_traits.cc: Likewise
for deprecation of is_pod in C++20

diff --git a/libstdc++-v3/testsuite/20_util/tuple/78939.cc 
b/libstdc++-v3/testsuite/20_util/tuple/78939.cc
index d238f75bb8a..b2ff324b232 100644
--- a/libstdc++-v3/testsuite/20_util/tuple/78939.cc
+++ b/libstdc++-v3/testsuite/20_util/tuple/78939.cc
@@ -16,6 +16,7 @@
 // .
 
 // { dg-options "-std=gnu++17" }
+// { dg-additional-options "-Wno-deprecated" { target c++2a } }
 // { dg-do compile { target c++17 } }
 
 // PR libstdc++/78939
@@ -36,7 +37,7 @@ int
 test02()
 {
   A a{};
-  volatile auto [i, j] = a;
+  volatile auto [i, j] = a; // deprecated in C++20
   return i + j;
 }
 
@@ -44,6 +45,6 @@ int
 test03()
 {
   A a{};
-  const volatile auto [i, j] = a;
+  const volatile auto [i, j] = a; // deprecated in C++20
   return i + j;
 }
diff --git a/libstdc++-v3/testsuite/20_util/variable_templates_for_traits.cc 
b/libstdc++-v3/testsuite/20_util/variable_templates_for_traits.cc
index 0f1625a8cb6..9c1f82b5c86 100644
--- a/libstdc++-v3/testsuite/20_util/variable_templates_for_traits.cc
+++ b/libstdc++-v3/testsuite/20_util/variable_templates_for_traits.cc
@@ -1,5 +1,6 @@
 // { dg-options "-std=gnu++17" }
-// { dg-do compile }
+// { dg-additional-options "-Wno-deprecated" { target c++2a } }
+// { dg-do compile { target c++17 } }
 
 // Copyright (C) 2014-2020 Free Software Foundation, Inc.
 //
@@ -142,6 +143,7 @@ static_assert(is_standard_layout_v
 static_assert(!is_standard_layout_v
  && !is_standard_layout::value, "");
 
+// Deprecated in C++20
 static_assert(is_pod_v
  && is_pod::value, "");
 static_assert(!is_pod_v


[committed] libstdc++: Adjust tests that give different results in C++20

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/20_util/is_aggregate/value.cc: Adjust for changes to
definition of aggregates in C++20.
* testsuite/20_util/optional/requirements.cc: Adjust for
defaulted comparisons in C++20.

Tested x86_64-linux, committed to trunk.


commit 8e2592a88821511aa45c3325246f3b08a88fa063
Author: Jonathan Wakely 
Date:   Fri Jul 31 17:51:00 2020

libstdc++: Adjust tests that give different results in C++20

libstdc++-v3/ChangeLog:

* testsuite/20_util/is_aggregate/value.cc: Adjust for changes to
definition of aggregates in C++20.
* testsuite/20_util/optional/requirements.cc: Adjust for
defaulted comparisons in C++20.

diff --git a/libstdc++-v3/testsuite/20_util/is_aggregate/value.cc 
b/libstdc++-v3/testsuite/20_util/is_aggregate/value.cc
index bef9dc4ca07..085eb557419 100644
--- a/libstdc++-v3/testsuite/20_util/is_aggregate/value.cc
+++ b/libstdc++-v3/testsuite/20_util/is_aggregate/value.cc
@@ -45,8 +45,6 @@ void test01()
UnionType>(true), "");
   static_assert(test_category(true), "");
-  static_assert(test_category(true), "");
   static_assert(test_category(true), "");
   static_assert(test_category(true), "");
   static_assert(test_category(true), "");
-  pos();
+#if __cplusplus == 201703L
+  static_assert(test_category(true), "");
+  pos();
+#endif
 
   // Negative tests.
   static_assert(test_category(false), "");
   neg();
+#if __cplusplus > 201703L
+  // In C++20 aggregates cannot have user-declared constructors.
+  static_assert(test_category(false), "");
+  neg();
+#endif
 }
diff --git a/libstdc++-v3/testsuite/20_util/optional/requirements.cc 
b/libstdc++-v3/testsuite/20_util/optional/requirements.cc
index 560e6f7691f..d8d52ab5367 100644
--- a/libstdc++-v3/testsuite/20_util/optional/requirements.cc
+++ b/libstdc++-v3/testsuite/20_util/optional/requirements.cc
@@ -312,7 +312,10 @@ struct JustEq {};
 bool operator==(const JustEq&, const JustEq&);
 
 static_assert(is_eq_comparable>::value, "");
+#if __cplusplus == 201703L
+// In C++20 operator!= can be synthesized from operator==
 static_assert(!is_neq_comparable>::value, "");
+#endif
 static_assert(!is_lt_comparable>::value, "");
 static_assert(!is_gt_comparable>::value, "");
 static_assert(!is_le_comparable>::value, "");
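
For reference, a minimal standalone illustration of the two C++20 behavior
changes these tests adjust for (a sketch of the core language rules, not
taken from the patches; compile with -std=c++20):

#include <type_traits>

// C++20: any user-declared constructor, even a deleted one, disqualifies
// a class from being an aggregate (in C++17 only user-*provided* ones did).
struct Del { Del () = delete; int i; };
static_assert (!std::is_aggregate_v<Del>);

// C++20: a != b is rewritten as !(a == b) when only operator== is declared.
struct Eq { int v; bool operator== (const Eq&) const = default; };
static_assert (Eq{1} != Eq{2});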


[committed] libstdc++: Remove accidental -std=gnu++17 from test

2020-07-31 Thread Jonathan Wakely via Gcc-patches
This was probably copied from a std::filesystem test and the -std option
wasn't removed.

libstdc++-v3/ChangeLog:

* testsuite/experimental/filesystem/filesystem_error/cons.cc:
Remove -std=gnu++17 option.

Tested x86_64-linux, committed to trunk.


commit ed0b4bb29a50d6be0e4b6411b3cc9f22967f1313
Author: Jonathan Wakely 
Date:   Fri Jul 31 18:02:10 2020

libstdc++: Remove accidental -std=gnu++17 from test

This was probably copied from a std::filesystem test and the -std option
wasn't removed.

libstdc++-v3/ChangeLog:

* testsuite/experimental/filesystem/filesystem_error/cons.cc:
Remove -std=gnu++17 option.

diff --git 
a/libstdc++-v3/testsuite/experimental/filesystem/filesystem_error/cons.cc 
b/libstdc++-v3/testsuite/experimental/filesystem/filesystem_error/cons.cc
index 19dcc2a19bf..0c7420c1461 100644
--- a/libstdc++-v3/testsuite/experimental/filesystem/filesystem_error/cons.cc
+++ b/libstdc++-v3/testsuite/experimental/filesystem/filesystem_error/cons.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-options "-std=gnu++17 -lstdc++fs" }
+// { dg-options "-lstdc++fs" }
 // { dg-do run { target c++11 } }
 // { dg-require-filesystem-ts "" }
 


Re: [PATCH] Define TARGET_TRULY_NOOP_TRUNCATION to false.

2020-07-31 Thread Tom de Vries
On 7/16/20 12:02 PM, Roger Sayle wrote:
> 
> Many thanks to Richard Biener for approving the midde-end
> patch that cleared the way for this one.  This nvptx patch
> defines the target hook TARGET_TRULY_NOOP_TRUNCATION to
> false, indicating that integer truncations require explicit
> instructions.  nvptx.c already defines TARGET_MODES_TIEABLE_P
> and TARGET_CAN_CHANGE_MODE_CLASS to false, and as (previously)
> documented that may require TARGET_TRULY_NOOP_TRUNCATION to
> be defined likewise.
> 
> This patch decreases the number of unexpected failures in
> the testsuite by 10, and increases the number of expected
> passes by 4, including these previous FAILs/ICEs:
> gcc.c-torture/compile/opout.c
> gcc.dg/torture/pr79125.c
> gcc.dg/tree-ssa/pr92085-1.c
> 

That's great :)

I did an investigation with a test program doing a truncation (
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96401 ) to get the context
clear in my head.

The patch looks good to me, on the basis of the rationale and the fact
that it fixes long-standing ICEs.

[ FWIW, my investigation showed that we may have a benefit from
TARGET_MODES_TIEABLE_P == true, so perhaps part of that rationale might
disappear, but I don't think that's relevant at this point. ]

> Unfortunately there is one testsuite failure that used to
> pass gcc.target/nvptx/v2si-cvt.c, but this isn't an ICE or
> incorrect code.  As explained in this test, the scan-assembler
> already isn't testing what it should.  Given that there are
> several (nvptx) patches pending review that affect code
> generation of this example, and (perhaps) more on the way,
> I propose letting this test FAIL for now until the dust
> settles and it becomes clear what instruction(s) should be
> generated (certainly not a cvt.u32.u32).
> 

I've filed the regression at
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96403.

> This patch has been tested on nvptx-none hosted on
> x86_64-pc-linux-gnu with "make" and "make check" with
> fewer ICEs and no wrong code regressions.
> Ok for mainline?
> 

Committed to trunk, with the FAILing tests replaced by a mention of the
regression PR.

Thanks,
- Tom


Re: [PATCH] c++: Fixing the wording of () aggregate-init [PR92812]

2020-07-31 Thread Marek Polacek via Gcc-patches
On Wed, Jul 29, 2020 at 07:21:46PM -0400, Jason Merrill via Gcc-patches wrote:
> On 7/20/20 7:28 PM, Marek Polacek wrote:
> > P1975R0 tweaks the static_cast wording: it says that "An expression e can be
> > explicitly converted to a type T if [...] T is an aggregate type having a 
> > first
> > element x and there is an implicit conversion sequence from e to the type of
> > x."  This already works for classes, e.g.:
> > 
> >struct Aggr { int x; int y; };
> >Aggr a = static_cast(1);
> > 
> > albeit I noticed a -Wmissing-field-initializers warning which is unlikely to
> > be
> > helpful in this context, as there's nothing like static_cast(1, 2)
> > to quash that warning.
> 
> You could write Aggr{1,2} instead?
> 
> This warning could be helpful if they didn't realize that what they were
> writing is initializing one element of an aggregate.

Ok, I can drop it.  Though an explicit cast usually suppresses warnings like that.

> > However, the proposal also mentions "If T is ``array of unknown bound of 
> > U'',
> > this direct-initialization defines the type of the expression as U[1]" which
> > suggests that this should work for arrays (they're aggregates too, after
> > all).
> > Ville, can you confirm that these
> > 
> >int (&&r)[3] = static_cast(42);
> >int (&&r2)[1] = static_cast(42);
> > 
> > are supposed to work now?  There's no {} variant to check.  Thanks.
> 
> I'll review the discussion of this later.

It's become clear that the array cases are supposed to work, so I think
let's drop this patch and I'll post a new patch when I get the array cases
working.

Thanks,
Marek



Re: PING [Patch][Middle-end]Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]

2020-07-31 Thread Uros Bizjak via Gcc-patches
At 22:05 on Tue, 28 Jul 2020, Qing Zhao
wrote:
>
>
> Richard and Uros,
>
> Could you please review the change that H.J and I rewrote based on your
comments in the previous round of discussion?
>
> This patch is a nice security enhancement for GCC that has been requested
by security people for quite some time.
>
> Thanks a lot for your time.

I'll be away from the keyboard for the next week, but the patch needs a
middle end approval first.

That said, x86 parts looks OK.

Uros.
> Qing
>
> > On Jul 14, 2020, at 9:45 AM, Qing Zhao via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi, Gcc team,
> >
> > This patch is a follow-up on the previous patch and corresponding
> > discussion:
> > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/545101.html
> >
> > From the previous round of discussion, the major issues raised were:
> >
> > A. should be rewritten by using regsets infrastructure.
> > B. Put the patch into middle-end instead of x86 backend.
> >
> > This new patch is rewritten based on the above 2 comments.  The major
changes compared to the previous patch are:
> >
> > 1. Change the names of the option and attribute from
> > -mzero-caller-saved-regs=[skip|used-gpr|all-gpr|used|all]  and
> > zero_caller_saved_regs("skip|used-gpr|all-gpr|used|all")
> > to:
> > -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]  and
> > zero_call_used_regs("skip|used-gpr|all-gpr|used|all")
> > Add the new option and  new attribute in general.
> > 2. The main code generation part is moved from i386 backend to
middle-end;
> > 3. Add 4 target-hooks;
> > 4. Implement these 4 target-hooks on i386 backend.
> > 5. On a target that does not implement the target hook, issue error for
the new option, issue warning for the new attribute.
> >
> > The patch is as following:
> >
> > [PATCH] Add -fzero-call-used-regs=[skip|used-gpr|all-gpr|used|all]
> > command-line option and
> > zero_call_used_regs("skip|used-gpr|all-gpr|used|all") function
> > attribute:
> >
> >  1. -fzero-call-used-regs=skip and zero_call_used_regs("skip")
> >
> >  Don't zero call-used registers upon function return.
> >
> >  2. -fzero-call-used-regs=used-gpr and zero_call_used_regs("used-gpr")
> >
> >  Zero used call-used general purpose registers upon function return.
> >
> >  3. -fzero-call-used-regs=all-gpr and zero_call_used_regs("all-gpr")
> >
> >  Zero all call-used general purpose registers upon function return.
> >
> >  4. -fzero-call-used-regs=used and zero_call_used_regs("used")
> >
> >  Zero used call-used registers upon function return.
> >
> >  5. -fzero-call-used-regs=all and zero_call_used_regs("all")
> >
> >  Zero all call-used registers upon function return.
> >
> > The feature is implemented in middle-end. But currently is only valid
on X86.
> >
> > Tested on x86-64 and aarch64 with bootstrapping GCC trunk, making
> > -fzero-call-used-regs=used-gpr, -fzero-call-used-regs=all-gpr
> > -fzero-call-used-regs=used, and -fzero-call-used-regs=all enabled
> > by default on x86-64.
> >
> > Please take a look and let me know any more comment?
> >
> > thanks.
> >
> > Qing
> >
> >
> > 
> >
> > gcc/ChangeLog:
> >
> > 2020-07-13  Qing Zhao
> > 2020-07-13  H.J. Lu  <hjl.to...@gmail.com>
> >
> >   * common.opt: Add new option -fzero-call-used-regs.
> >   * config/i386/i386.c (ix86_zero_call_used_regno_p): New function.
> >   (ix86_zero_call_used_regno_mode): Likewise.
> >   (ix86_zero_all_vector_registers): Likewise.
> >   (ix86_expand_prologue): Replace gen_prologue_use with
> >   gen_pro_epilogue_use.
> >   (TARGET_ZERO_CALL_USED_REGNO_P): Define.
> >   (TARGET_ZERO_CALL_USED_REGNO_MODE): Define.
> >   (TARGET_PRO_EPILOGUE_USE): Define.
> >   (TARGET_ZERO_ALL_VECTOR_REGISTERS): Define.
> >   * config/i386/i386.md: Replace UNSPECV_PROLOGUE_USE
> >   with UNSPECV_PRO_EPILOGUE_USE.
> >   * coretypes.h (enum zero_call_used_regs): New type.
> >   * doc/extend.texi: Document the new zero_call_used_regs attribute.
> >   * doc/invoke.texi: Document the new -fzero-call-used-regs option.
> >   * doc/tm.texi: Regenerate.
> >   * doc/tm.texi.in (TARGET_ZERO_CALL_USED_REGNO_P): New hook.
> >   (TARGET_ZERO_CALL_USED_REGNO_MODE): Likewise.
> >   (TARGET_PRO_EPILOGUE_USE): Likewise.
> >   (TARGET_ZERO_ALL_VECTOR_REGISTERS): Likewise.
> >   * function.c (is_live_reg_at_exit): New function.
> >   (gen_call_used_regs_seq): Likewise.
> >   (make_epilogue_seq): Call gen_call_used_regs_seq.
> >   * function.h (is_live_reg_at_exit): Declare.
> >   * target.def (zero_call_used_regno_p): New hook.
> >   (zero_call_used_regno_mode): Likewise.
> >   (pro_epilogue_use): Likewise.
> >   (zero_all_vector_registers): Likewise.
> >   * targhooks.c (default_zero_call_used_regno_p): New function.
> >   (default_zero_call_used_regno_mode): Likewise.
> >   * 

Re: [PATCH] debug/96383 - emit debug info for used external functions

2020-07-31 Thread David Edelsohn via Gcc-patches
On Thu, Jul 30, 2020 at 11:55:19AM +0200, Richard Biener wrote:

> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> OK for trunk and backports?

I'd go with it for trunk and 10.2.1 now and consider further backports
later.  Maybe even defer the 10.2.1 backport for two weeks.

I believe that this patch broke bootstrap on AIX when using DWARF.  I
am experiencing a new comparison failure.  All files differ.  The
difference is in the debugging information.

Thanks, David


[committed] libstdc++: Remove condition around friend declaration (PR 96382)

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

PR libstdc++/96382
* include/bits/stl_iterator.h (reverse_iterator): Friend
declaration should not depend on __cplusplus.

Tested x86_64-linux, committed to trunk.


commit 8abab28bb5c0cd80063518d47494cb6078767b89
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:55:28 2020

libstdc++: Remove condition around friend declaration (PR 96382)

libstdc++-v3/ChangeLog:

PR libstdc++/96382
* include/bits/stl_iterator.h (reverse_iterator): Friend
declaration should not depend on __cplusplus.

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index 60bb40a659f..08e25d842dd 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -129,10 +129,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  typename iterator_traits<_Iterator>::pointer,
   typename iterator_traits<_Iterator>::reference>
 {
-#if __cplusplus >= 201103L
   template
friend class reverse_iterator;
-#endif
 
 #if __cpp_lib_concepts
   // _GLIBCXX_RESOLVE_LIB_DEFECTS


[committed 1/5] libstdc++: Ensure c++NN effective target present in all C++17 tests

2020-07-31 Thread Jonathan Wakely via Gcc-patches
Also reorder some directives so that the dg-options setting -std=gnu++17
comes before the dg-do that requires c++17.

Tested powerpc64le-linux, committed to trunk.


commit 6458742a15f899a7d4e7c57bc649119e8f7aa086
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:02 2020

libstdc++: Ensure c++NN effective target present in all C++17 tests

Also reorder some directives so that the dg-options setting -std=gnu++17
comes before the dg-do that requires c++17.

libstdc++-v3/ChangeLog:

* testsuite/17_intro/headers/c++2017/all_attributes.cc: Add
c++17 effective-target.
* testsuite/17_intro/headers/c++2017/all_no_exceptions.cc:
Likewise.
* testsuite/17_intro/headers/c++2017/all_no_rtti.cc: Likewise.
* testsuite/17_intro/headers/c++2017/all_pedantic_errors.cc:
Likewise.
* testsuite/17_intro/headers/c++2017/operator_names.cc:
Likewise.
* testsuite/17_intro/headers/c++2017/stdc++.cc: Likewise.
* testsuite/17_intro/headers/c++2017/stdc++_multiple_inclusion.cc:
Likewise.
* testsuite/18_support/uncaught_exceptions/uncaught_exceptions.cc:
Likewise.
* testsuite/19_diagnostics/error_code/is_error_code_v.cc:
Likewise.
* testsuite/20_util/any/assign/1.cc: Likewise.
* testsuite/20_util/any/assign/2.cc: Likewise.
* testsuite/20_util/any/assign/emplace.cc: Likewise.
* testsuite/20_util/any/assign/exception.cc: Likewise.
* testsuite/20_util/any/assign/self.cc: Likewise.
* testsuite/20_util/any/cons/1.cc: Likewise.
* testsuite/20_util/any/cons/2.cc: Likewise.
* testsuite/20_util/any/cons/aligned.cc: Likewise.
* testsuite/20_util/any/cons/explicit.cc: Likewise.
* testsuite/20_util/any/cons/in_place.cc: Likewise.
* testsuite/20_util/any/cons/nontrivial.cc: Likewise.
* testsuite/20_util/any/make_any.cc: Likewise.
* testsuite/20_util/any/misc/any_cast.cc: Likewise.
* testsuite/20_util/any/misc/any_cast_no_rtti.cc: Likewise.
* testsuite/20_util/any/misc/swap.cc: Likewise.
* testsuite/20_util/any/modifiers/1.cc: Likewise.
* testsuite/20_util/any/observers/type.cc: Likewise.
* testsuite/20_util/any/requirements.cc: Likewise.
* testsuite/20_util/any/typedefs.cc: Likewise.
* testsuite/20_util/as_const/1.cc: Likewise.
* testsuite/20_util/as_const/rvalue_neg.cc: Likewise.
* testsuite/20_util/bind/is_placeholder_v.cc: Likewise.
* testsuite/20_util/bool_constant/requirements.cc: Likewise.
* 
testsuite/20_util/duration/requirements/treat_as_floating_point_v.cc:
Likewise.
* testsuite/20_util/duration_cast/rounding.cc: Likewise.
* 
testsuite/20_util/enable_shared_from_this/members/weak_from_this.cc:
Likewise.
* testsuite/20_util/function_objects/invoke/59768.cc: Likewise.
* testsuite/20_util/function_objects/not_fn/1.cc: Likewise.
* testsuite/20_util/function_objects/searchers.cc: Likewise.
* testsuite/20_util/in_place/requirements.cc: Likewise.
* 
testsuite/20_util/is_invocable/requirements/explicit_instantiation.cc:
Likewise.
* testsuite/20_util/is_invocable/requirements/typedefs.cc:
Likewise.
* testsuite/20_util/is_invocable/value.cc: Likewise.
* 
testsuite/20_util/is_nothrow_invocable/requirements/explicit_instantiation.cc:
Likewise.
* testsuite/20_util/is_nothrow_invocable/requirements/typedefs.cc:
Likewise.
* 
testsuite/20_util/is_nothrow_swappable/requirements/explicit_instantiation.cc:
Likewise.
* testsuite/20_util/is_nothrow_swappable/requirements/typedefs.cc:
Likewise.
* testsuite/20_util/is_nothrow_swappable/value.cc: Likewise.
* 
testsuite/20_util/is_nothrow_swappable_with/requirements/explicit_instantiation.cc:
Likewise.
* 
testsuite/20_util/is_nothrow_swappable_with/requirements/typedefs.cc:
Likewise.
* testsuite/20_util/is_nothrow_swappable_with/value.cc:
Likewise.
* 
testsuite/20_util/is_swappable/requirements/explicit_instantiation.cc:
Likewise.
* testsuite/20_util/is_swappable/requirements/typedefs.cc:
Likewise.
* testsuite/20_util/is_swappable/value.cc: Likewise.
* 
testsuite/20_util/is_swappable_with/requirements/explicit_instantiation.cc:
Likewise.
* testsuite/20_util/is_swappable_with/requirements/typedefs.cc:
Likewise.
* testsuite/20_util/is_swappable_with/value.cc: Likewise.
 

[committed 2/5] libstdc++: Use c++NN_only effective target in tests

2020-07-31 Thread Jonathan Wakely via Gcc-patches
Some tests really are only intended for a specific -std mode, so add a
target selector to make that explicit.

Also reorder the dg-do directives to come after the dg-options ones, so
that the target selector in the dg-do directive is applied after the
dg-options that sets the -std option.

libstdc++-v3/ChangeLog:

* testsuite/20_util/reference_wrapper/83427.cc: Adjust
effective-target to specific language mode only.
* testsuite/24_iterators/headers/iterator/range_access_c++11.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/range_access_c++14.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/synopsis_c++11.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/synopsis_c++14.cc:
Likewise.
* testsuite/26_numerics/valarray/69116.cc:
Likewise.
* testsuite/30_threads/headers/condition_variable/std_c++0x_neg.cc:
Remove whitespace at end of file.
* testsuite/30_threads/headers/future/std_c++0x_neg.cc:
Likewise.

Tested powerpc64le-linux, committed to trunk.


commit 566f42273467817a31a7a1ae058ba759837eec22
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:02 2020

libstdc++: Use c++NN_only effective target in tests

Some tests really are only intended for a specific -std mode, so add a
target selector to make that explicit.

Also reorder the dg-do directives to come after the dg-options ones, so
that the target selector in the dg-do directive is applied after the
dg-options that sets the -std option.

libstdc++-v3/ChangeLog:

* testsuite/20_util/reference_wrapper/83427.cc: Adjust
effective-target to specific language mode only.
* testsuite/24_iterators/headers/iterator/range_access_c++11.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/range_access_c++14.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/synopsis_c++11.cc:
Likewise.
* testsuite/24_iterators/headers/iterator/synopsis_c++14.cc:
Likewise.
* testsuite/26_numerics/valarray/69116.cc:
Likewise.
* testsuite/30_threads/headers/condition_variable/std_c++0x_neg.cc:
Remove whitespace at end of file.
* testsuite/30_threads/headers/future/std_c++0x_neg.cc:
Likewise.

diff --git a/libstdc++-v3/testsuite/20_util/reference_wrapper/83427.cc 
b/libstdc++-v3/testsuite/20_util/reference_wrapper/83427.cc
index 3dbd0917621..613da848d72 100644
--- a/libstdc++-v3/testsuite/20_util/reference_wrapper/83427.cc
+++ b/libstdc++-v3/testsuite/20_util/reference_wrapper/83427.cc
@@ -16,7 +16,7 @@
 // .
 
 // { dg-options "-std=gnu++17" }
-// { dg-do compile { target c++17 } }
+// { dg-do compile { target c++17_only } }
 
 #include 
 
diff --git 
a/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++11.cc 
b/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++11.cc
index aed0ed45406..099288716e3 100644
--- a/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++11.cc
+++ b/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++11.cc
@@ -1,5 +1,5 @@
 // { dg-options "-std=gnu++11" }
-// { dg-do compile }
+// { dg-do compile { target c++11_only } }
 
 // Copyright (C) 2010-2020 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++14.cc 
b/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++14.cc
index 2523dc3d696..f5923386b79 100644
--- a/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++14.cc
+++ b/libstdc++-v3/testsuite/24_iterators/headers/iterator/range_access_c++14.cc
@@ -1,5 +1,5 @@
 // { dg-options "-std=gnu++14" }
-// { dg-do compile }
+// { dg-do compile { target c++14_only } }
 
 // Copyright (C) 2016-2020 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++11.cc 
b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++11.cc
index da7821e20ee..1fd115abd87 100644
--- a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++11.cc
+++ b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++11.cc
@@ -1,5 +1,5 @@
 // { dg-options "-std=gnu++11" }
-// { dg-do compile }
+// { dg-do compile { target c++11_only } }
 // { dg-require-normal-namespace "" }
 
 // Copyright (C) 2016-2020 Free Software Foundation, Inc.
diff --git 
a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++14.cc 
b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++14.cc
index c2dd942f6e7..f296c852e76 100644
--- a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++14.cc
+++ b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis_c++14.cc
@@ -1,5 +1,5 @@
 // { dg-options "-std=gnu++14" }
-// { dg-

[committed 3/5] libstdc++: Add dg-require-effective-target to std::span assert tests

2020-07-31 Thread Jonathan Wakely via Gcc-patches
The current dg directives say that the tests can run for any standard
mode, but should fail for C++20. What we want is that they only run for
C++20, and are always expected to fail.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/span/back_assert_neg.cc: Split c++2a
effective-target from xfail selector.
* testsuite/23_containers/span/first_2_assert_neg.cc: Likewise.
* testsuite/23_containers/span/first_assert_neg.cc: Likewise.
* testsuite/23_containers/span/front_assert_neg.cc: Likewise.
* testsuite/23_containers/span/index_op_assert_neg.cc: Likewise.
* testsuite/23_containers/span/last_2_assert_neg.cc: Likewise.
* testsuite/23_containers/span/last_assert_neg.cc: Likewise.
* testsuite/23_containers/span/subspan_2_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_3_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_4_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_5_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_6_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_assert_neg.cc: Likewise.

Tested powerpc64le-linux, committed to trunk.


commit a92e0f58d83eff14bd95cf655903debb1d1a03ad
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:02 2020

libstdc++: Add dg-require-effective-target to std::span assert tests

The current dg directives say that the tests can run for any standard
mode, but should fail for C++20. What we want is that they only run for
C++20, and are always expected to fail.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/span/back_assert_neg.cc: Split c++2a
effective-target from xfail selector.
* testsuite/23_containers/span/first_2_assert_neg.cc: Likewise.
* testsuite/23_containers/span/first_assert_neg.cc: Likewise.
* testsuite/23_containers/span/front_assert_neg.cc: Likewise.
* testsuite/23_containers/span/index_op_assert_neg.cc: Likewise.
* testsuite/23_containers/span/last_2_assert_neg.cc: Likewise.
* testsuite/23_containers/span/last_assert_neg.cc: Likewise.
* testsuite/23_containers/span/subspan_2_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_3_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_4_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_5_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_6_assert_neg.cc:
Likewise.
* testsuite/23_containers/span/subspan_assert_neg.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/23_containers/span/back_assert_neg.cc 
b/libstdc++-v3/testsuite/23_containers/span/back_assert_neg.cc
index 76f2a7eb286..f536f91deee 100644
--- a/libstdc++-v3/testsuite/23_containers/span/back_assert_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/span/back_assert_neg.cc
@@ -16,7 +16,8 @@
 // .
 
 // { dg-options "-std=gnu++2a" }
-// { dg-do run { xfail c++2a } }
+// { dg-do run { xfail *-*-* } }
+// { dg-require-effective-target c++2a }
 
 #undef _GLIBCXX_ASSERTIONS
 #define _GLIBCXX_ASSERTIONS
diff --git a/libstdc++-v3/testsuite/23_containers/span/first_2_assert_neg.cc 
b/libstdc++-v3/testsuite/23_containers/span/first_2_assert_neg.cc
index 0019fca5b8b..d959e5c8d1a 100644
--- a/libstdc++-v3/testsuite/23_containers/span/first_2_assert_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/span/first_2_assert_neg.cc
@@ -16,7 +16,8 @@
 // .
 
 // { dg-options "-std=gnu++2a" }
-// { dg-do run { xfail c++2a } }
+// { dg-do run { xfail *-*-* } }
+// { dg-require-effective-target c++2a }
 
 #undef _GLIBCXX_ASSERTIONS
 #define _GLIBCXX_ASSERTIONS
diff --git a/libstdc++-v3/testsuite/23_containers/span/first_assert_neg.cc 
b/libstdc++-v3/testsuite/23_containers/span/first_assert_neg.cc
index 7b93abc9a92..096f78c6405 100644
--- a/libstdc++-v3/testsuite/23_containers/span/first_assert_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/span/first_assert_neg.cc
@@ -16,7 +16,8 @@
 // .
 
 // { dg-options "-std=gnu++2a" }
-// { dg-do run { xfail c++2a } }
+// { dg-do run { xfail *-*-* } }
+// { dg-require-effective-target c++2a }
 
 #undef _GLIBCXX_ASSERTIONS
 #define _GLIBCXX_ASSERTIONS
diff --git a/libstdc++-v3/testsuite/23_containers/span/front_assert_neg.cc 
b/libstdc++-v3/testsuite/23_containers/span/front_assert_neg.cc
index 62dec39c6a0..8687f18a87f 100644
--- a/libstdc++-v3/testsuite/23_containers/span/front_assert_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/span/front_assert_neg.cc
@@ -16,7 +16,8 @@
 // .
 
 // { dg-options "-std=gnu++2a" }
-// { dg-do run { xfail c++2a } }
+// { dg-do run { xfail *-*-* } }
+// {

[committed 4/5] libstdc++: Ensure c++NN effective-target present in more tests

2020-07-31 Thread Jonathan Wakely via Gcc-patches
Add effective-target keywords to tests that would fail for certain
standard modes without the -std=gnu++NN option.

libstdc++-v3/ChangeLog:

* testsuite/18_support/set_terminate.cc: Require C++11 or
higher.
* testsuite/28_regex/simple_c++11.cc: Likewise.
* testsuite/tr1/headers/c++200x/complex.cc: Likewise.
* testsuite/24_iterators/headers/iterator/synopsis.cc:
Require C++14 or lower.

Tested powerpc64le-linux, committed to trunk.


commit 9d613af2b45d910b37044cdbd5f8bb2ac279d06d
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:03 2020

libstdc++: Ensure c++NN effective-target present in more tests

Add effective-target keywords to tests that would fail for certain
standard modes without the -std=gnu++NN option.

libstdc++-v3/ChangeLog:

* testsuite/18_support/set_terminate.cc: Require C++11 or
higher.
* testsuite/28_regex/simple_c++11.cc: Likewise.
* testsuite/tr1/headers/c++200x/complex.cc: Likewise.
* testsuite/24_iterators/headers/iterator/synopsis.cc:
Require C++14 or lower.

diff --git a/libstdc++-v3/testsuite/18_support/set_terminate.cc 
b/libstdc++-v3/testsuite/18_support/set_terminate.cc
index 4df6e068f58..b832f30a365 100644
--- a/libstdc++-v3/testsuite/18_support/set_terminate.cc
+++ b/libstdc++-v3/testsuite/18_support/set_terminate.cc
@@ -16,7 +16,7 @@
 // .
 
 // { dg-options "-std=gnu++11" }
-// { dg-do run }
+// { dg-do run { target c++11 } }
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis.cc 
b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis.cc
index e0b364c3834..53bd9c9891c 100644
--- a/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis.cc
+++ b/libstdc++-v3/testsuite/24_iterators/headers/iterator/synopsis.cc
@@ -1,5 +1,5 @@
 // { dg-options "-std=gnu++98" }
-// { dg-do compile }
+// { dg-do compile { target { ! c++17 } } }
 // { dg-require-normal-namespace "" }
 
 // Copyright (C) 2007-2020 Free Software Foundation, Inc.
diff --git a/libstdc++-v3/testsuite/28_regex/simple_c++11.cc 
b/libstdc++-v3/testsuite/28_regex/simple_c++11.cc
index 056966b238f..8b3fda5c98d 100644
--- a/libstdc++-v3/testsuite/28_regex/simple_c++11.cc
+++ b/libstdc++-v3/testsuite/28_regex/simple_c++11.cc
@@ -16,7 +16,7 @@
 // .
 
 // { dg-options "-std=gnu++11" }
-// { dg-do compile }
+// { dg-do compile { target c++11 } }
 
 #include 
 
diff --git a/libstdc++-v3/testsuite/tr1/headers/c++200x/complex.cc 
b/libstdc++-v3/testsuite/tr1/headers/c++200x/complex.cc
index 9eb9d33301c..652b534cd69 100644
--- a/libstdc++-v3/testsuite/tr1/headers/c++200x/complex.cc
+++ b/libstdc++-v3/testsuite/tr1/headers/c++200x/complex.cc
@@ -1,5 +1,5 @@
-// { dg-do compile }
 // { dg-options "-std=gnu++11" }
+// { dg-do compile { target c++11 } }
 
 // Copyright (C) 2011-2020 Free Software Foundation, Inc.
 //


[committed 5/5] libstdc++: Avoid using __float128 in strict modes

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/26_numerics/numbers/float128.cc: Check
__STRICT_ANSI__ before using __float128.
* 
testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc:
Likewise.

Tested powerpc64le-linux, committed to trunk.


commit dc8c00966ef89581447f549ebdea98dd72003fd8
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:03 2020

libstdc++: Avoid using __float128 in strict modes

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/numbers/float128.cc: Check
__STRICT_ANSI__ before using __float128.
* 
testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc:
Likewise.

diff --git a/libstdc++-v3/testsuite/26_numerics/numbers/float128.cc 
b/libstdc++-v3/testsuite/26_numerics/numbers/float128.cc
index 51ce21570e0..421af7af67f 100644
--- a/libstdc++-v3/testsuite/26_numerics/numbers/float128.cc
+++ b/libstdc++-v3/testsuite/26_numerics/numbers/float128.cc
@@ -20,7 +20,7 @@
 
 #include 
 
-#if defined(_GLIBCXX_USE_FLOAT128)
+#if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
 void
 test01()
 {
diff --git 
a/libstdc++-v3/testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc
 
b/libstdc++-v3/testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc
index 1e7d649b6a7..9617889505f 100644
--- 
a/libstdc++-v3/testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc
+++ 
b/libstdc++-v3/testsuite/std/concepts/concepts.lang/concept.arithmetic/floating_point.cc
@@ -23,7 +23,7 @@
 static_assert( std::floating_point<float> );
 static_assert( std::floating_point<double> );
 static_assert( std::floating_point<long double> );
-#ifdef _GLIBCXX_USE_FLOAT128
+#if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
 static_assert( std::floating_point<__float128> );
 #endif
 


[committed 1/2] libstdc++: Fix tests that fail for C++98

2020-07-31 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/27_io/basic_istream/ignore/char/94749.cc: Use 0
instead of nullptr.
* testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc:
Likewise.

Tested powerpc64le-linux, committed to trunk.


commit 8011f718e241febd6b7a9dae01cde49817f299c4
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:03 2020

libstdc++: Fix tests that fail for C++98

libstdc++-v3/ChangeLog:

* testsuite/27_io/basic_istream/ignore/char/94749.cc: Use 0
instead of nullptr.
* testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc:
Likewise.

diff --git a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc 
b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
index 32b604d5b15..21097c2bff1 100644
--- a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/char/94749.cc
@@ -165,7 +165,7 @@ test05()
   VERIFY(in.gcount() == std::numeric_limits<std::streamsize>::max());
   VERIFY(in.get() == T::eof());
 
-  delete in.rdbuf(nullptr);
+  delete in.rdbuf(0);
 }
 
 void
@@ -210,7 +210,7 @@ test06()
   VERIFY(in.gcount() == std::numeric_limits<std::streamsize>::max());
   VERIFY(in.get() == T::eof());
 
-  delete in.rdbuf(nullptr);
+  delete in.rdbuf(0);
 }
 
 int
diff --git a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc 
b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
index 23d3a0ad170..2473588d307 100644
--- a/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_istream/ignore/wchar_t/94749.cc
@@ -165,7 +165,7 @@ test05()
   VERIFY(in.gcount() == std::numeric_limits<std::streamsize>::max());
   VERIFY(in.get() == T::eof());
 
-  delete in.rdbuf(nullptr);
+  delete in.rdbuf(0);
 }
 
 void
@@ -210,7 +210,7 @@ test06()
   VERIFY(in.gcount() == std::numeric_limits<std::streamsize>::max());
   VERIFY(in.get() == T::eof());
 
-  delete in.rdbuf(nullptr);
+  delete in.rdbuf(0);
 }
 
 int


[committed 2/2] libstdc++: Fix test that fails for C++98

2020-07-31 Thread Jonathan Wakely via Gcc-patches

Local classes have no linkage so cannot be used as template arguments in
C++98.
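
A minimal illustration of that rule (hypothetical code, not from the
testsuite):

  template <typename T>
  void take (T) { }

  void
  use ()
  {
    struct Local { int i; };
    take (Local ());  // error with -std=c++98: 'Local' has no linkage;
                      // accepted from C++11 onwards
  }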

libstdc++-v3/ChangeLog:

* 
testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc:
Move struct to namespace scope.


Tested powerpc64le-linux, committed to trunk.
commit f07fa7a31c89811ad9ffdd9831177cc815f098d2
Author: Jonathan Wakely 
Date:   Fri Jul 31 19:58:03 2020

libstdc++: Fix test that fails for C++98

Local classes have no linkage so cannot be used as template arguments in
C++98.

libstdc++-v3/ChangeLog:

* testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc:
Move struct to namespace scope.

diff --git a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc
index eb957e148da..f6e7a83c58d 100644
--- a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc
+++ b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_fill_n/sizes.cc
@@ -31,19 +31,19 @@ test01()
   VERIFY( i[3] == 0 );
 }
 
+// The standard only requires that n>0 and --n are valid expressions.
+struct Size
+{
+  int value;
+
+  void operator--() { --value; }
+
+  int operator>(void*) { return value != 0; }
+};
+
 void
 test02()
 {
-  // The standard only requires that n>0 and --n are valid expressions.
-  struct Size
-  {
-int value;
-
-void operator--() { --value; }
-
-int operator>(void*) { return value != 0; }
-  };
-
   int i[5] = { };
   Size n = {4};
   std::uninitialized_fill_n(i, n, 0xdcba);


Re: [PATCH] debug/96383 - emit debug info for used external functions

2020-07-31 Thread Richard Biener
On July 31, 2020 8:44:49 PM GMT+02:00, David Edelsohn  wrote:
>On Thu, Jul 30, 2020 at 11:55:19AM +0200, Richard Biener wrote:
>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>
>> OK for trunk and backports?
>
>I'd go with it for trunk and 10.2.1 now and consider further backports
>later.  Maybe even defer the 10.2.1 backport for two weeks.
>
>I believe that this patch broke bootstrap on AIX when using DWARF.  I
>am experiencing a new comparison failure.  All files differ.  The
>difference is in the debugging information.

Can you open a bug and attach two sample files so I can see how they differ?

Thanks, 
Richard. 

>Thanks, David



Re: [PATCH] c++: Use error_at rather than warning_at for missing return in constexpr functions [PR96182]

2020-07-31 Thread Jason Merrill via Gcc-patches

On 7/31/20 3:52 AM, Jakub Jelinek wrote:

On Wed, Jul 29, 2020 at 03:30:07PM -0400, Jason Merrill via Gcc-patches wrote:

On 7/14/20 4:50 AM, Jakub Jelinek wrote:

For C++11 we already emit an error if a constexpr function doesn't contain
a return statement, because in C++11 that is the only thing it needs to
contain, but for C++14 we would normally issue a -Wreturn-type warning.

As mentioned by Jonathan, such constexpr functions are invalid, no
diagnostics required, because there doesn't exist any arguments for
which it would result in valid constant expression.

This raises it to an error in such cases.  The !LAMBDA_TYPE_P case
is to avoid error on g++.dg/pr81194.C where the user didn't write
constexpr anywhere and the operator() is compiler generated.
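
For illustration, the kind of function that now gets a hard error looks
like this (a sketch based on the description; the real cases are in the
new test):

  // -std=c++14: previously only a -Wreturn-type warning, now an error,
  // since no arguments can make this a valid constant expression.
  constexpr int
  broken ()
  {
  }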


We set DECL_DECLARED_CONSTEXPR_P on lambdas earlier in finish_function if
suitable; can we move this diagnostic up before that?


That works too.  Bootstrapped/regtested on x86_64-linux and i686-linux, ok
for trunk then?


OK.


2020-07-31  Jakub Jelinek  

PR c++/96182
* decl.c (finish_function): In constexpr functions use for C++14 and
later error instead of warning if no return statement is present and
diagnose it regardless of warn_return_type.  Move the warn_return_type
diagnostics earlier in the function.

* g++.dg/cpp1y/constexpr-96182.C: New test.
* g++.dg/other/error35.C (S::g()): Add return statement.
* g++.dg/cpp1y/pr63996.C (foo): Likewise.
* g++.dg/cpp1y/constexpr-return2.C (f): Likewise.
* g++.dg/cpp1y/var-templ44.C (make_array): Add throw 1.

--- gcc/cp/decl.c.jj2020-07-29 11:57:23.340517489 +0200
+++ gcc/cp/decl.c   2020-07-30 20:44:33.634951396 +0200
@@ -17112,6 +17112,51 @@ finish_function (bool inline_p)
  DECL_ATTRIBUTES (fndecl)))
omp_declare_variant_finalize (fndecl, attr);
  
+  /* Complain if there's just no return statement.  */
+  if ((warn_return_type
+   || (cxx_dialect >= cxx14
+  && DECL_DECLARED_CONSTEXPR_P (fndecl)))
+  && !VOID_TYPE_P (TREE_TYPE (fntype))
+  && !dependent_type_p (TREE_TYPE (fntype))
+  && !current_function_returns_value && !current_function_returns_null
+  /* Don't complain if we abort or throw.  */
+  && !current_function_returns_abnormally
+  /* Don't complain if there's an infinite loop.  */
+  && !current_function_infinite_loop
+  /* Don't complain if we are declared noreturn.  */
+  && !TREE_THIS_VOLATILE (fndecl)
+  && !DECL_NAME (DECL_RESULT (fndecl))
+  && !TREE_NO_WARNING (fndecl)
+  /* Structor return values (if any) are set by the compiler.  */
+  && !DECL_CONSTRUCTOR_P (fndecl)
+  && !DECL_DESTRUCTOR_P (fndecl)
+  && targetm.warn_func_return (fndecl))
+{
+  gcc_rich_location richloc (input_location);
+  /* Potentially add a "return *this;" fix-it hint for
+assignment operators.  */
+  if (IDENTIFIER_ASSIGN_OP_P (DECL_NAME (fndecl)))
+   {
+ tree valtype = TREE_TYPE (DECL_RESULT (fndecl));
+ if (TREE_CODE (valtype) == REFERENCE_TYPE
+ && current_class_ref
+ && same_type_ignoring_top_level_qualifiers_p
+ (TREE_TYPE (valtype), TREE_TYPE (current_class_ref))
+ && global_dc->option_enabled (OPT_Wreturn_type,
+   global_dc->lang_mask,
+   global_dc->option_state))
+   add_return_star_this_fixit (&richloc, fndecl);
+   }
+  if (cxx_dialect >= cxx14
+ && DECL_DECLARED_CONSTEXPR_P (fndecl))
+   error_at (&richloc, "no return statement in %<constexpr%> function "
+   "returning non-void");
+  else if (warning_at (&richloc, OPT_Wreturn_type,
+  "no return statement in function returning "
+  "non-void"))
+   TREE_NO_WARNING (fndecl) = 1;
+}
+
/* Lambda closure members are implicitly constexpr if possible.  */
if (cxx_dialect >= cxx17
&& LAMBDA_TYPE_P (CP_DECL_CONTEXT (fndecl)))
@@ -17163,44 +17208,6 @@ finish_function (bool inline_p)
   to the FUNCTION_DECL node itself.  */
BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
  
-  /* Complain if there's just no return statement.  */
-  if (warn_return_type
-  && !VOID_TYPE_P (TREE_TYPE (fntype))
-  && !dependent_type_p (TREE_TYPE (fntype))
-  && !current_function_returns_value && !current_function_returns_null
-  /* Don't complain if we abort or throw.  */
-  && !current_function_returns_abnormally
-  /* Don't complain if there's an infinite loop.  */
-  && !current_function_infinite_loop
-  /* Don't complain if we are declared noreturn.  */
-  && !TREE_THIS_VOLATILE (fndecl)
-  && !DECL_NAME (DECL_RESULT (fndecl))
-  && !TREE_NO_WARNING (fndecl)
-  /* Structor return values (if any) are set by the compiler.

Re: [PATCH] c++: Add __builtin_bit_cast to implement std::bit_cast [PR93121]

2020-07-31 Thread Jason Merrill via Gcc-patches

On 7/31/20 6:06 AM, Jakub Jelinek wrote:

On Fri, Jul 31, 2020 at 10:54:46AM +0100, Jonathan Wakely wrote:

Does the standard require that somewhere?  Because that is not what the
compiler implements right now.


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78620


But does that imply that all CONSTRUCTORs without CONSTRUCTOR_NO_CLEARING
need to be treated that way?  I mean, aren't such CONSTRUCTORs used also for
other initializations?


Yes, they are also used to represent constant values of classes that are 
initialized by constexpr constructor.



And, are the default copy constructors or assignment operators supposed to
also copy the padding bits, or do they become unspecified again through
that?


For a non-union class, a defaulted copy is defined as memberwise copy, 
not a copy of the entire object representation.  So I guess strictly 
speaking the padding bits do become unspecified.  But I think if the 
copy is trivial, in practice all implementations do copy the object 
representation; perhaps the specification should adjust accordingly.
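
A short sketch of the distinction (hypothetical C++20 example):

  #include <bit>
  #include <cstdint>

  struct S { std::uint8_t a; std::uint32_t b; };  // 3 padding bytes after 'a'

  std::uint64_t
  observe (S s)
  {
    S t = s;  // defaulted copy: memberwise, so the padding bits are
              // strictly speaking unspecified afterwards
    return std::bit_cast<std::uint64_t>(t);  // reads the padding bits too
  }

With a trivial copy constructor, implementations in practice copy the
whole object representation, so the padding usually survives the copy.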


Jason



Re: [PATCH] c++: decl_constant_value and unsharing [PR96197]

2020-07-31 Thread Jason Merrill via Gcc-patches

On 7/31/20 2:07 AM, Richard Biener wrote:

On Thu, Jul 30, 2020 at 8:55 PM Patrick Palka via Gcc-patches
 wrote:


On Thu, 30 Jul 2020, Jason Merrill wrote:


On 7/30/20 9:49 AM, Patrick Palka wrote:

On Thu, 30 Jul 2020, Jason Merrill wrote:


On 7/21/20 3:07 PM, Patrick Palka wrote:

In the testcase from the PR we are seeing excessive memory use (> 5GB)
during constexpr evaluation, almost all of which is due to the call to
decl_constant_value in the VAR_DECL/CONST_DECL branch of
cxx_eval_constant_expression.  We reach here every time we evaluate an
ARRAY_REF of a constexpr VAR_DECL, which in this testcase is quite
often, and from there decl_constant_value makes an unshared copy of the
VAR_DECL's initializer, even though the unsharing is not needed at this
call site (because it is up to callers of cxx_eval_constant_expression
to unshare).

To fix this, this patch moves the responsibility of unsharing the result
of decl_constant_value, decl_really_constant_value and
scalar_constant_value from the callee to the caller.


How about creating another entry point that doesn't unshare, and using
that in
constexpr evaluation?


Is something like this what you have in mind?  This adds a defaulted
bool parameter to the three routines that controls unsharing (except for
decl_constant_value, which instead needs a new overload if we don't want
to touch its common declaration in c-common.h.)  Bootstrap and regtest
in progress.


That looks good, though I don't think we need the added parameter for
scalar_constant_value.


Hmm, I guess it should always be cheap to unshare a scalar initializer.
So consider the parameter removed for scalar_constant_value.




-- >8 --

Subject: [PATCH] c++: decl_constant_value and unsharing [PR96197]

In the testcase from the PR we are seeing excessive memory use (> 5GB)
during constexpr evaluation, almost all of which is due to the call to
decl_constant_value in the VAR_DECL/CONST_DECL branch of
cxx_eval_constant_expression.  We reach here every time we evaluate an
ARRAY_REF of a constexpr VAR_DECL, which in this testcase is quite
often, and from there decl_constant_value makes an unshared copy of the
VAR_DECL's initializer, even though the unsharing is not needed at this
call site (because callers of cxx_eval_constant_expression already
unshare its result when necessary).

To fix this excessive unsharing, this patch introduces a new defaulted
parameter unshare_p to scalar_constant_value, decl_really_constant_value
and decl_constant_value to allow callers to decide if these routines
should unshare their result before returning.  (Since decl_constant_value
is declared in c-common.h, it instead gets a new overload declared in
cp-tree.h.)

As a simplification, this patch also moves the call to unshare_expr in
constant_value_1 outside of the loop, since calling unshare_expr on a
DECL_P should be a no-op.

Additionally, in unify there is one call to scalar_constant_value that
seems to be dead code since we apparently don't see unlowered
enumerators anymore, so this patch replaces it with an appropriate
gcc_assert.


I'll also push this change as a separate patch, now that
scalar_constant_value is unrelated to the rest of the patch.

Here is the main patch that I guess I'll commit after a full bootstrap
and regtest:


Might be a good candidate for backporting to GCC 10 as well after
a while - it at least looks simple enough.


Agreed.




-- >8 --

Subject: [PATCH] c++: decl_constant_value and unsharing [PR96197]

In the testcase from the PR we are seeing excessive memory use (> 5GB)
during constexpr evaluation, almost all of which is due to the call to
decl_constant_value in the VAR_DECL/CONST_DECL branch of
cxx_eval_constant_expression.  We reach here every time we evaluate an
ARRAY_REF of a constexpr VAR_DECL, which in this testcase is quite
often, and from there decl_constant_value makes an unshared copy of the
VAR_DECL's initializer, even though the unsharing is not needed at this
call site (because callers of cxx_eval_constant_expression already
unshare its result when necessary).

To fix this excessive unsharing, this patch introduces a new defaulted
parameter unshare_p to decl_really_constant_value and
decl_constant_value to allow callers to choose whether these routines
should unshare the returned result.  (Since decl_constant_value is
declared in c-common.h, we introduce a new overload declared in
cp-tree.h instead of changing its existing declaration.)

As a simplification, this patch also moves the call to unshare_expr in
constant_value_1 outside of the loop, since calling unshare_expr on a
DECL_P should be a no-op.

Now that the calls to decl_constant_value and
decl_really_constant_value from cxx_eval_constant_expression no longer
unshare their result, memory use during constexpr evaluation for the
testcase from the PR falls from ~5GB to 15MB according to -ftime-report.
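
For a feel of the access pattern involved, a small sketch (hypothetical
code, not the PR's testcase):

  constexpr int table[4] = { 1, 2, 3, 4 };

  constexpr int
  sum (int n)
  {
    int s = 0;
    for (int i = 0; i < n; ++i)
      s += table[i & 3];  // each ARRAY_REF evaluation used to copy
                          // table's initializer via decl_constant_value
    return s;
  }

  static_assert (sum (1000) == 2500, "");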

gcc/cp/ChangeLog:

 PR c++/96197
 * constexpr.c (cxx_eval_constant_expression) :

Re: [PATCH] c: Fix bogus vector initialisation error [PR96377]

2020-07-31 Thread Joseph Myers
On Fri, 31 Jul 2020, Richard Sandiford wrote:

> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
> OK to install?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[committed] wwwdocs: Move www.validlab.com to https.

2020-07-31 Thread Gerald Pfeifer
Pushed.

Gerald
---
 htdocs/readings.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/htdocs/readings.html b/htdocs/readings.html
index 2488ca9d..f417ed6d 100644
--- a/htdocs/readings.html
+++ b/htdocs/readings.html
@@ -598,11 +598,11 @@ names.
 
 
 
-  <li><a href="http://www.validlab.com/goldberg/paper.pdf">What Every
+  <li><a href="https://www.validlab.com/goldberg/paper.pdf">What Every
   Computer Scientist Should Know about Floating-Point Arithmetic</a>
   by David Goldberg, including Doug Priest's supplement (PDF format)
 
-  <li><a href="http://www.validlab.com/goldberg/addendum.html">Differences
+  <li><a href="https://www.validlab.com/goldberg/addendum.html">Differences
   Among IEEE 754 Implementations</a>
   by Doug Priest (included in the PostScript-format document above)
 
-- 
2.27.0


Re: std::vector<bool> code cleanup fixes optimizations

2020-07-31 Thread François Dumont via Gcc-patches

On 17/07/20 12:36 pm, Jonathan Wakely wrote:

On 16/12/19 08:18 +0100, François Dumont wrote:
A small refresh on this patch, now tested also for the versioned
namespace, which requires printers.py to be updated.


Note that this simplification also works for normal mode, so I can
apply it independently of the stl_bvector.h part.



    * include/bits/stl_bvector.h
    [_GLIBCXX_INLINE_VERSION](_Bvector_impl_data::_M_start): 
Define as

    _Bit_type*.
    (_Bvector_impl_data(const _Bvector_impl_data&)): Default.
    (_Bvector_impl_data(_Bvector_impl_data&&)): Delegate to latter.
    (_Bvector_impl_data::operator=(const _Bvector_impl_data&)): 
Default.

(_Bvector_impl_data::_M_move_data(_Bvector_impl_data&&)): Use latter.
    (_Bvector_impl_data::_M_reset()): Likewise.
    (_Bvector_impl_data::_M_swap_data): New.
    (_Bvector_impl::_Bvector_impl(_Bvector_impl&&)): Implement 
explicitly.
    (_Bvector_impl::_Bvector_impl(_Bit_alloc_type&&, 
_Bvector_impl&&)): New.
    (_Bvector_base::_Bvector_base(_Bvector_base&&, const 
allocator_type&)):

    New, use latter.
    (vector::vector(vector&&, const allocator_type&, true_type)): 
New, use

    latter.
    (vector::vector(vector&&, const allocator_type&, false_type)): 
New.

    (vector::vector(vector&&, const allocator_type&)): Use latters.
    (vector::vector(const vector&, const allocator_type&)): Adapt.
    [__cplusplus >= 201103](vector::vector(_InputIt, _InputIt,
    const allocator_type&)): Use _M_initialize_range.
    (vector::operator[](size_type)): Use iterator operator[].
    (vector::operator[](size_type) const): Use const_iterator 
operator[].
    (vector::swap(vector&)): Add assertions on allocators. Use 
_M_swap_data.

    [__cplusplus >= 201103](vector::insert(const_iterator, _InputIt,
    _InputIt)): Use _M_insert_range.
    (vector::_M_initialize(size_type)): Adapt.
    [__cplusplus >= 201103](vector::_M_initialize_dispatch): Remove.
    [__cplusplus >= 201103](vector::_M_insert_dispatch): Remove.
    * python/libstdcxx/v6/printers.py 
(StdVectorPrinter._iterator): Stop

    using start _M_offset.
    (StdVectorPrinter.to_string): Likewise.
    * testsuite/23_containers/vector/bool/allocator/swap.cc: Adapt.
    * 
testsuite/23_containers/vector/bool/cons/noexcept_move_construct.cc:

    Add check.

François


On 6/24/19 9:31 PM, François Dumont wrote:

Hi

    Any feedback regarding this patch ?

Thanks

On 5/14/19 7:46 AM, François Dumont wrote:

Hi

    This is the patch on vector to:

- Optimize sizeof in Versioned namespace mode. We could go one step 
further by removing _M_p from _M_finish and turning it into an 
offset, but that change would be more intrusive.


- Implement the swap optimization already done on main std::vector 
template class.


- Fix the move constructor so that it is noexcept regardless of the 
allocator move constructor's noexcept qualification (see the sketch 
after this list).


- Optimize move constructor with allocator when allocator type is 
always equal.


- Use shortcuts in C++11 by skipping the _M_XXX_dispatch methods. 
Those are now defined only in pre-C++11 mode; I can't see any ABI 
issue in doing so.
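
As noted in the move-constructor item, the guarantee can be checked at
compile time; a sketch in the spirit of the noexcept_move_construct.cc
test:

  #include <vector>
  #include <type_traits>

  static_assert(std::is_nothrow_move_constructible<std::vector<bool>>::value,
                "vector<bool> move construction must not throw");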


    * include/bits/stl_bvector.h
   [_GLIBCXX_INLINE_VERSION](_Bvector_impl_data::_M_start): 
Define as

    _Bit_type*.
    (_Bvector_impl_data(const _Bvector_impl_data&)): Default.
    (_Bvector_impl_data(_Bvector_impl_data&&)): Delegate to latter.
    (_Bvector_impl_data::operator=(const _Bvector_impl_data&)): 
Default.

(_Bvector_impl_data::_M_move_data(_Bvector_impl_data&&)): Use latter.
    (_Bvector_impl_data::_M_reset()): Likewise.
    (_Bvector_impl_data::_M_swap_data): New.
   (_Bvector_impl::_Bvector_impl(_Bvector_impl&&)): Implement 
explicitly.
   (_Bvector_impl::_Bvector_impl(_Bit_alloc_type&&, 
_Bvector_impl&&)): New.
   (_Bvector_base::_Bvector_base(_Bvector_base&&, const 
allocator_type&)):

    New, use latter.
    (vector::vector(vector&&, const allocator_type&, 
true_type)): New, use

    latter.
    (vector::vector(vector&&, const allocator_type&, 
false_type)): New.

    (vector::vector(vector&&, const allocator_type&)): Use latters.
    (vector::vector(const vector&, const allocator_type&)): Adapt.
    [__cplusplus >= 201103](vector::vector(_InputIt, _InputIt,
    const allocator_type&)): Use _M_initialize_range.
    (vector::operator[](size_type)): Use iterator operator[].
    (vector::operator[](size_type) const): Use const_iterator 
operator[].

    (vector::swap(vector&)): Adapt.
    (vector::_M_initialize(size_type)): Add assertions on 
allocators.

    Use _M_swap_data.
    [__cplusplus >= 201103](vector::insert(const_iterator, 
_InputIt,

    _InputIt)): Use _M_insert_range.
    [__cplusplus >= 201103](vector::_M_initialize_dispatch): 
Remove.

    [__cplusplus >= 201103](vector::_M_insert_dispatch): Remove.
    * tes

[committed] libstdc++: ParallelSTL is now part of oneAPI DPC++ Library

2020-07-31 Thread Gerald Pfeifer
Pushed.

(Something was off with the ChangeLog detection I'm afraid.  I first got
an error message and what ended up in the commit didn't look completely
consistent.)

Gerald


2020-07-31  Gerald Pfeifer  

* doc/xml/manual/status_cxx2017.xml: ParallelSTL is now part
of oneAPI DPC++ Library on GitHub.
* doc/html/manual/status.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/status.html   | 2 +-
 libstdc++-v3/doc/xml/manual/status_cxx2017.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/status.html 
b/libstdc++-v3/doc/html/manual/status.html
index a16fddd6136..59c0bd8892d 100644
--- a/libstdc++-v3/doc/html/manual/status.html
+++ b/libstdc++-v3/doc/html/manual/status.html
@@ -906,7 +906,7 @@ since C++14 and the implementation is complete.
 [regenerated HTML table: section 28, Algorithms]
-  [row 28.4 "Parallel algorithms": Using <a href="https://github.com/intel/parallelstl">PSTL</a>; other rows unchanged]
+  [row 28.4 "Parallel algorithms": Using <a href="https://github.com/oneapi-src/oneDPL">oneAPI DPC++ Library</a>; other rows unchanged]
 [regenerated HTML table: section 29, Numerics]
diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml 
b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
index 0f03126db1c..0929ee948e0 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
@@ -1882,7 +1882,7 @@ since C++14 and the implementation is complete.
   28.4
   Parallel algorithms
   
-      Using <link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://github.com/intel/parallelstl">PSTL</link>
+      Using <link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://github.com/oneapi-src/oneDPL">oneAPI DPC++ Library</link>
 
 
   28.5
-- 
2.27.0


Re: [committed] libstdc++: ParallelSTL is now part of oneAPI DPC++ Library

2020-07-31 Thread Jonathan Wakely via Gcc-patches

On 31/07/20 23:23 +0200, Gerald Pfeifer wrote:

Pushed.

(Something was off with the ChangeLog detection I'm afraid.  I first got
an error message and what ended up in the commit didn't look completely
consistent.)


I noticed the other day that this link was redirecting, but I don't
think we want this change.

We use the PSTL as it was when we imported (i.e. forked) it. We don't
use whatever it's now part of.

A better link would be:
https://github.com/llvm/llvm-project/tree/master/pstl
which is the upstream for our code (even though we're shipping it and
LLVM isn't).



Re: std::vector<bool> code cleanup fixes optimizations

2020-07-31 Thread Jonathan Wakely via Gcc-patches

On 31/07/20 23:03 +0200, François Dumont via Libstdc++ wrote:

On 17/07/20 12:36 pm, Jonathan Wakely wrote:

On 16/12/19 08:18 +0100, François Dumont wrote:
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/bool/allocator/swap.cc 
b/libstdc++-v3/testsuite/23_containers/vector/bool/allocator/swap.cc

index de441426532..745fdc85cf6 100644
--- a/libstdc++-v3/testsuite/23_containers/vector/bool/allocator/swap.cc
+++ b/libstdc++-v3/testsuite/23_containers/vector/bool/allocator/swap.cc
@@ -28,19 +28,17 @@ namespace __gnu_test
  // It is undefined behaviour to swap() containers with unequal 
allocators
  // if the allocator doesn't propagate, so ensure the allocators 
compare
  // equal, while still being able to test propagation via 
get_personality().

+  template
    bool
-  operator==(const propagating_allocator&,
-         const propagating_allocator&)
-  {
-    return true;
-  }
+    operator==(const propagating_allocator&,
+           const propagating_allocator&)
+    { return true; }

+  template
    bool
-  operator!=(const propagating_allocator&,
-         const propagating_allocator&)
-  {
-    return false;
-  }
+    operator!=(const propagating_allocator&,
+           const propagating_allocator&)
+    { return false; }


Why does this test need to be adapted?

Because I replicate and adapt the static assertion from the main 
vector implementation which is:


#if __cplusplus >= 201103L
__glibcxx_assert(_Bit_alloc_traits::propagate_on_container_swap::value
             || _M_get_Bit_allocator() == __x._M_get_Bit_allocator());
#endif

in the case of vector<bool> the compared allocators are not 
propagating_allocator<bool, false> but propagating_allocator<unsigned long, false>.


Ah yes, of course. Thanks.



Building with __glibcxx_assert enabled was showing this.

François





Re: [committed] libstdc++: Constrain reverse_iterator and move_iterator conversions [LWG 3435]

2020-07-31 Thread Martin Jambor
Hi Jonathan,

On Wed, Jul 22 2020, Jonathan Wakely via Gcc-patches wrote:
> libstdc++-v3/ChangeLog:
>
>   * include/bits/stl_iterator.h (reverse_iterator): Constrain
>   converting constructor and converting assignment operator.
>   Access source iterator's data member directly instead of
>   calling base().
>   (move_iterator): Likewise.
>   * testsuite/24_iterators/move_iterator/dr3435.cc: New test.
>   * testsuite/24_iterators/reverse_iterator/dr3435.cc: New test.
>
> Tested powerpc64le-linux, committed to trunk.
>

with this patch, I fail to build 541.leela_r from SPEC INT 2017 even
when I specifically pass -std=c++98 to all compiler invocations.  Is
that expected?

A simple reproducer is:

-- aaa.cpp --
#include <vector>

typedef unsigned long long int uint64;
std::vector<uint64> ko_hash_history;
uint64 ko_hash;

bool bar(std::vector<uint64>::const_reverse_iterator first,
         std::vector<uint64>::const_reverse_iterator last,
         std::vector<uint64>::const_reverse_iterator res);

bool foo(void) {
    std::vector<uint64>::const_reverse_iterator first = ko_hash_history.rbegin();
    std::vector<uint64>::const_reverse_iterator last = ko_hash_history.rend();
    std::vector<uint64>::const_reverse_iterator res;

    return bar(first, last, res);
}
--

$ /home/mjambor/gcc/mine/inst/bin/g++ -std=c++98 -m64 -c -O2 -g aaa.cpp

In file included from 
/home/mjambor/gcc/mine/inst/include/c++/11.0.0/bits/stl_algobase.h:67,
 from /home/mjambor/gcc/mine/inst/include/c++/11.0.0/vector:60,
 from aaa.cpp:1:
/home/mjambor/gcc/mine/inst/include/c++/11.0.0/bits/stl_iterator.h: In 
instantiation of ‘std::reverse_iterator<_Iterator>::reverse_iterator(const 
std::reverse_iterator<_Iter>&) [with _Iter = __gnu_cxx::__normal_iterator >; _Iterator = 
__gnu_cxx::__normal_iterator >]’:
aaa.cpp:12:80:   required from here
/home/mjambor/gcc/mine/inst/include/c++/11.0.0/bits/stl_iterator.h:203:23: 
error: ‘__gnu_cxx::__normal_iterator > std::reverse_iterator<__gnu_cxx::__normal_iterator > >::current’ is 
protected within this context
  203 | : current(__x.current) { }
  |   ^~~
/home/mjambor/gcc/mine/inst/include/c++/11.0.0/bits/stl_iterator.h:146:17: 
note: declared protected here
  146 |   _Iterator current;
  | ^~~

...which is exactly the error I get from leela.  The error goes away
when I use -std=c++11 instead (I promise I'll switch to building my
benchmarks with that or c++14 over the remainder of this year :-).

Is there a problem in the test (and the benchmark)?  Should I file a PR?

Thanks,

Martin


Re: [PATCH] RISC-V/libgcc: Reduce the size of RV64 millicode by 6 bytes

2020-07-31 Thread Maciej W. Rozycki via Gcc-patches
On Thu, 30 Jul 2020, Andrew Waterman wrote:

> IIRC, I didn't use this approach originally because I wanted to avoid
> the additional dynamic instruction.  But I agree that code size is the
> more important metric for users of this feature.  LGTM.

 Applied now, thanks for your review.

  Maciej


[PATCH] improve memcmp and memchr constant folding (PR 78257)

2020-07-31 Thread Martin Sebor via Gcc-patches

The folders for these functions (and some others) call c_getstr,
which relies on string_constant to return the representation of
constant strings.  Because the function doesn't handle constants
of other types, including aggregates, memcmp or memchr calls
involving those are not folded when they could be.

The attached patch extends the algorithm used by string_constant
to also handle constant aggregates involving elements or members
of the same types native_encode_expr handles.  (The change restores
the empty initializer optimization inadvertently disabled in
the fix for pr96058.)

To avoid accidentally misusing either string_constant or c_getstr
with non-strings I have introduced a pair of new functions to get
the representation of those: byte_representation and getbyterep.

Tested on x86_64-linux.

Martin
PR tree-optimization/78257 - missing memcmp optimization with constant arrays

gcc/ChangeLog:

	PR middle-end/78257
	* builtins.c (expand_builtin_memory_copy_args): Rename called function.
	(expand_builtin_stpcpy_1): Remove argument from call.
	(expand_builtin_memcmp): Rename called function.
	(inline_expand_builtin_bytecmp): Same.
	* expr.c (convert_to_bytes): New function.
	(constant_byte_string): New function (formerly string_constant).
	(string_constant): Call constant_byte_string.
	(byte_representation): New function.
	* expr.h (byte_representation): Declare.
	* fold-const-call.c (fold_const_call): Rename called function.
	* fold-const.c (c_getstr): Remove an argument.
	(getbyterep): Define a new function.
	* fold-const.h (c_getstr): Remove an argument.
	(getbyterep): Declare a new function.
	* gimple-fold.c (gimple_fold_builtin_memory_op): Rename callee.
	(gimple_fold_builtin_string_compare): Same.
	(gimple_fold_builtin_memchr): Same.

gcc/testsuite/ChangeLog:

	PR middle-end/78257
	* gcc.dg/memchr.c: New test.
	* gcc.dg/memcmp-2.c: New test.
	* gcc.dg/memcmp-3.c: New test.
	* gcc.dg/memcmp-4.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 228db78f32b..b872119c1cb 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -4447,7 +4447,7 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
   /* Try to get the byte representation of the constant SRC points to,
  with its byte size in NBYTES.  */
   unsigned HOST_WIDE_INT nbytes;
-  const char *rep = c_getstr (src, &nbytes);
+  const char *rep = getbyterep (src, &nbytes);
 
   /* If the function's constant bound LEN_RTX is less than or equal
  to the byte size of the representation of the constant argument,
@@ -4455,7 +4455,7 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
  the bytes from memory and only store the computed constant.
  This works in the overlap (memmove) case as well because
  store_by_pieces just generates a series of stores of constants
- from the representation returned by c_getstr().  */
+ from the representation returned by getbyterep().  */
   if (rep
   && CONST_INT_P (len_rtx)
   && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= nbytes
@@ -4704,7 +4704,7 @@ expand_builtin_stpcpy_1 (tree exp, rtx target, machine_mode mode)
 	 because the latter will potentially produce pessimized code
 	 when used to produce the return value.  */
   c_strlen_data lendata = { };
-  if (!c_getstr (src, NULL)
+  if (!c_getstr (src)
 	  || !(len = c_strlen (src, 0, &lendata, 1)))
 	return expand_movstr (dst, src, target,
 			  /*retmode=*/ RETURN_END_MINUS_ONE);
@@ -5357,11 +5357,11 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
  when the function's result is used for equality to zero, ARG1)
  points to, with its byte size in NBYTES.  */
   unsigned HOST_WIDE_INT nbytes;
-  const char *rep = c_getstr (arg2, &nbytes);
+  const char *rep = getbyterep (arg2, &nbytes);
   if (result_eq && rep == NULL)
 {
   /* For equality to zero the arguments are interchangeable.  */
-  rep = c_getstr (arg1, &nbytes);
+  rep = getbyterep (arg1, &nbytes);
   if (rep != NULL)
 	std::swap (arg1_rtx, arg2_rtx);
 }
@@ -7811,8 +7811,8 @@ inline_expand_builtin_bytecmp (tree exp, rtx target)
   /* Get the object representation of the initializers of ARG1 and ARG2
  as strings, provided they refer to constant objects, with their byte
  sizes in LEN1 and LEN2, respectively.  */
-  const char *bytes1 = c_getstr (arg1, &len1);
-  const char *bytes2 = c_getstr (arg2, &len2);
+  const char *bytes1 = getbyterep (arg1, &len1);
+  const char *bytes2 = getbyterep (arg2, &len2);
 
   /* Fail if neither argument refers to an initialized constant.  */
   if (!bytes1 && !bytes2)
diff --git a/gcc/expr.c b/gcc/expr.c
index a150fa0d3b5..a124df54655 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11594,15 +11594,103 @@ is_aligning_offset (const_tree offset, const_tree exp)
   /* This must now be the address of EXP.  */
   return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
 }
-
-/* Return the tree node if an ARG corresponds to

gcc.dg/loop-8.c: Skip for mmix.

2020-07-31 Thread Hans-Peter Nilsson
Committed.

This test fails for mmix for (almost) the same reason it would fail
for e.g. mipsel-elf: the end-condition of the loop tests against a
register set to a constant, and that register is (one of) the
"unexpected IVs" moved out of the loop "without introducing a new
temporary register", making the dump contain more than one
"Decided" and causing a non-matching loop2 dump.  The test should
probably have been restricted to the original target on which the
fixed problem was observed.
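
A sketch of the shape of loop involved (hypothetical, much simplified
from loop-8.c):

  void
  f (int *a)
  {
    int i;
    for (i = 0; i < 100; i++)  /* The constant bound ends up in a register;
                                  on mmix that constant load is itself
                                  hoisted as a second invariant, hence the
                                  extra "Decided" line in the dump.  */
      a[i] = 0;
  }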

gcc/testsuite:
* gcc.dg/loop-8.c: Skip for mmix.

--- gcc/gcc/testsuite/gcc.dg/loop-8.c.orig  Mon Jan 13 22:30:47 2020
+++ gcc/gcc/testsuite/gcc.dg/loop-8.c   Sat Aug  1 03:07:52 2020
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
-/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-*" } } */
+/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-* mmix-*-*" } } */
 /* Load immediate on condition is available from z13 on and prevents moving
the load out of the loop, so always run this test with -march=zEC12 that
does not have load immediate on condition.  */