add rv64im{,c,fc} multilibs

2021-02-23 Thread Alexandre Oliva


We've had customer demand for these multilibs.  We'd be happy to
maintain this change locally, but I thought I'd contribute the patch,
just in case there's wider interest in them.  WDYT?


for  gcc/ChangeLog

* config/riscv/t-elf-multilib: Add multilibs for rv64im,
rv64imc, and rv64imfc/lp64f.
---
 gcc/config/riscv/t-elf-multilib |   11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/t-elf-multilib b/gcc/config/riscv/t-elf-multilib
index 19f9434616c2d..b268e26c954c8 100644
--- a/gcc/config/riscv/t-elf-multilib
+++ b/gcc/config/riscv/t-elf-multilib
@@ -1,6 +1,6 @@
 # This file was generated by multilib-generator with the command:
-#  ./multilib-generator rv32i-ilp32--c rv32im-ilp32--c rv32iac-ilp32-- 
rv32imac-ilp32-- rv32imafc-ilp32f-rv32imafdc- rv64imac-lp64-- rv64imafdc-lp64d--
-MULTILIB_OPTIONS = 
march=rv32i/march=rv32ic/march=rv32im/march=rv32imc/march=rv32iac/march=rv32imac/march=rv32imafc/march=rv32imafdc/march=rv32gc/march=rv64imac/march=rv64imafdc/march=rv64gc
 mabi=ilp32/mabi=ilp32f/mabi=lp64/mabi=lp64d
+#  ./multilib-generator rv32i-ilp32--c rv32im-ilp32--c rv32iac-ilp32-- 
rv32imac-ilp32-- rv32imafc-ilp32f-rv32imafdc- rv64im-lp64-- rv64imc-lp64-- 
rv64imfc-lp64f-- rv64imac-lp64-- rv64imafdc-lp64d--
+MULTILIB_OPTIONS = 
march=rv32i/march=rv32ic/march=rv32im/march=rv32imc/march=rv32iac/march=rv32imac/march=rv32imafc/march=rv32imafdc/march=rv32gc/march=rv64im/march=rv64imc/march=rv64imfc/march=rv64imac/march=rv64imafdc/march=rv64gc
 mabi=ilp32/mabi=ilp32f/mabi=lp64/mabi=lp64f/mabi=lp64d
 MULTILIB_DIRNAMES = rv32i \
 rv32ic \
 rv32im \
@@ -10,17 +10,24 @@ rv32imac \
 rv32imafc \
 rv32imafdc \
 rv32gc \
+rv64im \
+rv64imc \
+rv64imfc \
 rv64imac \
 rv64imafdc \
 rv64gc ilp32 \
 ilp32f \
 lp64 \
+lp64f \
 lp64d
 MULTILIB_REQUIRED = march=rv32i/mabi=ilp32 \
 march=rv32im/mabi=ilp32 \
 march=rv32iac/mabi=ilp32 \
 march=rv32imac/mabi=ilp32 \
 march=rv32imafc/mabi=ilp32f \
+march=rv64im/mabi=lp64 \
+march=rv64imc/mabi=lp64 \
+march=rv64imfc/mabi=lp64f \
 march=rv64imac/mabi=lp64 \
 march=rv64imafdc/mabi=lp64d
 MULTILIB_REUSE = march.rv32i/mabi.ilp32=march.rv32ic/mabi.ilp32 \


-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist GNU Toolchain Engineer
Vim, Vi, Voltei pro Emacs -- GNUlius Caesar


[PATCH] config.sub, config.guess : Import upstream 2021-01-25.

2021-02-23 Thread Kito Cheng
Hi

Is it OK to update config.sub and config.guess?  I know we're already
in stage 4, but updating the config.* files should be harmless, and we
need this for RISC-V big-endian support, which is already supported in
binutils 2.36.

This imports from:

sha1 6faca61810d335c7837f320733fe8e15a1431fc2

ChangeLog:

* config.guess: Import latest upstream.
* config.sub: Import latest upstream.
---
 config.guess | 48 +++-
 config.sub   | 23 +++
 2 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/config.guess b/config.guess
index 0fc11edb2d1..1972fda8eb0 100755
--- a/config.guess
+++ b/config.guess
@@ -1,8 +1,8 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright 1992-2020 Free Software Foundation, Inc.
+#   Copyright 1992-2021 Free Software Foundation, Inc.
 
-timestamp='2020-11-07'
+timestamp='2021-01-25'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -27,7 +27,7 @@ timestamp='2020-11-07'
 # Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
 #
 # You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
 #
 # Please send patches to <config-patches@gnu.org>.
 
@@ -50,7 +50,7 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright 1992-2020 Free Software Foundation, Inc.
+Copyright 1992-2021 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -138,9 +138,7 @@ UNAME_VERSION=$( (uname -v) 2>/dev/null) || 
UNAME_VERSION=unknown
 
 case "$UNAME_SYSTEM" in
 Linux|GNU|GNU/*)
-   # If the system lacks a compiler, then just pick glibc.
-   # We could probably try harder.
-   LIBC=gnu
+   LIBC=unknown
 
set_cc_for_build
cat <<-EOF > "$dummy.c"
@@ -149,16 +147,30 @@ Linux|GNU|GNU/*)
LIBC=uclibc
#elif defined(__dietlibc__)
LIBC=dietlibc
+   #elif defined(__GLIBC__)
+   LIBC=gnu
#else
#include <stdarg.h>
+   /* First heuristic to detect musl libc.  */
#ifdef __DEFINED_va_list
LIBC=musl
-   #else
-   LIBC=gnu
#endif
#endif
EOF
eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 
's, ,,g')"
+
+   # Second heuristic to detect musl libc.
+   if [ "$LIBC" = unknown ] &&
+  command -v ldd >/dev/null &&
+  ldd --version 2>&1 | grep -q ^musl; then
+   LIBC=musl
+   fi
+
+   # If the system lacks a compiler, then just pick glibc.
+   # We could probably try harder.
+   if [ "$LIBC" = unknown ]; then
+   LIBC=gnu
+   fi
;;
 esac
 
@@ -176,10 +188,9 @@ case 
"$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
#
# Note: NetBSD doesn't particularly care about the vendor
# portion of the name.  We always set it to "unknown".
-   sysctl="sysctl -n hw.machine_arch"
UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
-   "/sbin/$sysctl" 2>/dev/null || \
-   "/usr/sbin/$sysctl" 2>/dev/null || \
+   /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
+   /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
echo unknown))
case "$UNAME_MACHINE_ARCH" in
aarch64eb) machine=aarch64_be-unknown ;;
@@ -984,6 +995,9 @@ EOF
 k1om:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
+loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
+   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+   exit ;;
 m32r*:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
@@ -1072,7 +1086,7 @@ EOF
 ppcle:Linux:*:*)
echo powerpcle-unknown-linux-"$LIBC"
exit ;;
-riscv32:Linux:*:* | riscv64:Linux:*:*)
+riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | 
riscv64be:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
 s390:Linux:*:* | s390x:Linux:*:*)
@@ -1468,8 +1482,8 @@ EOF
 i*86:rdos:*:*)
echo "$UNAME_MACHINE"-pc-rdos
exit ;;
-i*86:AROS:*:*)
-   echo "$UNAME_MACHINE"-pc-aros
+*:AROS:*:*)
+   echo "$UNAME_MACHINE"-unknown-aros
exit ;;
 x86_64:VMkernel:*:*)
echo "$UNAME_MACHINE"-unknown-esx
@@ -1638,9 +1652,9 @@ This script (version $timestamp), has failed to recognize 
the
 operating system you are using. If your script is old, overwrite *all*
 copies of config.guess and config.sub with the latest versions from:
 
-  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+  https://git.savannah

Re: [PATCH] config.sub, config.guess : Import upstream 2021-01-25.

2021-02-23 Thread Richard Biener
On Tue, 23 Feb 2021, Kito Cheng wrote:

> Hi
> 
> Does it update config.sub and config.guess, I know it's already
> stage 4, but the config.* stuff update should be harmless things,
> and we need this for RISC-V big-endian support, which is already
> supported in binutils 2.36.
> 
> This imports from:
> 
> sha1 6faca61810d335c7837f320733fe8e15a1431fc2

OK.

> ChangeLog:
> 
> * config.guess: Import latest upstream.
> * config.sub: Import latest upstream.
> ---
>  config.guess | 48 +++-
>  config.sub   | 23 +++
>  2 files changed, 46 insertions(+), 25 deletions(-)
> 
> diff --git a/config.guess b/config.guess
> index 0fc11edb2d1..1972fda8eb0 100755
> --- a/config.guess
> +++ b/config.guess
> @@ -1,8 +1,8 @@
>  #! /bin/sh
>  # Attempt to guess a canonical system name.
> -#   Copyright 1992-2020 Free Software Foundation, Inc.
> +#   Copyright 1992-2021 Free Software Foundation, Inc.
>  
> -timestamp='2020-11-07'
> +timestamp='2021-01-25'
>  
>  # This file is free software; you can redistribute it and/or modify it
>  # under the terms of the GNU General Public License as published by
> @@ -27,7 +27,7 @@ timestamp='2020-11-07'
>  # Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
>  #
>  # You can get the latest version of this script from:
> -# 
> https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
> +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
>  #
>  # Please send patches to <config-patches@gnu.org>.
>  
> @@ -50,7 +50,7 @@ version="\
>  GNU config.guess ($timestamp)
>  
>  Originally written by Per Bothner.
> -Copyright 1992-2020 Free Software Foundation, Inc.
> +Copyright 1992-2021 Free Software Foundation, Inc.
>  
>  This is free software; see the source for copying conditions.  There is NO
>  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
> @@ -138,9 +138,7 @@ UNAME_VERSION=$( (uname -v) 2>/dev/null) || 
> UNAME_VERSION=unknown
>  
>  case "$UNAME_SYSTEM" in
>  Linux|GNU|GNU/*)
> - # If the system lacks a compiler, then just pick glibc.
> - # We could probably try harder.
> - LIBC=gnu
> + LIBC=unknown
>  
>   set_cc_for_build
>   cat <<-EOF > "$dummy.c"
> @@ -149,16 +147,30 @@ Linux|GNU|GNU/*)
>   LIBC=uclibc
>   #elif defined(__dietlibc__)
>   LIBC=dietlibc
> + #elif defined(__GLIBC__)
> + LIBC=gnu
>   #else
>   #include 
> + /* First heuristic to detect musl libc.  */
>   #ifdef __DEFINED_va_list
>   LIBC=musl
> - #else
> - LIBC=gnu
>   #endif
>   #endif
>   EOF
>   eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 
> 's, ,,g')"
> +
> + # Second heuristic to detect musl libc.
> + if [ "$LIBC" = unknown ] &&
> +command -v ldd >/dev/null &&
> +ldd --version 2>&1 | grep -q ^musl; then
> + LIBC=musl
> + fi
> +
> + # If the system lacks a compiler, then just pick glibc.
> + # We could probably try harder.
> + if [ "$LIBC" = unknown ]; then
> + LIBC=gnu
> + fi
>   ;;
>  esac
>  
> @@ -176,10 +188,9 @@ case 
> "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
>   #
>   # Note: NetBSD doesn't particularly care about the vendor
>   # portion of the name.  We always set it to "unknown".
> - sysctl="sysctl -n hw.machine_arch"
>   UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
> - "/sbin/$sysctl" 2>/dev/null || \
> - "/usr/sbin/$sysctl" 2>/dev/null || \
> + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
> + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
>   echo unknown))
>   case "$UNAME_MACHINE_ARCH" in
>   aarch64eb) machine=aarch64_be-unknown ;;
> @@ -984,6 +995,9 @@ EOF
>  k1om:Linux:*:*)
>   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
>   exit ;;
> +loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
> + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
> + exit ;;
>  m32r*:Linux:*:*)
>   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
>   exit ;;
> @@ -1072,7 +1086,7 @@ EOF
>  ppcle:Linux:*:*)
>   echo powerpcle-unknown-linux-"$LIBC"
>   exit ;;
> -riscv32:Linux:*:* | riscv64:Linux:*:*)
> +riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | 
> riscv64be:Linux:*:*)
>   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
>   exit ;;
>  s390:Linux:*:* | s390x:Linux:*:*)
> @@ -1468,8 +1482,8 @@ EOF
>  i*86:rdos:*:*)
>   echo "$UNAME_MACHINE"-pc-rdos
>   exit ;;
> -i*86:AROS:*:*)
> - echo "$UNAME_MACHINE"-pc-aros
> +*:AROS:*:*)
> + echo "$UNAME_MACHINE"-unknown-aros
>   exit ;;
>  x86_64:VMkernel:*:*)
>   echo "$UNAME_MACHINE"-unknown-esx
> @@ -1638,9 +1652,9 @@ This script (version $timestamp), has failed to 
> recognize the
>  operating system you are using.

[committed] libstdc++: Fix up constexpr std::char_traits::compare [PR99181]

2021-02-23 Thread Jakub Jelinek via Gcc-patches
Hi!

Because of LWG 467, std::char_traits<char>::lt compares the values
cast to unsigned char rather than char, so even when char is signed
we get unsigned comparison.  std::char_traits<char>::compare uses
__builtin_memcmp and that works the same, but during constexpr evaluation
we were calling __gnu_cxx::char_traits<char_type>::compare.  As
char_traits::lt is not virtual, __gnu_cxx::char_traits<char_type>::compare
used __gnu_cxx::char_traits<char_type>::lt rather than
std::char_traits<char>::lt and thus compared chars as signed if char is
signed.
This change fixes it by inlining __gnu_cxx::char_traits<char_type>::compare
into std::char_traits<char>::compare by hand, so that it calls the right
lt method.

Bootstrapped/regtested on x86_64-linux and i686-linux, acked in the PR by
Jonathan, committed to trunk.

2021-02-23  Jakub Jelinek  

PR libstdc++/99181
* include/bits/char_traits.h (char_traits::compare): For
constexpr evaluation don't call
__gnu_cxx::char_traits::compare but do the comparison loop
directly.

* testsuite/21_strings/char_traits/requirements/char/99181.cc: New
test.

--- libstdc++-v3/include/bits/char_traits.h.jj  2021-01-04 10:26:03.558953845 
+0100
+++ libstdc++-v3/include/bits/char_traits.h 2021-02-22 15:20:50.822160989 
+0100
@@ -349,7 +349,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__builtin_constant_p(__n)
&& __constant_char_array_p(__s1, __n)
&& __constant_char_array_p(__s2, __n))
- return __gnu_cxx::char_traits::compare(__s1, __s2, __n);
+ {
+   for (size_t __i = 0; __i < __n; ++__i)
+ if (lt(__s1[__i], __s2[__i]))
+   return -1;
+ else if (lt(__s2[__i], __s1[__i]))
+   return 1;
+   return 0;
+ }
 #endif
return __builtin_memcmp(__s1, __s2, __n);
   }
--- libstdc++-v3/testsuite/21_strings/char_traits/requirements/char/99181.cc.jj 
2021-02-22 14:59:56.414144985 +0100
+++ libstdc++-v3/testsuite/21_strings/char_traits/requirements/char/99181.cc
2021-02-22 15:19:41.398938812 +0100
@@ -0,0 +1,40 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do run { target c++17 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <string>
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  const char *a = "\x7f";
+  const char *b = "\x80";
+  int c = std::char_traits<char>::compare(a, b, 2);
+  constexpr int d = std::char_traits<char>::compare("\x7f", "\x80", 2);
+
+  VERIFY( c && (c < 0) == (static_cast<unsigned char>(a[0])
+                           < static_cast<unsigned char>(b[0])) );
+  VERIFY( d && (c < 0) == (d < 0) );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}


Jakub



[PATCH] fold-const: Fix ICE in fold_read_from_constant_string on invalid code [PR99204]

2021-02-23 Thread Jakub Jelinek via Gcc-patches
Hi!

fold_read_from_constant_string and expand_expr_real_1 have code to optimize
constant reads from string (tree vs. rtl).
If the STRING_CST array type has zero low bound, index is fold converted to
sizetype and so the compare_tree_int works fine, but if it has some other
low bound, it calls size_diffop_loc and that function from 2 sizetype
operands creates a ssizetype difference.  expand_expr_real_1 then uses
tree_fits_uhwi_p + compare_tree_int and so works fine, but fold-const.c
only checked if index is INTEGER_CST and calls compare_tree_int, which means
for negative index it will succeed and result in UB in the compiler.

This patch just follows what expand_expr_real_1 is doing.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-22  Jakub Jelinek  

PR tree-optimization/99204
* fold-const.c (fold_read_from_constant_string): Check that
tree_fits_uhwi_p (index) rather than just that index is INTEGER_CST.

* gfortran.dg/pr99204.f90: New test.

--- gcc/fold-const.c.jj 2021-01-04 10:25:39.031231582 +0100
+++ gcc/fold-const.c2021-02-22 19:46:57.167342075 +0100
@@ -15433,7 +15433,7 @@ fold_read_from_constant_string (tree exp
   if (string
  && TYPE_MODE (TREE_TYPE (exp)) == TYPE_MODE (TREE_TYPE (TREE_TYPE 
(string)))
  && TREE_CODE (string) == STRING_CST
- && TREE_CODE (index) == INTEGER_CST
+ && tree_fits_uhwi_p (index)
  && compare_tree_int (index, TREE_STRING_LENGTH (string)) < 0
  && is_int_mode (TYPE_MODE (TREE_TYPE (TREE_TYPE (string))),
  &char_mode)
--- gcc/testsuite/gfortran.dg/pr99204.f90.jj2021-02-22 19:52:55.749431186 
+0100
+++ gcc/testsuite/gfortran.dg/pr99204.f90   2021-02-22 19:51:07.410612678 
+0100
@@ -0,0 +1,10 @@
+! PR tree-optimization/99204
+! { dg-do compile }
+! { dg-options "-O2 -w" }
+
+program pr99204
+  character :: c
+  integer :: i = -12345678
+  c = 'abc'(i:i)
+  print *, c
+end

Jakub



Re: [PATCH,rs6000] [v2] Optimize pcrel access of globals

2021-02-23 Thread Richard Biener via Gcc-patches
On Tue, Feb 23, 2021 at 4:48 AM acsawdey--- via Gcc-patches
 wrote:
>
> From: Aaron Sawdey 
>
> This patch implements a RTL pass that looks for pc-relative loads of the
> address of an external variable using the PCREL_GOT relocation and a
> single load or store that uses that external address.
>
> Produced by a cast of thousands:
>  * Michael Meissner
>  * Peter Bergner
>  * Bill Schmidt
>  * Alan Modra
>  * Segher Boessenkool
>  * Aaron Sawdey
>
> This incorporates the changes requested in Segher's review. A few things I
> did not change were the insn-at-a-time scan that could be done with DF, and
> I did not change to using statistics.[ch] for the counters struct. I did try
> to improve the naming, and rewrote a number of comments to make them 
> consistent
> with the code, and generally tried to make things more readable.
>
> OK for trunk if bootstrap/regtest passes?

stage1 please?

> Thanks!
>Aaron
>
> gcc/ChangeLog:
>
> * config.gcc: Add pcrel-opt.o.
> * config/rs6000/pcrel-opt.c: New file.
> * config/rs6000/pcrel-opt.md: New file.
> * config/rs6000/predicates.md: Add d_form_memory predicate.
> * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT.
> * config/rs6000/rs6000-passes.def: Add pass_pcrel_opt.
> * config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(),
> pcrel_opt_valid_mem_p(), output_pcrel_opt_reloc(),
> and make_pass_pcrel_opt().
> * config/rs6000/rs6000.c (reg_to_non_prefixed): Make global.
> (rs6000_option_override_internal): Add pcrel-opt.
> (rs6000_delegitimize_address): Support pcrel-opt.
> (rs6000_opt_masks): Add pcrel-opt.
> (pcrel_opt_valid_mem_p): New function.
> (reg_to_non_prefixed): Make global.
> (rs6000_asm_output_opcode): Reset next_insn_prefixed_p.
> (output_pcrel_opt_reloc): New function.
> * config/rs6000/rs6000.md (loads_extern_addr): New attr.
> (pcrel_extern_addr): Set loads_extern_addr.
> Add include for pcrel-opt.md.
> * config/rs6000/rs6000.opt: Add -mpcrel-opt.
> * config/rs6000/t-rs6000: Add rules for pcrel-opt.c and
> pcrel-opt.md.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pcrel-opt-inc-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-df.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-hi.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-qi.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-sf.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-si.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-vector.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-df.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-hi.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-qi.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-sf.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-si.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-vector.c: New test.
> ---
>  gcc/config.gcc|   8 +-
>  gcc/config/rs6000/pcrel-opt.md| 399 
>  gcc/config/rs6000/predicates.md   |  21 +
>  gcc/config/rs6000/rs6000-cpus.def |   2 +
>  gcc/config/rs6000/rs6000-passes.def   |   8 +
>  gcc/config/rs6000/rs6000-pcrel-opt.c  | 924 ++
>  gcc/config/rs6000/rs6000-protos.h |   4 +
>  gcc/config/rs6000/rs6000.c| 111 ++-
>  gcc/config/rs6000/rs6000.md   |   8 +-
>  gcc/config/rs6000/rs6000.opt  |   4 +
>  gcc/config/rs6000/t-rs6000|   7 +-
>  .../gcc.target/powerpc/pcrel-opt-inc-di.c |  17 +
>  .../gcc.target/powerpc/pcrel-opt-ld-df.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-ld-di.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-hi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-qi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-sf.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-si.c  |  41 +
>  .../gcc.target/powerpc/pcrel-opt-ld-vector.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-df.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-di.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-hi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-st-qi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-st-sf.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-si.c  |  41 +
>  .../gcc.target/powerpc/pcrel-opt-st-vector.c  |  36 +
>  26 files changed, 2054 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/config/rs6000/pcrel-opt.md
>  create mode 100644 gcc/config/rs6000/rs6000-pcrel-opt.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
>  create mode 100

Re: [PATCH] fold-const: Fix ICE in fold_read_from_constant_string on invalid code [PR99204]

2021-02-23 Thread Richard Biener
On Tue, 23 Feb 2021, Jakub Jelinek wrote:

> Hi!
> 
> fold_read_from_constant_string and expand_expr_real_1 have code to optimize
> constant reads from string (tree vs. rtl).
> If the STRING_CST array type has zero low bound, index is fold converted to
> sizetype and so the compare_tree_int works fine, but if it has some other
> low bound, it calls size_diffop_loc and that function from 2 sizetype
> operands creates a ssizetype difference.  expand_expr_real_1 then uses
> tree_fits_uhwi_p + compare_tree_int and so works fine, but fold-const.c
> only checked if index is INTEGER_CST and calls compare_tree_int, which means
> for negative index it will succeed and result in UB in the compiler.
> 
> This patch just follows what expand_expr_real_1 is doing.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2021-02-22  Jakub Jelinek  
> 
>   PR tree-optimization/99204
>   * fold-const.c (fold_read_from_constant_string): Check that
>   tree_fits_uhwi_p (index) rather than just that index is INTEGER_CST.
> 
>   * gfortran.dg/pr99204.f90: New test.
> 
> --- gcc/fold-const.c.jj   2021-01-04 10:25:39.031231582 +0100
> +++ gcc/fold-const.c  2021-02-22 19:46:57.167342075 +0100
> @@ -15433,7 +15433,7 @@ fold_read_from_constant_string (tree exp
>if (string
> && TYPE_MODE (TREE_TYPE (exp)) == TYPE_MODE (TREE_TYPE (TREE_TYPE 
> (string)))
> && TREE_CODE (string) == STRING_CST
> -   && TREE_CODE (index) == INTEGER_CST
> +   && tree_fits_uhwi_p (index)
> && compare_tree_int (index, TREE_STRING_LENGTH (string)) < 0
> && is_int_mode (TYPE_MODE (TREE_TYPE (TREE_TYPE (string))),
> &char_mode)
> --- gcc/testsuite/gfortran.dg/pr99204.f90.jj  2021-02-22 19:52:55.749431186 
> +0100
> +++ gcc/testsuite/gfortran.dg/pr99204.f90 2021-02-22 19:51:07.410612678 
> +0100
> @@ -0,0 +1,10 @@
> +! PR tree-optimization/99204
> +! { dg-do compile }
> +! { dg-options "-O2 -w" }
> +
> +program pr99204
> +  character :: c
> +  integer :: i = -12345678
> +  c = 'abc'(i:i)
> +  print *, c
> +end
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Patch ping

2021-02-23 Thread Jakub Jelinek via Gcc-patches
Hi!

I'd like to ping the
https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565350.html
patch, P2 PR99085 ice-on-valid-code fix in fixup_partitions.

Thanks

Jakub



[COMMITTED][BACKPORT GCC9] aarch64: Add cpu cost tables for A64FX

2021-02-23 Thread Qian Jianhua
This is a backport of adding cost tables for A64FX.

2021-02-23 Qian Jianhua 

gcc/ChangeLog:

* config/aarch64/aarch64-cost-tables.h (a64fx_extra_costs): New.
* config/aarch64/aarch64.c (a64fx_addrcost_table): New.
(a64fx_regmove_cost, a64fx_vector_cost): New.
(a64fx_tunings): Use the new added cost tables.

(cherry picked from commit 041f720c821d394896c0e58a1c9b756fe9c82322)
---
 gcc/config/aarch64/aarch64-cost-tables.h | 103 +++
 gcc/config/aarch64/aarch64.c |  53 +++-
 2 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
b/gcc/config/aarch64/aarch64-cost-tables.h
index 5c9442e1b89..3613497 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -438,4 +438,107 @@ const struct cpu_cost_table tsv110_extra_costs =
   }
 };
 
+const struct cpu_cost_table a64fx_extra_costs =
+{
+  /* ALU */
+  {
+0, /* arith.  */
+0, /* logical.  */
+0, /* shift.  */
+0, /* shift_reg.  */
+COSTS_N_INSNS (1), /* arith_shift.  */
+COSTS_N_INSNS (1), /* arith_shift_reg.  */
+COSTS_N_INSNS (1), /* log_shift.  */
+COSTS_N_INSNS (1), /* log_shift_reg.  */
+0, /* extend.  */
+COSTS_N_INSNS (1), /* extend_arith.  */
+0, /* bfi.  */
+0, /* bfx.  */
+0, /* clz.  */
+0, /* rev.  */
+0, /* non_exec.  */
+true   /* non_exec_costs_exec.  */
+  },
+  {
+/* MULT SImode */
+{
+  COSTS_N_INSNS (4),   /* simple.  */
+  COSTS_N_INSNS (4),   /* flag_setting.  */
+  COSTS_N_INSNS (4),   /* extend.  */
+  COSTS_N_INSNS (5),   /* add.  */
+  COSTS_N_INSNS (5),   /* extend_add.  */
+  COSTS_N_INSNS (18)   /* idiv.  */
+},
+/* MULT DImode */
+{
+  COSTS_N_INSNS (4),   /* simple.  */
+  0,   /* flag_setting (N/A).  */
+  COSTS_N_INSNS (4),   /* extend.  */
+  COSTS_N_INSNS (5),   /* add.  */
+  COSTS_N_INSNS (5),   /* extend_add.  */
+  COSTS_N_INSNS (26)   /* idiv.  */
+}
+  },
+  /* LD/ST */
+  {
+COSTS_N_INSNS (4), /* load.  */
+COSTS_N_INSNS (4), /* load_sign_extend.  */
+COSTS_N_INSNS (5), /* ldrd.  */
+COSTS_N_INSNS (4), /* ldm_1st.  */
+1, /* ldm_regs_per_insn_1st.  */
+2, /* ldm_regs_per_insn_subsequent.  */
+COSTS_N_INSNS (4), /* loadf.  */
+COSTS_N_INSNS (4), /* loadd.  */
+COSTS_N_INSNS (5), /* load_unaligned.  */
+0, /* store.  */
+0, /* strd.  */
+0, /* stm_1st.  */
+1, /* stm_regs_per_insn_1st.  */
+2, /* stm_regs_per_insn_subsequent.  */
+0, /* storef.  */
+0, /* stored.  */
+0, /* store_unaligned.  */
+COSTS_N_INSNS (1), /* loadv.  */
+COSTS_N_INSNS (1)  /* storev.  */
+  },
+  {
+/* FP SFmode */
+{
+  COSTS_N_INSNS (6),  /* div.  */
+  COSTS_N_INSNS (1),   /* mult.  */
+  COSTS_N_INSNS (1),   /* mult_addsub.  */
+  COSTS_N_INSNS (2),   /* fma.  */
+  COSTS_N_INSNS (1),   /* addsub.  */
+  COSTS_N_INSNS (1),   /* fpconst.  */
+  COSTS_N_INSNS (1),   /* neg.  */
+  COSTS_N_INSNS (1),   /* compare.  */
+  COSTS_N_INSNS (2),   /* widen.  */
+  COSTS_N_INSNS (2),   /* narrow.  */
+  COSTS_N_INSNS (2),   /* toint.  */
+  COSTS_N_INSNS (2),   /* fromint.  */
+  COSTS_N_INSNS (2)/* roundint.  */
+},
+/* FP DFmode */
+{
+  COSTS_N_INSNS (11),  /* div.  */
+  COSTS_N_INSNS (1),   /* mult.  */
+  COSTS_N_INSNS (1),   /* mult_addsub.  */
+  COSTS_N_INSNS (2),   /* fma.  */
+  COSTS_N_INSNS (1),   /* addsub.  */
+  COSTS_N_INSNS (1),   /* fpconst.  */
+  COSTS_N_INSNS (1),   /* neg.  */
+  COSTS_N_INSNS (1),   /* compare.  */
+  COSTS_N_INSNS (2),   /* widen.  */
+  COSTS_N_INSNS (2),   /* narrow.  */
+  COSTS_N_INSNS (2),   /* toint.  */
+  COSTS_N_INSNS (2),   /* fromint.  */
+  COSTS_N_INSNS (2)/* roundint.  */
+}
+  },
+  /* Vector */
+  {
+COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
 #endif
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bce50aea01e..d71daf92d0b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -317,6 +317,22 @@ static const struct cpu_addrcost_table 
qdf24xx_addrcost_table =
   2, /* imm_offset  */
 };
 
+s

Re: [PATCH v3 2/2] ada: add 128bit operation to MIPS N32 and N64

2021-02-23 Thread Arnaud Charlet
> For MIPS N64 and N32:
>   add GNATRTL_128BIT_PAIRS to LIBGNAT_TARGET_PAIRS
>   add GNATRTL_128BIT_OBJS to EXTRA_GNATRTL_NONTASKING_OBJS
> 
> gcc/ada/ChangeLog:
>   PR ada/98996
>   * Makefile.rtl:  add 128Bit operation file to MIPS
>   N64 and N32 (LIBGNAT_TARGET_PAIRS, EXTRA_GNATRTL_NONTASKING_OBJS).

As with the other recent submission, the ChangeLog file is now generated
automatically.

The Makefile.rtl change is OK.

Arno


[PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Martin Liška

Hello.

This patch addresses the confusion that ICF causes when it merges two
variables with different alignments (when ASAN is used).

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

PR sanitizer/99168
* ipa-icf.c (sem_variable::merge): Do not merge 2 variables
with different alignment. That leads to an invalid red zone
size allocated in runtime.

gcc/testsuite/ChangeLog:

PR sanitizer/99168
* c-c++-common/asan/pr99168.c: New test.
---
 gcc/ipa-icf.c | 13 
 gcc/testsuite/c-c++-common/asan/pr99168.c | 26 +++
 2 files changed, 39 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/asan/pr99168.c

diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index 687ad8d45b7..5dd33a75c3a 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -88,6 +88,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-vector-builder.h"
 #include "symtab-thunks.h"
 #include "alias.h"
+#include "asan.h"
 
 using namespace ipa_icf_gimple;
 
@@ -2022,6 +2023,18 @@ sem_variable::merge (sem_item *alias_item)

   return false;
 }
 
+  if (DECL_ALIGN (original->decl) != DECL_ALIGN (alias->decl)

+  && (sanitize_flags_p (SANITIZE_ADDRESS, original->decl)
+ || sanitize_flags_p (SANITIZE_ADDRESS, alias->decl)))
+{
+  if (dump_enabled_p ())
+   dump_printf (MSG_MISSED_OPTIMIZATION,
+"Not unifying; "
+"ASAN requires equal alignments for original and alias\n");
+
+  return false;
+}
+
   if (DECL_ALIGN (original->decl) < DECL_ALIGN (alias->decl))
 {
   if (dump_enabled_p ())
diff --git a/gcc/testsuite/c-c++-common/asan/pr99168.c 
b/gcc/testsuite/c-c++-common/asan/pr99168.c
new file mode 100644
index 000..ed59ffb3d48
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr99168.c
@@ -0,0 +1,26 @@
+/* PR sanitizer/99168 */
+/* { dg-do run } */
+
+struct my_struct
+{
+  unsigned long volatile x;
+} __attribute__((aligned(128)));
+
+static int variablek[5][6] = {};
+static struct my_struct variables1 = {0UL};
+static struct my_struct variables2 __attribute__((aligned(32))) = {0UL};
+
+int main() {
+  int i, j;
+  for (i = 0; i < 5; i++) {
+for (j = 0; j < 6; j++) {
+  __builtin_printf("%d ", variablek[i][j]);
+}
+  }
+  __builtin_printf("\n");
+
+  __builtin_printf("%lu\n", variables1.x);
+  __builtin_printf("%lu\n", variables2.x);
+
+  return 0;
+}
--
2.30.1



Re: add rv64im{,c,fc} multilibs

2021-02-23 Thread Kito Cheng via Gcc-patches
Hi Alexandre:

We've added a new configure option to allow you to override that
without changing source code.

For example:
--with-multilib-generator="rv32i-ilp32--c;rv32im-ilp32--c;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f-rv32imafdc-;rv64im-lp64--;rv64imc-lp64--;rv64imfc-lp64f--;rv64imac-lp64--;rv64imafdc-lp64d--"

Doc:
https://gcc.gnu.org/install/configure.html

GCC Changes:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1e6691245ca2f1f329549f323f67afe32bcb97a

On Tue, Feb 23, 2021 at 4:01 PM Alexandre Oliva  wrote:
>
>
> We've had customer demand for these multilibs.  We'd be happy to
> maintain this change locally, but I thought I'd contribute the patch,
> just in case there's wider interest in them.  WDYT?
>
>
> for  gcc/ChangeLog
>
> * config/riscv/t-elf-multilib: Add multilibs for rv64im,
> rv64imc, and rv64imfc/lp64f.
> ---
>  gcc/config/riscv/t-elf-multilib |   11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/riscv/t-elf-multilib b/gcc/config/riscv/t-elf-multilib
> index 19f9434616c2d..b268e26c954c8 100644
> --- a/gcc/config/riscv/t-elf-multilib
> +++ b/gcc/config/riscv/t-elf-multilib
> @@ -1,6 +1,6 @@
>  # This file was generated by multilib-generator with the command:
> -#  ./multilib-generator rv32i-ilp32--c rv32im-ilp32--c rv32iac-ilp32-- 
> rv32imac-ilp32-- rv32imafc-ilp32f-rv32imafdc- rv64imac-lp64-- 
> rv64imafdc-lp64d--
> -MULTILIB_OPTIONS = 
> march=rv32i/march=rv32ic/march=rv32im/march=rv32imc/march=rv32iac/march=rv32imac/march=rv32imafc/march=rv32imafdc/march=rv32gc/march=rv64imac/march=rv64imafdc/march=rv64gc
>  mabi=ilp32/mabi=ilp32f/mabi=lp64/mabi=lp64d
> +#  ./multilib-generator rv32i-ilp32--c rv32im-ilp32--c rv32iac-ilp32-- 
> rv32imac-ilp32-- rv32imafc-ilp32f-rv32imafdc- rv64im-lp64-- rv64imc-lp64-- 
> rv64imfc-lp64f-- rv64imac-lp64-- rv64imafdc-lp64d--
> +MULTILIB_OPTIONS = 
> march=rv32i/march=rv32ic/march=rv32im/march=rv32imc/march=rv32iac/march=rv32imac/march=rv32imafc/march=rv32imafdc/march=rv32gc/march=rv64im/march=rv64imc/march=rv64imfc/march=rv64imac/march=rv64imafdc/march=rv64gc
>  mabi=ilp32/mabi=ilp32f/mabi=lp64/mabi=lp64f/mabi=lp64d
>  MULTILIB_DIRNAMES = rv32i \
>  rv32ic \
>  rv32im \
> @@ -10,17 +10,24 @@ rv32imac \
>  rv32imafc \
>  rv32imafdc \
>  rv32gc \
> +rv64im \
> +rv64imc \
> +rv64imfc \
>  rv64imac \
>  rv64imafdc \
>  rv64gc ilp32 \
>  ilp32f \
>  lp64 \
> +lp64f \
>  lp64d
>  MULTILIB_REQUIRED = march=rv32i/mabi=ilp32 \
>  march=rv32im/mabi=ilp32 \
>  march=rv32iac/mabi=ilp32 \
>  march=rv32imac/mabi=ilp32 \
>  march=rv32imafc/mabi=ilp32f \
> +march=rv64im/mabi=lp64 \
> +march=rv64imc/mabi=lp64 \
> +march=rv64imfc/mabi=lp64f \
>  march=rv64imac/mabi=lp64 \
>  march=rv64imafdc/mabi=lp64d
>  MULTILIB_REUSE = march.rv32i/mabi.ilp32=march.rv32ic/mabi.ilp32 \
>
>
> --
> Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
>Free Software Activist GNU Toolchain Engineer
> Vim, Vi, Voltei pro Emacs -- GNUlius Caesar


Re: [PATCH] config.sub, config.guess : Import upstream 2021-01-25.

2021-02-23 Thread Kito Cheng via Gcc-patches
Committed, thanks!

On Tue, Feb 23, 2021 at 4:18 PM Richard Biener  wrote:
>
> On Tue, 23 Feb 2021, Kito Cheng wrote:
>
> > Hi
> >
> > Does it update config.sub and config.guess, I know it's already
> > stage 4, but the config.* stuff update should be harmless things,
> > and we need this for RISC-V big-endian support, which is already
> > supported in binutils 2.36.
> >
> > This imports from:
> >
> > sha1 6faca61810d335c7837f320733fe8e15a1431fc2
>
> OK.
>
> > ChangeLog:
> >
> > * config.guess: Import latest upstream.
> > * config.sub: Import latest upstream.
> > ---
> >  config.guess | 48 +++-
> >  config.sub   | 23 +++
> >  2 files changed, 46 insertions(+), 25 deletions(-)
> >
> > diff --git a/config.guess b/config.guess
> > index 0fc11edb2d1..1972fda8eb0 100755
> > --- a/config.guess
> > +++ b/config.guess
> > @@ -1,8 +1,8 @@
> >  #! /bin/sh
> >  # Attempt to guess a canonical system name.
> > -#   Copyright 1992-2020 Free Software Foundation, Inc.
> > +#   Copyright 1992-2021 Free Software Foundation, Inc.
> >
> > -timestamp='2020-11-07'
> > +timestamp='2021-01-25'
> >
> >  # This file is free software; you can redistribute it and/or modify it
> >  # under the terms of the GNU General Public License as published by
> > @@ -27,7 +27,7 @@ timestamp='2020-11-07'
> >  # Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
> >  #
> >  # You can get the latest version of this script from:
> > -# 
> > https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
> > +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
> >  #
> >  # Please send patches to .
> >
> > @@ -50,7 +50,7 @@ version="\
> >  GNU config.guess ($timestamp)
> >
> >  Originally written by Per Bothner.
> > -Copyright 1992-2020 Free Software Foundation, Inc.
> > +Copyright 1992-2021 Free Software Foundation, Inc.
> >
> >  This is free software; see the source for copying conditions.  There is NO
> >  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR 
> > PURPOSE."
> > @@ -138,9 +138,7 @@ UNAME_VERSION=$( (uname -v) 2>/dev/null) || 
> > UNAME_VERSION=unknown
> >
> >  case "$UNAME_SYSTEM" in
> >  Linux|GNU|GNU/*)
> > - # If the system lacks a compiler, then just pick glibc.
> > - # We could probably try harder.
> > - LIBC=gnu
> > + LIBC=unknown
> >
> >   set_cc_for_build
> >   cat <<-EOF > "$dummy.c"
> > @@ -149,16 +147,30 @@ Linux|GNU|GNU/*)
> >   LIBC=uclibc
> >   #elif defined(__dietlibc__)
> >   LIBC=dietlibc
> > + #elif defined(__GLIBC__)
> > + LIBC=gnu
> >   #else
> > >   #include <stdarg.h>
> > + /* First heuristic to detect musl libc.  */
> >   #ifdef __DEFINED_va_list
> >   LIBC=musl
> > - #else
> > - LIBC=gnu
> >   #endif
> >   #endif
> >   EOF
> >   eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 
> > 's, ,,g')"
> > +
> > + # Second heuristic to detect musl libc.
> > + if [ "$LIBC" = unknown ] &&
> > +command -v ldd >/dev/null &&
> > +ldd --version 2>&1 | grep -q ^musl; then
> > + LIBC=musl
> > + fi
> > +
> > + # If the system lacks a compiler, then just pick glibc.
> > + # We could probably try harder.
> > + if [ "$LIBC" = unknown ]; then
> > + LIBC=gnu
> > + fi
> >   ;;
> >  esac
> >
> > @@ -176,10 +188,9 @@ case 
> > "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
> >   #
> >   # Note: NetBSD doesn't particularly care about the vendor
> >   # portion of the name.  We always set it to "unknown".
> > - sysctl="sysctl -n hw.machine_arch"
> >   UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
> > - "/sbin/$sysctl" 2>/dev/null || \
> > - "/usr/sbin/$sysctl" 2>/dev/null || \
> > + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
> > + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
> >   echo unknown))
> >   case "$UNAME_MACHINE_ARCH" in
> >   aarch64eb) machine=aarch64_be-unknown ;;
> > @@ -984,6 +995,9 @@ EOF
> >  k1om:Linux:*:*)
> >   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
> >   exit ;;
> > +loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
> > + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
> > + exit ;;
> >  m32r*:Linux:*:*)
> >   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
> >   exit ;;
> > @@ -1072,7 +1086,7 @@ EOF
> >  ppcle:Linux:*:*)
> >   echo powerpcle-unknown-linux-"$LIBC"
> >   exit ;;
> > -riscv32:Linux:*:* | riscv64:Linux:*:*)
> > +riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | 
> > riscv64be:Linux:*:*)
> >   echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
> >   exit ;;
> >  s390:Linux:*:* | s390x:Linux:*:*)
> > @@ -1468,8 +1482,8 @@ EOF
> >  i*86:rdos:*:*)
> >   echo "$UNAME_MACHINE"-pc-rdos
> >

Re: [PATCH v3 2/2] ada: add 128bit operation to MIPS N32 and N64

2021-02-23 Thread YunQiang Su via Gcc-patches
Arnaud Charlet  于2021年2月23日周二 下午5:07写道:
>
> > For MIPS N64 and N32:
> >   add GNATRTL_128BIT_PAIRS to LIBGNAT_TARGET_PAIRS
> >   add GNATRTL_128BIT_OBJS to EXTRA_GNATRTL_NONTASKING_OBJS
> >
> > gcc/ada/ChangeLog:
> >   PR ada/98996
> >   * Makefile.rtl:  add 128Bit operation file to MIPS
> >   N64 and N32 (LIBGNAT_TARGET_PAIRS, EXTRA_GNATRTL_NONTASKING_OBJS).
>
> As for the other recent submit, the ChangeLog file is now generated
> automatically.

I did generate the ChangeLog by contrib/mklog.py by:
 contrib/mklog.py 0002_MY_PATCH.patch
and get an empty ChangeLog template.

gcc/ada/ChangeLog:

* ChangeLog:
* Makefile.rtl:

So, should I keep it as is?




>
> The Makefile.rtl change is OK.
>
> Arno



--
YunQiang Su


Re: [PATCH v3 2/2] ada: add 128bit operation to MIPS N32 and N64

2021-02-23 Thread Arnaud Charlet
> > > For MIPS N64 and N32:
> > >   add GNATRTL_128BIT_PAIRS to LIBGNAT_TARGET_PAIRS
> > >   add GNATRTL_128BIT_OBJS to EXTRA_GNATRTL_NONTASKING_OBJS
> > >
> > > gcc/ada/ChangeLog:
> > >   PR ada/98996
> > >   * Makefile.rtl:  add 128Bit operation file to MIPS
> > >   N64 and N32 (LIBGNAT_TARGET_PAIRS, EXTRA_GNATRTL_NONTASKING_OBJS).
> >
> > As for the other recent submit, the ChangeLog file is now generated
> > automatically.
> 
> I did generate the ChangeLog by contrib/mklog.py by:
>  contrib/mklog.py 0002_MY_PATCH.patch
> and get an empty ChangeLog template.
> 
> gcc/ada/ChangeLog:
> 
> * ChangeLog:
> * Makefile.rtl:
> 
> So, should I keep it as is?

The ChangeLog file is generated automatically, so you should not touch it.

Arno


Re: [gcc-12 PATCH] ira: Correct HONOR_REG_ALLOC_ORDER usage

2021-02-23 Thread Uros Bizjak via Gcc-patches
On Tue, Feb 23, 2021 at 8:48 AM Richard Biener  wrote:
>
> On Mon, 22 Feb 2021, Uros Bizjak wrote:
>
> > The intention of HONOR_REG_ALLOC_ORDER is to ensure that IRA allocates
> > registers in the order given by REG_ALLOC_ORDER.  However in
> > ira_better_spill_reload_regno_p, there is still a place where the
> > calculation depends on the presence of REG_ALLOC_ORDER, ignoring
> > HONOR_REG_ALLOC_ORDER macro altogether.  The patch uses the correct macro
> > at this place.
> >
> > On the other hand, assign_hard_reg function respects HONOR_REG_ALLOC_ORDER,
> > but expects this macro to return 1 to avoid internal cost calculations.
> > As the macro is defined to 0 by default, it is expected that targets 
> > redefine
> > HONOR_REG_ALLOC_ORDER to return nonzero value, even if REG_ALLOC_ORDER
> > is defined.  This approach is prone to errors, so the patch defines
> > HONOR_REG_ALLOC_ORDER to 1 by default if REG_ALLOC_ORDER is defined.
> >
> > 2021-02-22  Uroš Bizjak  
> >
> > gcc/
> > * defaults.h (HONOR_REG_ALLOC_ORDER): If not defined,
> > define to 1 if REG_ALLOC_ORDER is defined.
> > * doc/tm.texi.in (HONOR_REG_ALLOC_ORDER):
> > Describe new default definition.
> > * doc/tm.texi: Regenerate.
> > * ira-color.c (ira_better_spill_reload_regno_p):
> > Use HONOR_REG_ALLOC_ORDER instead of REG_ALLOC_ORDER
> > to determine better spill reload regno.
> >
> > Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
> >
> > OK for gcc-12 when it opens?
>
> OK in case Vlad doesn't have a better suggestion or further comments.
>
> Do you have an idea for how many targets the changed default is an
> actual change?

Practically every target defines REG_ALLOC_ORDER, namely:

$ grep -R "define REG_ALLOC_ORDER" *
alpha/vms.h:#define REG_ALLOC_ORDER
alpha/alpha.h:#define REG_ALLOC_ORDER
arc/arc.h:#define REG_ALLOC_ORDER
arm/arm.h:#define REG_ALLOC_ORDER
avr/avr.h:#define REG_ALLOC_ORDER
bfin/bfin.h:#define REG_ALLOC_ORDER
c6x/c6x.h:#define REG_ALLOC_ORDER
cris/cris.h:#define REG_ALLOC_ORDER
csky/csky.h:#define REG_ALLOC_ORDER
epiphany/epiphany.h:#define REG_ALLOC_ORDER
frv/frv.h:#define REG_ALLOC_ORDER
h8300/h8300.h:#define REG_ALLOC_ORDER
i386/i386.h:#define REG_ALLOC_ORDER
ia64/ia64.h:#define REG_ALLOC_ORDER
iq2000/iq2000.h:#define REG_ALLOC_ORDER
m32c/m32c.h:#define REG_ALLOC_ORDER
m32r/m32r.h:#define REG_ALLOC_ORDER
m32r/m32r.h:#define REG_ALLOC_ORDER
m68k/m68k.h:#define REG_ALLOC_ORDER
mcore/mcore.h:#define REG_ALLOC_ORDER
mips/mips.h:#define REG_ALLOC_ORDER
mmix/mmix.h:#define REG_ALLOC_ORDER
mn10300/mn10300.h:#define REG_ALLOC_ORDER
msp430/msp430.h:#define REG_ALLOC_ORDER
nds32/nds32.h:#define REG_ALLOC_ORDER
nios2/nios2.h:#define REG_ALLOC_ORDER
or1k/or1k.h:#define REG_ALLOC_ORDER
pa/pa32-regs.h:#define REG_ALLOC_ORDER
pa/pa64-regs.h:#define REG_ALLOC_ORDER
pru/pru.h:#define REG_ALLOC_ORDER
riscv/riscv.h:#define REG_ALLOC_ORDER
rl78/rl78.h:#define REG_ALLOC_ORDER
rs6000/rs6000.h:#define REG_ALLOC_ORDER
rx/rx.h:#define REG_ALLOC_ORDER
s390/s390.h:#define REG_ALLOC_ORDER
sh/sh.h:#define REG_ALLOC_ORDER
sparc/sparc.h:#define REG_ALLOC_ORDER
stormy16/stormy16.h:#define REG_ALLOC_ORDER
tilegx/tilegx.h:#define REG_ALLOC_ORDER
tilepro/tilepro.h:#define REG_ALLOC_ORDER
v850/v850.h:#define REG_ALLOC_ORDER
visium/visium.h:#define REG_ALLOC_ORDER
xtensa/xtensa.h:#define REG_ALLOC_ORDER

while HONOR_REG_ALLOC_ORDER is defined by a few:

$ grep -R "define HONOR_REG_ALLOC_ORDER" *
arc/arc.h:#define HONOR_REG_ALLOC_ORDER 1
arm/arm.h:#define HONOR_REG_ALLOC_ORDER optimize_function_for_size_p (cfun)
nds32/nds32.h:#define HONOR_REG_ALLOC_ORDER optimize_size
nios2/nios2.h:#define HONOR_REG_ALLOC_ORDER (TARGET_HAS_CDX)

So, setting new HONOR_REG_ALLOC_ORDER default would have a noticeable
impact, mainly in the existing assign_hard_reg heuristics (please note
that the patched condition in ira_better_spill_reload_regno_p with the new
default is a no-op for targets that don't define
HONOR_REG_ALLOC_ORDER, and a correction for targets that do).

Based on the comment in assign_hard_reg, the heuristic assumes that:

  /* We don't care about giving callee saved registers to allocnos no
 living through calls because call clobbered registers are
 allocated first (it is usual practice to put them first in
 REG_ALLOC_ORDER).  */

and now for targets that define REG_ALLOC_ORDER (and don't define
HONOR_REG_ALLOC_ORDER) the new default *disables*

  if (!HONOR_REG_ALLOC_ORDER)
{
  if ((saved_nregs = calculate_saved_nregs (hard_regno, mode)) != 0)
  /* We need to save/restore the hard register in
 epilogue/prologue.  Therefore we increase the cost.  */
  {
rclass = REGNO_REG_CLASS (hard_regno);
add_cost = ((ira_memory_move_cost[mode][rclass][0]
 + ira_memory_move_cost[mode][rclass][1])
* saved_nregs / hard_regno_nregs (hard_regno,
  mode) - 1);
cost += add_cost;
full_cost += add_

Re: [PATCH v3 2/2] ada: add 128bit operation to MIPS N32 and N64

2021-02-23 Thread YunQiang Su via Gcc-patches
Arnaud Charlet  于2021年2月23日周二 下午5:33写道:
>
> > > > For MIPS N64 and N32:
> > > >   add GNATRTL_128BIT_PAIRS to LIBGNAT_TARGET_PAIRS
> > > >   add GNATRTL_128BIT_OBJS to EXTRA_GNATRTL_NONTASKING_OBJS
> > > >
> > > > gcc/ada/ChangeLog:
> > > >   PR ada/98996
> > > >   * Makefile.rtl:  add 128Bit operation file to MIPS
> > > >   N64 and N32 (LIBGNAT_TARGET_PAIRS, EXTRA_GNATRTL_NONTASKING_OBJS).
> > >
> > > As for the other recent submit, the ChangeLog file is now generated
> > > automatically.
> >
> > I did generate the ChangeLog by contrib/mklog.py by:
> >  contrib/mklog.py 0002_MY_PATCH.patch
> > and get an empty ChangeLog template.
> >
> > gcc/ada/ChangeLog:
> >
> > * ChangeLog:
> > * Makefile.rtl:
> >
> > So, should I keep it as is?
>
> The ChangeLog file is generated automatically, so you should not touch it.

Thank you, I got it.  So the ChangeLog file is generated from the
ChangeLog section of the commit msg?

>
> Arno



-- 
YunQiang Su


[PATCH v4 1/2] MIPS: unaligned load: use SImode for SUBREG if OK (PR98996)

2021-02-23 Thread YunQiang Su
This was found via the FTBFS of the Ada file s-pack96.adb, caused by a
96-bit load: 96 = 64 + 32.  The 32-bit pair of l r is marked as SUBREG,
so they are not in SImode, which makes it fail to find a suitable insn.

gcc/ChangeLog:

PR target/98996
* config/mips/mips.c (mips_expand_ext_as_unaligned_load):
If TARGET_64BIT and dest is SUBREG, we check the width, if it
equal to SImode, we use SImode operation, just like what we are
doing for REG one.
---
 gcc/config/mips/mips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 8bd2d29552e..e901d860c3d 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -8400,7 +8400,7 @@ mips_expand_ext_as_unaligned_load (rtx dest, rtx src, 
HOST_WIDE_INT width,
   /* If TARGET_64BIT, the destination of a 32-bit "extz" or "extzv" will
  be a DImode, create a new temp and emit a zero extend at the end.  */
   if (GET_MODE (dest) == DImode
-  && REG_P (dest)
+  && (REG_P (dest) || (SUBREG_P(dest) && !MEM_P(SUBREG_REG(dest))))
   && GET_MODE_BITSIZE (SImode) == width)
 {
   dest1 = dest;
-- 
2.20.1



[PATCH v4 2/2] ada: add 128bit operation for MIPS N32 and N64

2021-02-23 Thread YunQiang Su
For MIPS N64 and N32:
  add GNATRTL_128BIT_PAIRS to LIBGNAT_TARGET_PAIRS
  add GNATRTL_128BIT_OBJS to EXTRA_GNATRTL_NONTASKING_OBJS

gcc/ada/ChangeLog:

PR ada/98996
* Makefile.rtl: 
add 128Bit operation file for MIPS N64 and N32 to
LIBGNAT_TARGET_PAIRS and EXTRA_GNATRTL_NONTASKING_OBJS
---
 gcc/ada/Makefile.rtl | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
index 35faf13ea46..987eff0abba 100644
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -2311,6 +2311,18 @@ ifeq ($(strip $(filter-out mips% linux%,$(target_cpu) 
$(target_os))),)
   s-tpopsp.adb

[PATCH v4 1/2] MIPS: Not trigger error for pre-R6 and -mcompact-branches=always

2021-02-23 Thread YunQiang Su
For MIPSr6, we may wish to use compact-branches only.
Currently, we have to use the `always' option, but it is marked as
conflicting with pre-R6:
  cc1: error: unsupported combination: ‘mips32r2’ -mcompact-branches=always
Just ignore -mcompact-branches=always for pre-R6.

This patch also defines
__mips_compact_branches_never
__mips_compact_branches_always
__mips_compact_branches_optimal
predefined macros

gcc/ChangeLog:
* config/mips/mips.c (mips_option_override):
* config/mips/mips.h (TARGET_RTP_PIC): not trigger error for
compact-branches=always for pre-R6.
(TARGET_CB_NEVER): Likewise.
(TARGET_CB_ALWAYS): Likewise.
(struct mips_cpu_info): define macros for compact branch policy.
* doc/invoke.texi: Document "always" with pre-R6.

gcc/testsuite/ChangeLog:
* gcc.target/mips/compact-branches-1.c: add isa_rev>=6.
* gcc.target/mips/mips.exp: don't add -mipsXXr6 option for
-mcompact-branches=always. It is usable for pre-R6 now.
* gcc.target/mips/compact-branches-8.c: New test.
* gcc.target/mips/compact-branches-9.c: New test.
---
 gcc/config/mips/mips.c|  8 +--
 gcc/config/mips/mips.h| 22 ---
 gcc/doc/invoke.texi   |  1 +
 .../gcc.target/mips/compact-branches-1.c  |  2 +-
 .../gcc.target/mips/compact-branches-8.c  | 10 +
 .../gcc.target/mips/compact-branches-9.c  | 10 +
 gcc/testsuite/gcc.target/mips/mips.exp|  4 +---
 7 files changed, 38 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/compact-branches-8.c
 create mode 100644 gcc/testsuite/gcc.target/mips/compact-branches-9.c

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 8bd2d29552e..9a75dd61031 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -20107,13 +20107,7 @@ mips_option_override (void)
   target_flags |= MASK_ODD_SPREG;
 }
 
-  if (!ISA_HAS_COMPACT_BRANCHES && mips_cb == MIPS_CB_ALWAYS)
-{
-  error ("unsupported combination: %qs%s %s",
- mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
- "-mcompact-branches=always");
-}
-  else if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER)
+  if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER)
 {
   error ("unsupported combination: %qs%s %s",
  mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index b4a60a55d80..b8399fe1b0d 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -103,11 +103,9 @@ struct mips_cpu_info {
 #define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic)
 
 /* Compact branches must not be used if the user either selects the
-   'never' policy or the 'optimal' policy on a core that lacks
+   'never' policy or the 'optimal' / 'always' policy on a core that lacks
compact branch instructions.  */
-#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER  \
-|| (mips_cb == MIPS_CB_OPTIMAL \
-&& !ISA_HAS_COMPACT_BRANCHES))
+#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER || !ISA_HAS_COMPACT_BRANCHES)
 
 /* Compact branches may be used if the user either selects the
'always' policy or the 'optimal' policy on a core that supports
@@ -117,10 +115,11 @@ struct mips_cpu_info {
 && ISA_HAS_COMPACT_BRANCHES))
 
 /* Compact branches must always be generated if the user selects
-   the 'always' policy or the 'optimal' policy om a core that
-   lacks delay slot branch instructions.  */
-#define TARGET_CB_ALWAYS (mips_cb == MIPS_CB_ALWAYS\
-|| (mips_cb == MIPS_CB_OPTIMAL \
+   the 'always' policy on a core that supports compact branches,
+   or the 'optimal' policy on a core that lacks delay slot branch 
instructions.  */
+#define TARGET_CB_ALWAYS ((mips_cb == MIPS_CB_ALWAYS \
+&& ISA_HAS_COMPACT_BRANCHES) \
+|| (mips_cb == MIPS_CB_OPTIMAL   \
 && !ISA_HAS_DELAY_SLOTS))
 
 /* Special handling for JRC that exists in microMIPSR3 as well as R6
@@ -655,6 +654,13 @@ struct mips_cpu_info {
builtin_define ("__mips_no_lxc1_sxc1"); \
   if (!ISA_HAS_UNFUSED_MADD4 && !ISA_HAS_FUSED_MADD4)  \
builtin_define ("__mips_no_madd4"); \
+   \
+  if (TARGET_CB_NEVER) \
+   builtin_define ("__mips_compact_branches_never");   \
+  else if (TARGET_CB_ALWAYS)   \
+   builtin_define ("__mips_compact_branches_always");  \
+  else \
+   builtin_define ("

[PATCH v4 2/2] MIPS: add builtime option for -mcompact-branches

2021-02-23 Thread YunQiang Su
For R6+ target, it allows to configure gcc to use compact branches only.

gcc/ChangeLog:
* config.gcc: add --with-compact-branches=policy build option.
* doc/install.texi: Likewise.
---
 gcc/config.gcc   | 12 +++-
 gcc/doc/install.texi | 19 +++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 17fea83b2e4..047f5631067 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4743,7 +4743,7 @@ case "${target}" in
;;
 
mips*-*-*)
-   supported_defaults="abi arch arch_32 arch_64 float fpu nan 
fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
madd4"
+   supported_defaults="abi arch arch_32 arch_64 float fpu nan 
fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
madd4 compact-branches"
 
case ${with_float} in
"" | soft | hard)
@@ -4896,6 +4896,16 @@ case "${target}" in
exit 1
;;
esac
+
+   case ${with_compact_branches} in
+   never | always | optimal)
+   with_compact_branches=${with_compact_branches}
+   ;;
+   *)
+   echo "Unknown compact-branches policy used in 
--with-compact-branches" 1>&2
+   exit 1
+   ;;
+   esac
;;
 
nds32*-*-*)
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 4c38244ae58..865630826c6 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1464,6 +1464,25 @@ systems that support conditional traps).
 Division by zero checks use the break instruction.
 @end table
 
+@item --with-compact-branches=@var{policy}
+Specify the policy the compiler should use when choosing between compact
+and delay slot branches.  This option is only supported on the MIPS target.
+The possibilities for @var{policy} are:
+@table @code
+@item optimal
+Cause a delay slot branch to be used if one is available in the
+current ISA and the delay slot is successfully filled. If the delay slot
+is not filled, a compact branch will be chosen if one is available.
+@item never
+Ensures that compact branch instructions will never be generated.
+@item always
+Ensures that a compact branch instruction will be generated if available.
+If a compact branch instruction is not available,
+a delay slot form of the branch will be used instead.
+This option is supported from MIPS Release 6 onwards.
+For pre-R6, this option is just the same as never/optimal.
+@end table
+
 @c If you make --with-llsc the default for additional targets,
 @c update the --with-llsc description in the MIPS section below.
 
-- 
2.20.1



[PATCH][pushed] Fix UBSAN in __ubsan::Value::getSIntValue

2021-02-23 Thread Martin Liška

The patch is an LLVM backport.

Applied to master.

/home/marxin/Programming/gcc2/libsanitizer/ubsan/ubsan_value.cpp:77:25: runtime 
error: left shift of 0xfffb by 96 places cannot be 
represented in type '__int128'
#0 0x7754edfe in __ubsan::Value::getSIntValue() const 
/home/marxin/Programming/gcc2/libsanitizer/ubsan/ubsan_value.cpp:77
#1 0x77548719 in __ubsan::Value::isNegative() const 
/home/marxin/Programming/gcc2/libsanitizer/ubsan/ubsan_value.h:190
#2 0x77542a34 in handleShiftOutOfBoundsImpl 
/home/marxin/Programming/gcc2/libsanitizer/ubsan/ubsan_handlers.cpp:338
#3 0x775431b7 in __ubsan_handle_shift_out_of_bounds 
/home/marxin/Programming/gcc2/libsanitizer/ubsan/ubsan_handlers.cpp:370
#4 0x40067f in main (/home/marxin/Programming/testcases/a.out+0x40067f)
#5 0x772c8b24 in __libc_start_main (/lib64/libc.so.6+0x27b24)
#6 0x4005bd in _start (/home/marxin/Programming/testcases/a.out+0x4005bd)

Differential Revision: https://reviews.llvm.org/D97263

Cherry-pick from 16ede0956cb1f4b692dfa619ccfa6ab1de28e19b.
---
 libsanitizer/ubsan/ubsan_value.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libsanitizer/ubsan/ubsan_value.cpp 
b/libsanitizer/ubsan/ubsan_value.cpp
index 79c3ba991d3..40042bf3a90 100644
--- a/libsanitizer/ubsan/ubsan_value.cpp
+++ b/libsanitizer/ubsan/ubsan_value.cpp
@@ -74,7 +74,7 @@ SIntMax Value::getSIntValue() const {
 // to SIntMax.
 const unsigned ExtraBits =
   sizeof(SIntMax) * 8 - getType().getIntegerBitWidth();
-return SIntMax(Val) << ExtraBits >> ExtraBits;
+return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits;
   }
   if (getType().getIntegerBitWidth() == 64)
 return *reinterpret_cast<s64*>(Val);
--
2.30.1



Re: add rv64im{,c,fc} multilibs

2021-02-23 Thread Alexandre Oliva
On Feb 23, 2021, Kito Cheng  wrote:

> We've added a new configure option to allow you to override that
> without changing source code.

Ah, nice, thanks!  I'll add a note on our internal patch to switch to
that when we switch to GCC 11.


I take your response as confirming my expectation that the defaults are
to remain unchanged for now, and I will thus proceed under this
assumption.

Thanks for the prompt response!


-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist GNU Toolchain Engineer
Vim, Vi, Voltei pro Emacs -- GNUlius Caesar


Re: [PATCH v3 2/2] ada: add 128bit operation to MIPS N32 and N64

2021-02-23 Thread Arnaud Charlet


>>> I did generate the ChangeLog by contrib/mklog.py by:
>>> contrib/mklog.py 0002_MY_PATCH.patch
>>> and get an empty ChangeLog template.
>>> 
>>> gcc/ada/ChangeLog:
>>> 
>>>* ChangeLog:
>>>* Makefile.rtl:
>>> 
>>> So, should I keep it as is?
>> 
>> The ChangeLog file is generated automatically, so you should not touch it.
> 
> Thank you. I got it. so the the ChangeLog file is generated for the
> ChangeLog section of commit msg?

It’s generated from the commit log, yes.

Arno



[Patch, fortran] PR99124 - [9/10/11 Regression] ICE in gfc_get_class_from_expr, at fortran/trans-expr.c:541

2021-02-23 Thread Paul Richard Thomas via Gcc-patches
Hi All,

This is a straightforward fix that had the side-effect of uncovering an
invalid testcase, class_assign_4.f90. I had worked up a new test, based on
the one in the PR, and found that another compiler determined that it is
invalid according to F2018, C15100.

I was unable to find a way to use a typebound operator with a polymorphic
result and so resorted to correcting class_assign_4.f90 with an operator
interface. This respects the purpose of the test. I have left the commented
out lines in place for the review; these will be removed when committing.

Regtested on FC33/x86_64. OK for 9- to 11-branches?

Paul

Fortran: Fix for class functions as associated target [PR99124].

2021-02-23  Paul Thomas  

gcc/fortran
PR fortran/99124
* resolve.c (resolve_fl_procedure): Include class results in
the test for F2018, C15100.
* trans-array.c (get_class_info_from_ss): Do not use the saved
descriptor to obtain the class expression for variables. Use
gfc_get_class_from_expr instead.

gcc/testsuite/
PR fortran/99124
* gfortran.dg/class_defined_operator_2.f03 : New test.
* class_assign_4.f90: Correct the non-conforming elemental
function with an allocatable result with an operator interface
with array dummies and result.
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 11b5dbc7a03..b4dd32163af 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -13051,6 +13051,7 @@ static bool
 resolve_fl_procedure (gfc_symbol *sym, int mp_flag)
 {
   gfc_formal_arglist *arg;
+  bool allocatable_or_pointer;
 
   if (sym->attr.function
   && !resolve_fl_var_and_proc (sym, mp_flag))
@@ -13235,8 +13236,16 @@ resolve_fl_procedure (gfc_symbol *sym, int mp_flag)
   /* F2018, C15100: "The result of an elemental function shall be scalar,
  and shall not have the POINTER or ALLOCATABLE attribute."  The scalar
  pointer is tested and caught elsewhere.  */
+  if (sym->result)
+allocatable_or_pointer = sym->ts.type == BT_CLASS ?
+			 (CLASS_DATA (sym->result)->attr.allocatable
+			  || CLASS_DATA (sym->result)->attr.pointer) :
+			 (sym->result->attr.allocatable
+			  || sym->result->attr.pointer);
+
   if (sym->attr.elemental && sym->result
-  && (sym->result->attr.allocatable || sym->result->attr.pointer))
+  && sym->result->ts.type != BT_CLASS
+  && allocatable_or_pointer)
 {
   gfc_error ("Function result variable %qs at %L of elemental "
 		 "function %qs shall not have an ALLOCATABLE or POINTER "
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index c346183e129..c6725659093 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -1167,8 +1167,11 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
   && rhs_ss->info->expr->ts.type == BT_CLASS
   && rhs_ss->info->data.array.descriptor)
 {
-  rhs_class_expr
-	= gfc_get_class_from_expr (rhs_ss->info->data.array.descriptor);
+  if (rhs_ss->info->expr->expr_type != EXPR_VARIABLE)
+	rhs_class_expr
+	  = gfc_get_class_from_expr (rhs_ss->info->data.array.descriptor);
+  else
+	rhs_class_expr = gfc_get_class_from_gfc_expr (rhs_ss->info->expr);
   unlimited_rhs = UNLIMITED_POLY (rhs_ss->info->expr);
   if (rhs_ss->info->expr->expr_type == EXPR_FUNCTION)
 	rhs_function = true;
diff --git a/gcc/testsuite/gfortran.dg/class_assign_4.f90 b/gcc/testsuite/gfortran.dg/class_assign_4.f90
index 517e3121cc8..c6c54bbaed2 100644
--- a/gcc/testsuite/gfortran.dg/class_assign_4.f90
+++ b/gcc/testsuite/gfortran.dg/class_assign_4.f90
@@ -11,17 +11,21 @@ module m
   type :: t1
 integer :: i
   CONTAINS
-PROCEDURE :: add_t1
-GENERIC :: OPERATOR(+) => add_t1
+!PROCEDURE :: add_t1
+!GENERIC :: OPERATOR(+) => add_t1
   end type
   type, extends(t1) :: t2
 real :: r
   end type
 
+  interface operator(+)
+module procedure add_t1
+  end interface
+
 contains
-  impure elemental function add_t1 (a, b) result (c)
-class(t1), intent(in) :: a, b
-class(t1), allocatable :: c
+  function add_t1 (a, b) result (c)
+class(t1), intent(in) :: a(:), b(:)
+class(t1), allocatable :: c(:)
 allocate (c, source = a)
 c%i = a%i + b%i
 select type (c)


class_defined_operator_2.f03
Description: Binary data


Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Richard Biener via Gcc-patches
On Tue, Feb 23, 2021 at 10:42 AM Martin Liška  wrote:
>
> Hello.
>
> The patch is about confusion that brings ICF when it merged 2 variables
> with different alignments (when ASAN is used).
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

Can't we fix the asan runtime?  Does the same issue happen when merging
two comdat with different alignment and LTO?

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> PR sanitizer/99168
> * ipa-icf.c (sem_variable::merge): Do not merge 2 variables
> with different alignment. That leads to an invalid red zone
> size allocated in runtime.
>
> gcc/testsuite/ChangeLog:
>
> PR sanitizer/99168
> * c-c++-common/asan/pr99168.c: New test.
> ---
>   gcc/ipa-icf.c | 13 
>   gcc/testsuite/c-c++-common/asan/pr99168.c | 26 +++
>   2 files changed, 39 insertions(+)
>   create mode 100644 gcc/testsuite/c-c++-common/asan/pr99168.c
>
> diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
> index 687ad8d45b7..5dd33a75c3a 100644
> --- a/gcc/ipa-icf.c
> +++ b/gcc/ipa-icf.c
> @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "tree-vector-builder.h"
>   #include "symtab-thunks.h"
>   #include "alias.h"
> +#include "asan.h"
>
>   using namespace ipa_icf_gimple;
>
> @@ -2022,6 +2023,18 @@ sem_variable::merge (sem_item *alias_item)
> return false;
>   }
>
> +  if (DECL_ALIGN (original->decl) != DECL_ALIGN (alias->decl)
> +  && (sanitize_flags_p (SANITIZE_ADDRESS, original->decl)
> + || sanitize_flags_p (SANITIZE_ADDRESS, alias->decl)))
> +{
> +  if (dump_enabled_p ())
> +   dump_printf (MSG_MISSED_OPTIMIZATION,
> +"Not unifying; "
> +"ASAN requires equal alignments for original and 
> alias\n");
> +
> +  return false;
> +}
> +
> if (DECL_ALIGN (original->decl) < DECL_ALIGN (alias->decl))
>   {
> if (dump_enabled_p ())
> diff --git a/gcc/testsuite/c-c++-common/asan/pr99168.c 
> b/gcc/testsuite/c-c++-common/asan/pr99168.c
> new file mode 100644
> index 000..ed59ffb3d48
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/asan/pr99168.c
> @@ -0,0 +1,26 @@
> +/* PR sanitizer/99168 */
> +/* { dg-do run } */
> +
> +struct my_struct
> +{
> +  unsigned long volatile x;
> +} __attribute__((aligned(128)));
> +
> +static int variablek[5][6] = {};
> +static struct my_struct variables1 = {0UL};
> +static struct my_struct variables2 __attribute__((aligned(32))) = {0UL};
> +
> +int main() {
> +  int i, j;
> +  for (i = 0; i < 5; i++) {
> +for (j = 0; j < 6; j++) {
> +  __builtin_printf("%d ", variablek[i][j]);
> +}
> +  }
> +  __builtin_printf("\n");
> +
> +  __builtin_printf("%lu\n", variables1.x);
> +  __builtin_printf("%lu\n", variables2.x);
> +
> +  return 0;
> +}
> --
> 2.30.1
>


Re: [Patch, fortran] PR99124 - [9/10/11 Regression] ICE in gfc_get_class_from_expr, at fortran/trans-expr.c:541

2021-02-23 Thread Tobias Burnus

Hi Paul,

On 23.02.21 12:52, Paul Richard Thomas via Gcc-patches wrote:

This is a straightforward fix that had the side-effect of uncovering an
invalid testcase, class_assign_4.f90. I had worked up a new test, based on
the one in the PR, and found that another brand determined that it is
invalid according to F2018, C15100.


Namely: "C15100  All dummy arguments of an elemental procedure ... shall
not have the POINTER or ALLOCATABLE attribute."

The operator does not have to be elemental – as the fixed test case shows.

→ Can you also add a testcase which triggers the error message you
see in the unpatched class_assign_4.f90?

I was unable to find a way to use a typebound operator with a polymorphic
result


I am confused – the attached testcase does seem to work fine with current
GCC. (And if we don't have such a testcase, it should be added.)

Can you elaborate?


and so resorted to correcting class_assign_4.f90 with an operator
interface. This respects the purpose of the test. I have left the commented
out lines in place for the review; these will be removed when committing.

Regtested on FC33/x86_64. OK for 9- to 11-branches?


The patch itself LGTM, except for testing the newly shown error message
and for the confusion about the type-bound operator.

Thanks,

Tobias


Fortran: Fix for class functions as associated target [PR99124].

2021-02-23  Paul Thomas  

gcc/fortran
PR fortran/99124
* resolve.c (resolve_fl_procedure): Include class results in
the test for F2018, C15100.
* trans-array.c (get_class_info_from_ss): Do not use the saved
descriptor to obtain the class expression for variables. Use
gfc_get_class_from_gfc_expr instead.

gcc/testsuite/
PR fortran/99124
* gfortran.dg/class_defined_operator_2.f03 : New test.
* class_assign_4.f90: Correct the non-conforming elemental
function with an allocatable result with an operator interface
with array dummies and result.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
! Testcase: type-bound defined OPERATOR(+) whose (non-elemental, scalar)
! implementation returns a polymorphic ALLOCATABLE result.
module m
! Base type; binds add_t1 as the type-bound generic operator(+).
type t1
   integer :: i
contains
PROCEDURE :: add_t1
GENERIC :: OPERATOR(+) => add_t1
end type

! Extension of t1 carrying one extra integer component.
type, extends(t1):: t2
  integer j
end type
contains
  ! Implements a + b for class(t1) operands.
  !   a, b : scalar polymorphic operands (intent(in)).
  !   c    : allocatable polymorphic result, allocated with the dynamic
  !          type of a (mold = a); c%i is the sum of both i components.
  function add_t1 (a, b) result (c)
class(t1), intent(in) :: a, b
class(t1), allocatable :: c
allocate (c, mold = a)
c%i = a%i + b%i
! The j component exists only when the result (and hence a) is a t2.
select type(c)
 class is (t2)
  select type(a)
   class is (t2)
select type(b)
  class is (t2)
! Both operands are t2: add the j components as well.
c%j = a%j + b%j
  class default
! b has no j component: take j from a unchanged.
c%j = a%j
end select
  end select
end select
  end
end module m

! Main program: exercises operator(+) from module m with every t1/t2
! operand combination, first on non-polymorphic variables and then on
! polymorphic allocatables; each failing check has a distinct STOP code.
use m
type(t1) :: v1, v1a
type(t2) :: v2, v2a
class(t1), allocatable :: c1, c2

v1 = t1(42)
v1a = t1(43)
v2 = t2(11,22)
v2a = t2(55,66)

! t1 + t1: result must be a t1 (not an extension) with i = 42 + 43.
c1 = v1 + v1a
select type (c1); class is (t1); if (c1%i /= 42 + 43) stop 1; class default; stop 2; end select
c1 = v1; c2 = v1a
c1 = c1 + c2
select type (c1); class is (t1); if (c1%i /= 42 + 43) stop 3; class default; stop 4; end select

! t1 + t2: the result takes the dynamic type of the left operand, so t1.
c1 = v1 + v2
select type (c1); class is (t1); if (c1%i /= 42 + 11) stop 5; class default; stop 6; end select
c1 = v1; c2 = v2
c1 = c1 + c2
select type (c1); class is (t1); if (c1%i /= 42 + 11) stop 7; class default; stop 8; end select

! t2 + t1: result is a t2; j is copied from the left operand (22).
c1 = v2 + v1
select type (c1); class is (t2); if (c1%i /= 11 + 42.or.c1%j /= 22) stop 9; class default; stop 10; end select
c1 = v2; c2 = v1
c1 = c1 + c2
select type (c1); class is (t2); if (c1%i /= 11 + 42.or.c1%j /= 22) stop 11; class default; stop 12; end select
end 


Re: [PATCH, constexpr, coroutines ] Generic lambda coroutines cannot be constexpr [PR96251].

2021-02-23 Thread Iain Sandoe
Hi Jason,

Jason Merrill  wrote:

> On 2/22/21 3:59 PM, Iain Sandoe wrote:

>> * I was not able to see any way in which the instantiation process
>>   could be made to bail in this case and re-try for non-constexpr.
> 
> Many of the other places that set cp_function_chain->invalid_constexpr 
> condition their errors on !is_instantiation_of_constexpr, which should also 
> fix this testcase.

Thanks!
(FWIW, there only seem to be three instances of this in the FE and two of those 
are in constexpr.c).

so like this?
(tested on x86_64-darwin, regtest running x86_64 linux)

thanks
iain


 [PATCH] coroutines : Adjust error handling for type-dependent coroutines 
[PR96251].

Although coroutines are not permitted to be constexpr, generic lambdas
are implicitly constexpr from C++17 and, because of this, a generic coroutine
lambda can be marked as potentially constexpr. As per the PR, this then fails when
type substitution is attempted because the check disallowing constexpr in
the coroutines code was overly restrictive.

This changes the error handling to mark the function as 'invalid_constexpr'
but suppresses the error in the case that we are instantiating a constexpr.

gcc/cp/ChangeLog:

PR c++/96251
* coroutines.cc (coro_common_keyword_context_valid_p): Suppress
error reporting when instantiating for a constexpr.

gcc/testsuite/ChangeLog:

PR c++/96251
* g++.dg/coroutines/pr96251.C: New test.
---
 gcc/cp/coroutines.cc  | 11 +---
 gcc/testsuite/g++.dg/coroutines/pr96251.C | 32 +++
 2 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr96251.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index e61de1fac01..abfe8d08192 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -683,11 +683,14 @@ coro_common_keyword_context_valid_p (tree fndecl, 
location_t kw_loc,
 
   if (DECL_DECLARED_CONSTEXPR_P (fndecl))
 {
-  /* [dcl.constexpr] 3.3 it shall not be a coroutine.  */
-  error_at (kw_loc, "%qs cannot be used in a % function",
-   kw_name);
   cp_function_chain->invalid_constexpr = true;
-  return false;
+  if (!is_instantiation_of_constexpr (fndecl))
+   {
+ /* [dcl.constexpr] 3.3 it shall not be a coroutine.  */
+ error_at (kw_loc, "%qs cannot be used in a % function",
+   kw_name);
+ return false;
+   }
 }
 
   if (FNDECL_USED_AUTO (fndecl))
diff --git a/gcc/testsuite/g++.dg/coroutines/pr96251.C 
b/gcc/testsuite/g++.dg/coroutines/pr96251.C
new file mode 100644
index 000..3f435044e41
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr96251.C
@@ -0,0 +1,32 @@
+#include 
+
+struct coroutine {
+  struct promise_type {
+auto get_return_object() { return coroutine(); }
+auto initial_suspend() { return std::suspend_always(); }
+auto yield_value(int) { return std::suspend_always(); }
+void return_void() {}
+auto final_suspend() noexcept { return std::suspend_always(); }
+void unhandled_exception() {}
+  };
+};
+
+int main() {
+  auto f = [](auto max) -> coroutine {
+for (int i = 0; i < max; ++i) {
+   co_yield i;
+}
+  };
+
+  f(10);
+
+  // From PR98976
+  auto foo = [](auto&&) -> coroutine {
+switch (42) {
+  case 42:
+co_return;
+}
+  };
+  foo(1);
+
+}
-- 
2.24.1




[PATCH 1/4] libstdc++: More efficient date from days.

2021-02-23 Thread Cassio Neri via Gcc-patches
This patch reimplements std::chrono::year_month_day::_S_from_days() which
retrieves a date from the number of elapsed days since 1970/01/01.  The new
implementation is based on Proposition 6.3 of Neri and Schneider, "Euclidean
Affine Functions and Applications to Calendar Algorithms" available at
https://arxiv.org/abs/2102.06959.

The aforementioned paper benchmarks the implementation against several
counterparts, including libc++'s (which is identical to the current
implementation).  The results, shown in Figure 4, indicate the new algorithm is
2.2 times faster than the current one.

The patch adds a test which loops through all integers in [-12687428, 11248737],
and for each of them, gets the corresponding date and compares the result
against its expected value.  The latter is calculated using a much simpler and
easier to understand algorithm, which is also much slower.

The interval used in the test covers the full range of values for which a
roundtrip must work [time.cal.ymd.members].  Despite its completeness the test
runs in a matter of seconds.

libstdc++-v3/ChangeLog:

* include/std/chrono:
* testsuite/std/time/year_month_day/3.cc: New test.
---
 libstdc++-v3/include/std/chrono   | 46 
 .../testsuite/std/time/year_month_day/3.cc| 71 +++
 2 files changed, 104 insertions(+), 13 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/time/year_month_day/3.cc

diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index 7840099d743..d224762fd3f 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -2429,22 +2429,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // TODO: Implement operator<<, from_stream.
 };

-// Construct from days since 1970/01/01. Magic.
+// Construct from days since 1970/01/01.
+// Proposition 6.3 of Neri and Schneider, "Euclidean Affine
Functions and Applications to
+// Calendar Algorithms". https://arxiv.org/abs/2102.06959
 constexpr year_month_day
 year_month_day::_S_from_days(const days& __dp) noexcept
 {
-  const auto __z = __dp.count() + 719468;
-  const auto __era = (__z >= 0 ? __z : __z - 146096) / 146097;
-  const auto __doe = static_cast(__z - __era * 146097);
-  const auto __yoe
-= (__doe - __doe / 1460 + __doe / 36524 - __doe / 146096) / 365;
-  const auto __y = static_cast(__yoe) + __era * 400;
-  const auto __doy = __doe - (365 * __yoe + __yoe / 4 - __yoe / 100);
-  const auto __mp = (5 * __doy + 2) / 153;
-  const auto __d = __doy - (153 * __mp + 2) / 5 + 1;
-  const auto __m = __mp < 10 ? __mp + 3 : __mp - 9;
-  return year_month_day{chrono::year(__y + (__m <= 2)),
-chrono::month(__m), chrono::day(__d)};
+  constexpr auto __z2= static_cast(-1468000);
+  constexpr auto __r2_e3 = static_cast(536895458);
+
+  const auto __r0 = __dp.count() + __r2_e3;
+
+  const auto __n1 = 4 * __r0 + 3;
+  const auto __q1 = __n1 / 146097;
+  const auto __r1 = __n1 % 146097 / 4;
+
+  constexpr auto __p32 = static_cast(1) << 32;
+  const auto __n2 = 4 * __r1 + 3;
+  const auto __u2 = static_cast(2939745) * __n2;
+  const auto __q2 = static_cast(__u2 / __p32);
+  const auto __r2 = static_cast(__u2 % __p32) / 2939745 / 4;
+
+  constexpr auto __p16 = static_cast(1) << 16;
+  const auto __n3 = 2141 * __r2 + 197913;
+  const auto __q3 = __n3 / __p16;
+  const auto __r3 = __n3 % __p16 / 2141;
+
+  const auto __y0 = 100 * __q1 + __q2;
+  const auto __m0 = __q3;
+  const auto __d0 = __r3;
+
+  const auto __j  = __r2 >= 306;
+  const auto __y1 = __y0 + __j;
+  const auto __m1 = __j ? __m0 - 12 : __m0;
+  const auto __d1 = __d0 + 1;
+
+  return year_month_day{chrono::year{__y1 + __z2},
chrono::month{__m1}, chrono::day{__d1}};
 }

 // Days since 1970/01/01. Magic.
diff --git a/libstdc++-v3/testsuite/std/time/year_month_day/3.cc
b/libstdc++-v3/testsuite/std/time/year_month_day/3.cc
new file mode 100644
index 000..eede649cd54
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/time/year_month_day/3.cc
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do run { target c++2a } }
+
+// Copyright (C) 2020-2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COP

[PATCH 2/4] libstdc++: More efficient days from date.

2021-02-23 Thread Cassio Neri via Gcc-patches
This patch reimplements std::chrono::year_month_day::_M_days_since_epoch()
which calculates the number of elapsed days since 1970/01/01.  The new
implementation is based on Proposition 6.2 of Neri and Schneider, "Euclidean
Affine Functions and Applications to Calendar Algorithms" available at
https://arxiv.org/abs/2102.06959.

The aforementioned paper benchmarks the implementation against several
counterparts, including libc++'s (which is identical to the current
implementation).  The results, shown in Figure 3, indicate the new algorithm is
1.7 times faster than the current one.

The patch adds a test which loops through all dates in [-32767/01/01,
32767/12/31], and for each of them, gets the number of days and compares the
result against its expected value. The latter is calculated using a much
simpler and easy to understand algorithm but which is also much slower.

The dates used in the test cover the full range of possible values
[time.cal.year.members].  Despite its completeness the test runs in a matter of
seconds.

libstdc++-v3/ChangeLog:

* include/std/chrono:
* testsuite/std/time/year_month_day/4.cc: New test.
---
 libstdc++-v3/include/std/chrono   | 34 +
 .../testsuite/std/time/year_month_day/4.cc| 71 +++
 2 files changed, 92 insertions(+), 13 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/time/year_month_day/4.cc

diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index 7840099d743..30203540f36 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -2447,22 +2447,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 chrono::month(__m), chrono::day(__d)};
 }

-// Days since 1970/01/01. Magic.
+// Days since 1970/01/01.
+// Proposition 6.2 of Neri and Schneider, "Euclidean Affine
Functions and Applications to
+// Calendar Algorithms". https://arxiv.org/abs/2102.06959
 constexpr days
 year_month_day::_M_days_since_epoch() const noexcept
 {
-  const auto __y = static_cast(_M_y) - (_M_m <= February);
-  const auto __m = static_cast(_M_m);
-  const auto __d = static_cast(_M_d);
-  const auto __era = (__y >= 0 ? __y : __y - 399) / 400;
-  // Year of "era" [0, 399].
-  const auto __yoe = static_cast(__y - __era * 400);
-  // Day of year [0, 365].
-  const auto __doy = (153 * (__m > 2 ? __m - 3 : __m + 9) + 2) /
5 + __d - 1;
-  // Day of "era" [0, 146096].
-  const auto __doe = __yoe * 365 + __yoe / 4 - __yoe / 100 + __doy;
-  const auto __days = __era * 146097 + static_cast(__doe) - 719468;
-  return days{__days};
+  auto constexpr __z2= static_cast(-1468000);
+  auto constexpr __r2_e3 = static_cast(536895458);
+
+  const auto __y1 = static_cast(static_cast(_M_y)) - __z2;
+  const auto __m1 = static_cast(_M_m);
+  const auto __d1 = static_cast(_M_d);
+
+  const auto __j  = static_cast(__m1 < 3);
+  const auto __y0 = __y1 - __j;
+  const auto __m0 = __j ? __m1 + 12 : __m1;
+  const auto __d0 = __d1 - 1;
+
+  const auto __q1 = __y0 / 100;
+  const auto __yc = 1461 * __y0 / 4 - __q1 + __q1 / 4;
+  const auto __mc = (979 *__m0 - 2919) / 32;
+  const auto __dc = __d0;
+
+  return days{static_cast(__yc + __mc + __dc - __r2_e3)};
 }

 // YEAR_MONTH_DAY_LAST
diff --git a/libstdc++-v3/testsuite/std/time/year_month_day/4.cc
b/libstdc++-v3/testsuite/std/time/year_month_day/4.cc
new file mode 100644
index 000..2194af17775
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/time/year_month_day/4.cc
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do run { target c++2a } }
+
+// Copyright (C) 2020-2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// Class year_month_day [time.cal.year_month_day]
+
+#include 
+#include 
+
+// Slow but very clear way of advancing one day.
+constexpr void
+advance(std::chrono::year_month_day& ymd) noexcept {
+
+  using namespace std::chrono;
+
+  auto y = ymd.year();
+  auto m = ymd.month();
+  auto d = ymd.day();
+
+  if (d != year_month_day_last{year{y}, month_day_last{m}}.day())
+++d;
+  else {
+d = day{1};
+if (m != December)
+  ++m;
+else {
+  m = January;
+  ++y;
+}
+  }
+  ym

[PATCH 3/4] libstdc++: More efficient is_leap.

2021-02-23 Thread Cassio Neri via Gcc-patches
This patch reimplements std::chrono::year::is_leap().  Leap year check is
ubiquitously implemented (including here) as:

y % 4 == 0 && (y % 100 != 0 || y % 400 == 0).

The rationale being that testing divisibility by 4 first implies an earlier
return for 75% of the cases, therefore, avoiding the needless calculations of
y % 100 and y % 400. Although this fact is true, it does not take into account
the cost of branching.  This patch, instead, tests divisibility by 100 first:

(y % 100 != 0 || y % 400 == 0) && y % 4 == 0.

It is certainly counterintuitive that this could be more efficient since among
the three divisibility tests (4, 100 and 400) the one by 100 is the only one
that can never provide a definitive answer and a second divisibility test (by 4
or 400) is always required. However, measurements [1] in x86_64 suggest this is
3x more efficient!  A possible explanation is that checking divisibility by 100
first implies a split in the execution path with probabilities of (1%, 99%)
rather than (25%, 75%) when divisibility by 4 is checked first.  This decreases
the entropy of the branching distribution which seems to help prediction.

Given that y belongs to [-32767, 32767] [time.cal.year.members], a more
efficient algorithm [2] to check divisibility by 100 is used (instead of
y % 100 != 0).  Measurements suggest that this optimization improves performance
by 20%.

The patch adds a test that exhaustively compares the result of this
implementation with the ubiquitous one for all y in [-32767, 32767]. Despite
its completeness, the test completes in a matter of seconds.

References:
[1] https://stackoverflow.com/a/60646967/1137388
[2] https://accu.org/journals/overload/28/155/overload155.pdf#page=16

libstdc++-v3/ChangeLog:

* include/std/chrono:
* testsuite/std/time/year/2.cc: New test.
---
 libstdc++-v3/include/std/chrono   | 19 -
 libstdc++-v3/testsuite/std/time/year/2.cc | 52 +++
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/std/time/year/2.cc

diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index 7840099d743..b777acdd4a2 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -1597,7 +1597,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

   constexpr bool
   is_leap() const noexcept
-  { return _M_y % 4 == 0 && (_M_y % 100 != 0 || _M_y % 400 == 0); }
+  {
+// Testing divisibility by 100 first gives better performance, that is,
+// return (_M_y % 100 != 0 || _M_y % 400 == 0) && _M_y % 4 == 0;
+
+// It gets even faster if _M_y is in [-536870800, 536870999]
(which is the case here) and
+// _M_y % 100 is replaced by __is_multiple_of_100 below.
+
+// References:
+// [1] https://github.com/cassioneri/calendar
+// [2]
https://accu.org/journals/overload/28/155/overload155.pdf#page=16
+
+constexpr uint32_t __multiplier   = 42949673;
+constexpr uint32_t __bound= 42949669;
+constexpr uint32_t __max_dividend = 1073741799;
+constexpr uint32_t __offset   = __max_dividend / 2 / 100 * 100;
+const bool __is_multiple_of_100 = __multiplier * (_M_y +
__offset) < __bound;
+return (!__is_multiple_of_100 || _M_y % 400 == 0) && _M_y % 4 == 0;
+  }

   explicit constexpr
   operator int() const noexcept
diff --git a/libstdc++-v3/testsuite/std/time/year/2.cc
b/libstdc++-v3/testsuite/std/time/year/2.cc
new file mode 100644
index 000..e22101e305a
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/time/year/2.cc
@@ -0,0 +1,52 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do run { target c++2a } }
+
+// Copyright (C) 2020-2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// Class year [time.cal.year_month_day]
+
+#include 
+#include 
+
+// Slow but clear test for leap year.
+constexpr bool
+is_leap_year(const std::chrono::year& y) noexcept
+{
+  const int n = static_cast(y);
+  return n % 4 == 0 && (n % 100 != 0 || n % 400 == 0);
+}
+
+void test01()
+{
+  using namespace std::chrono;
+
+  year y{-32767};
+  while (y < year{32767}) {
+VERIFY( y.is_leap() ==  is_leap_year(y) );
+++y;
+  }
+
+  // One more for y = 32767.
+  VERIFY( year{3

[PATCH 4/4] libstdc++: More efficient last day of month.

2021-02-23 Thread Cassio Neri via Gcc-patches
This patch reimplements std::chrono::year_month_day_last::day() which yields the
last day of a particular month.  The current implementation uses a look-up table
implemented as an unsigned[12] array.  The new implementation instead is based on
the fact that a month m in [1, 12], except for m == 2 (February), is either 31 or
30 days long and m's length depends on two things: m's parity and whether m >= 8
or not. These two conditions are determined by the 0th and 3rd bits of m and,
therefore, cheap and straightforward bit-twiddling can provide the right result.

Measurements in x86_64 [1] suggest a 10% performance boost.  Although this does
not seem to be huge, notice that measurements are done in hot L1 cache
conditions which might not be very representative of production runs. Also
freeing L1 cache from holding the look-up table might allow performance
improvements elsewhere.

References:
[1] https://github.com/cassioneri/calendar

libstdc++-v3/ChangeLog:

* include/std/chrono:
---
 libstdc++-v3/include/std/chrono | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index 7840099d743..35a7a5e4382 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -1269,9 +1269,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

   inline constexpr unsigned __days_per_month[12]
 = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
-
-  inline constexpr unsigned __last_day[12]
-= { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
 }

 // DAY
@@ -2526,9 +2523,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   constexpr chrono::day
   day() const noexcept
   {
-if (!_M_mdl.ok() || (month() == February && _M_y.is_leap()))
-  return chrono::day{29};
-return chrono::day{__detail::__last_day[unsigned(month()) - 1]};
+const auto __m = static_cast(month());
+
+// Excluding February, the last day of month __m is either 30 or 31 or,
+// in another words, it is 30 + b = 30 | b, where b is in {0, 1}.
+
+// If __m in {1, 3, 4, 5, 6, 7}, then b is 1 if, and only if
__m is odd.
+// Hence, b = __m & 1 = (__m ^ 0) & 1.
+
+// If __m in {8, 9, 10, 11, 12}, then b is 1 if, and only if
__m is even.
+// Hence, b = (__m ^ 1) & 1.
+
+// Therefore, b = (__m ^ c) & 1, where c = 0, if __m < 8, or c = 1 if
+// __m >= 8, that is, c = __m >> 3.
+
+// The above mathematically justifies this implementation whose
+// performance does not depend on look-up tables being on the L1 cache.
+return chrono::day{__m != 2 ? ((__m ^ (__m >> 3)) & 1) | 30 :
_M_y.is_leap() ? 29 : 28};
   }

   constexpr
-- 
2.29.2


Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Martin Liška

On 2/23/21 12:56 PM, Richard Biener wrote:

Can't we fix the asan runtime?  Does the same issue happen when merging
two comdat with different alignment and LTO?


All right, here's a detailed explanation of what happens.
Let's consider the following example:

struct my_struct
{
  unsigned long volatile x;
} __attribute__((aligned(128)));

static int array[5][6] = {};
static struct my_struct variable128 = {1UL};
static struct my_struct variable32 __attribute__((aligned(64))) = {1UL};

Here we have 2 variables (variable128 and variable32) that are merged. Later on,
we decide not to protect the global variable variable128 due to:
  || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE

Without ICF we end up with:

.align 64
.type   variable32, @object
.size   variable32, 128
variable32:
.zero   128
.zero   32
.align 128
.type   variable128, @object
.size   variable128, 128
variable128:
.zero   128

As seen, variable32 has .zero 128+32, where 32 is the red-zone (and alignment 
is increased to 64).

With ICF we end up with:

.align 128
.type   variable128, @object
.size   variable128, 128
variable128:
.zero   128
.set    variable32,variable128

So variable32 points to variable128 (which has no prepared red zone + alignment 
is the same).
$ nm -n a.out
...
00400b80 r variable128
00400b80 r variable32
00400c00 r array

00400c00 - 00400b80 == sizeof(variable32).

Then we tell libasan what is the variable size and size of the corresponding 
red zone:
$ ASAN_OPTIONS=report_globals=3 ./a.out
...
==20602==Added Global[0x00403080]: beg=0x00400b80 size=128/160 
name=variable32 module=asan.c dyn_init=0 odr_indicator=0x

And bad things happen. So I really think ICF should not merge the variables.
Please provide a comdat test-case :)

Thanks,
Martin




Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Richard Biener via Gcc-patches
On Tue, Feb 23, 2021 at 3:22 PM Martin Liška  wrote:
>
> On 2/23/21 12:56 PM, Richard Biener wrote:
> > Can't we fix the asan runtime?  Does the same issue happen when merging
> > two comdat with different alignment and LTO?
>
> All right, there's a detail explanation what happens.
> Let's consider the following example:
>
> struct my_struct
> {
>unsigned long volatile x;
> } __attribute__((aligned(128)));
>
> static int array[5][6] = {};
> static struct my_struct variable128 = {1UL};
> static struct my_struct variable32 __attribute__((aligned(64))) = {1UL};
>
> Here we have 2 variables (variable128 and variable32) that are merged. Later 
> on,
> we decide not to protect the global variable variable128 due to:
>|| DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE
>
> Without ICF we end up with:
>
> .align 64
> .type   variable32, @object
> .size   variable32, 128
> variable32:
> .zero   128
> .zero   32
> .align 128
> .type   variable128, @object
> .size   variable128, 128
> variable128:
> .zero   128
>
> As seen, variable32 has .zero 128+32, where 32 is the red-zone (and alignment 
> is increased to 64).
>
> With ICF we end up with:
>
> .align 128
> .type   variable128, @object
> .size   variable128, 128
> variable128:
> .zero   128
> .setvariable32,variable128
>
> So variable32 points to variable128 (which has no prepared red zone + 
> alignment is the same).
> $ nm -n a.out
> ...
> 00400b80 r variable128
> 00400b80 r variable32
> 00400c00 r array
>
> 00400c00 - 00400b80 == sizeof(variable32).
>
> Then we tell libasan what is the variable size and size of the corresponding 
> red zone:
> $ ASAN_OPTIONS=report_globals=3 ./a.out
> ...
> ==20602==Added Global[0x00403080]: beg=0x00400b80 size=128/160 
> name=variable32 module=asan.c dyn_init=0 odr_indicator=0x

Ah, so the issue is that ASAN still sees both variables (and isn't
properly cgraph/varpool aware)?  So instead of just
keying on different alignment you'd have to verify in ICF whether the
decls are "registered the same" by ASAN, no?
Or simply not perform any variable ICF when ASAN is enabled?

> And bad thinks happen. So I really think ICF should not merge the variables.
> Please provide a comdat test-case :)
>
> Thanks,
> Martin
>
>


Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Martin Liška

On 2/23/21 3:32 PM, Richard Biener wrote:

On Tue, Feb 23, 2021 at 3:22 PM Martin Liška  wrote:


On 2/23/21 12:56 PM, Richard Biener wrote:

Can't we fix the asan runtime?  Does the same issue happen when merging
two comdat with different alignment and LTO?


All right, there's a detail explanation what happens.
Let's consider the following example:

struct my_struct
{
unsigned long volatile x;
} __attribute__((aligned(128)));

static int array[5][6] = {};
static struct my_struct variable128 = {1UL};
static struct my_struct variable32 __attribute__((aligned(64))) = {1UL};

Here we have 2 variables (variable128 and variable32) that are merged. Later on,
we decide not to protect the global variable variable128 due to:
|| DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE

Without ICF we end up with:

 .align 64
 .type   variable32, @object
 .size   variable32, 128
variable32:
 .zero   128
 .zero   32
 .align 128
 .type   variable128, @object
 .size   variable128, 128
variable128:
 .zero   128

As seen, variable32 has .zero 128+32, where 32 is the red-zone (and alignment 
is increased to 64).

With ICF we end up with:

 .align 128
 .type   variable128, @object
 .size   variable128, 128
variable128:
 .zero   128
 .set    variable32,variable128

So variable32 points to variable128 (which has no prepared red zone + alignment 
is the same).
$ nm -n a.out
...
00400b80 r variable128
00400b80 r variable32
00400c00 r array

00400c00 - 00400b80 == sizeof(variable32).

Then we tell libasan what is the variable size and size of the corresponding 
red zone:
$ ASAN_OPTIONS=report_globals=3 ./a.out
...
==20602==Added Global[0x00403080]: beg=0x00400b80 size=128/160 
name=variable32 module=asan.c dyn_init=0 odr_indicator=0x


Ah, so the issue is that ASAN still sees both variables (and isn't
properly cgraph/varpool aware)?


No, in both cases the variable128 is not handled by ASAN (it has too big 
alignment).


So instead of just
keying on different alignment you'd have to verify in ICF whether the
decls are "registered the same" by ASAN, no?


Yes, ICF is too optimistic about alignment of global variables. I'm not sure
I want to call asan_protect_global from ICF.


Or simply not perform any variable ICF when ASAN is enabled?


I think the suggested patch should tell ICF to be strict about alignment
when ASAN is enabled.

Note the issue is quite hairy :)

Martin




And bad things happen. So I really think ICF should not merge the variables.
Please provide a comdat test-case :)

Thanks,
Martin






Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Richard Biener via Gcc-patches
On Tue, Feb 23, 2021 at 3:41 PM Martin Liška  wrote:
>
> On 2/23/21 3:32 PM, Richard Biener wrote:
> > On Tue, Feb 23, 2021 at 3:22 PM Martin Liška  wrote:
> >>
> >> On 2/23/21 12:56 PM, Richard Biener wrote:
> >>> Can't we fix the asan runtime?  Does the same issue happen when merging
> >>> two comdat with different alignment and LTO?
> >>
> >> All right, here's a detailed explanation of what happens.
> >> Let's consider the following example:
> >>
> >> struct my_struct
> >> {
> >> unsigned long volatile x;
> >> } __attribute__((aligned(128)));
> >>
> >> static int array[5][6] = {};
> >> static struct my_struct variable128 = {1UL};
> >> static struct my_struct variable32 __attribute__((aligned(64))) = {1UL};
> >>
> >> Here we have 2 variables (variable128 and variable32) that are merged. 
> >> Later on,
> >> we decide not to protect the global variable variable128 due to:
> >> || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE
> >>
> >> Without ICF we end up with:
> >>
> >>  .align 64
> >>  .type   variable32, @object
> >>  .size   variable32, 128
> >> variable32:
> >>  .zero   128
> >>  .zero   32
> >>  .align 128
> >>  .type   variable128, @object
> >>  .size   variable128, 128
> >> variable128:
> >>  .zero   128
> >>
> >> As seen, variable32 has .zero 128+32, where 32 is the red-zone (and 
> >> alignment is increased to 64).
> >>
> >> With ICF we end up with:
> >>
> >>  .align 128
> >>  .type   variable128, @object
> >>  .size   variable128, 128
> >> variable128:
> >>  .zero   128
> >>  .set    variable32,variable128
> >>
> >> So variable32 points to variable128 (which has no prepared red zone + 
> >> alignment is the same).
> >> $ nm -n a.out
> >> ...
> >> 00400b80 r variable128
> >> 00400b80 r variable32
> >> 00400c00 r array
> >>
> >> 00400c00 - 00400b80 == sizeof(variable32).
> >>
> >> Then we tell libasan what is the variable size and size of the 
> >> corresponding red zone:
> >> $ ASAN_OPTIONS=report_globals=3 ./a.out
> >> ...
> >> ==20602==Added Global[0x00403080]: beg=0x00400b80 size=128/160 
> >> name=variable32 module=asan.c dyn_init=0 odr_indicator=0x
> >
> > Ah, so the issue is that ASAN still sees both variables (and isn't
> > properly cgraph/varpool aware)?
>
> No, in both cases the variable128 is not handled by ASAN (it has too big 
> alignment).
>
> > So instead of just
> > keying on different alignment you'd have to verify in ICF whether the
> > decls are "registered the same" by ASAN, no?
>
> Yes, ICF is too optimistic about alignment of global variables. I'm not sure
> I want to call asan_protect_global from ICF.
>
> > Or simply not perform any variable ICF when ASAN is enabled?
>
> I think the suggested patch should tell ICF to be strict about alignment
> when ASAN is enabled.

Sure.  The question is whether there's more issues with ASAN on-the-side
tracking of stuff.

> Note the issue is quite hairy :)

Understood, I guess the patch is OK but it doesn't look very nice to sprinkle
such checks around the code that might "confuse" ASAN.  For example
there's the vectorizer "IPA" pass that increases alignment of globals.
I know nothing of ASAN but it sounds like it produces its tables too early.

Richard.

> Martin
>
> >
> >> And bad things happen. So I really think ICF should not merge the 
> >> variables.
> >> Please provide a comdat test-case :)
> >>
> >> Thanks,
> >> Martin
> >>
> >>
>


Re: [PATCH] IPA ICF + ASAN: do not merge vars with different alignment

2021-02-23 Thread Martin Liška

On 2/23/21 3:55 PM, Richard Biener wrote:

On Tue, Feb 23, 2021 at 3:41 PM Martin Liška  wrote:


On 2/23/21 3:32 PM, Richard Biener wrote:

On Tue, Feb 23, 2021 at 3:22 PM Martin Liška  wrote:


On 2/23/21 12:56 PM, Richard Biener wrote:

Can't we fix the asan runtime?  Does the same issue happen when merging
two comdat with different alignment and LTO?


All right, here's a detailed explanation of what happens.
Let's consider the following example:

struct my_struct
{
 unsigned long volatile x;
} __attribute__((aligned(128)));

static int array[5][6] = {};
static struct my_struct variable128 = {1UL};
static struct my_struct variable32 __attribute__((aligned(64))) = {1UL};

Here we have 2 variables (variable128 and variable32) that are merged. Later on,
we decide not to protect the global variable variable128 due to:
 || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE

Without ICF we end up with:

  .align 64
  .type   variable32, @object
  .size   variable32, 128
variable32:
  .zero   128
  .zero   32
  .align 128
  .type   variable128, @object
  .size   variable128, 128
variable128:
  .zero   128

As seen, variable32 has .zero 128+32, where 32 is the red-zone (and alignment 
is increased to 64).

With ICF we end up with:

  .align 128
  .type   variable128, @object
  .size   variable128, 128
variable128:
  .zero   128
  .set    variable32,variable128

So variable32 points to variable128 (which has no prepared red zone + alignment 
is the same).
$ nm -n a.out
...
00400b80 r variable128
00400b80 r variable32
00400c00 r array

00400c00 - 00400b80 == sizeof(variable32).

Then we tell libasan what is the variable size and size of the corresponding 
red zone:
$ ASAN_OPTIONS=report_globals=3 ./a.out
...
==20602==Added Global[0x00403080]: beg=0x00400b80 size=128/160 
name=variable32 module=asan.c dyn_init=0 odr_indicator=0x


Ah, so the issue is that ASAN still sees both variables (and isn't
properly cgraph/varpool aware)?


No, in both cases the variable128 is not handled by ASAN (it has too big 
alignment).


So instead of just
keying on different alignment you'd have to verify in ICF whether the
decls are "registered the same" by ASAN, no?


Yes, ICF is too optimistic about alignment of global variables. I'm not sure
I want to call asan_protect_global from ICF.


Or simply not perform any variable ICF when ASAN is enabled?


I think the suggested patch should tell ICF to be strict about alignment
when ASAN is enabled.


Sure.  The question is whether there's more issues with ASAN on-the-side
tracking of stuff.


I hope not, ASAN is quite heavily tested.




Note the issue is quite hairy :)


Understood, I guess the patch is OK but it doesn't look very nice to sprinkle
such checks around the code that might "confuse" ASAN.  For example
there's the vectorizer "IPA" pass that increases alignment of globals.
I know nothing of ASAN but it sounds like it produces its tables too early.


Well ASAN runs at the very end of TREE passes, so it should see all globals
that are modified in the described way.

Anyway, I've just installed the patch.
Thanks,
Martin



Richard.


Martin




And bad things happen. So I really think ICF should not merge the variables.
Please provide a comdat test-case :)

Thanks,
Martin








Re: PR 96391? Can we fix it for gcc11?

2021-02-23 Thread Qing Zhao via Gcc-patches
Hi, Richard,

> On Feb 9, 2021, at 11:36 AM, Richard Biener  wrote:
> 
> On Tue, 9 Feb 2021, Qing Zhao wrote:
>> 
>> Yes, I understand that without a working testing case to repeat the error, 
>> it’s very hard to debug and fix the issue. 
>> 
>> However, providing a testing case for this bug is really challenging from 
>> our side due to multiple reasons…
>> 
>> 
> 
> Note you can try reducing a proprietary testcase with tools like
> cvise or creduce.  Does your case also happen in a mingw/windows
> environment?

We are trying to install creduce on our system, and noticed that it depends on
LLVM.  I am wondering whether there is
a similar tool that depends on GCC instead?

Qing
> 
> Richard.
> 



c++: typedef for linkage [PR 99208]

2021-02-23 Thread Nathan Sidwell


Unnamed types with a typedef name for linkage were always troublesome in 
modules.  This is the underlying cause of that trouble -- we were 
creating incorrect type structures.  Classes have an implicit 
self-reference, and we created that for unnamed classes too.  It turns 
out we make use of this member, so just not generating it turned into a 
rathole.  This member is created using the anonymous name -- because 
we've not yet met the typedef name.  When we retrofit the typedef name 
we were checking identifier matching and changing all type variants with 
that identifier.  Which meant we ended up with a strange typedef for the 
self reference.  This fixes things to check for DECL identity of the 
variants, so we don't smash the self-reference -- that continues to have 
the anonymous name.


PR c++/99208
gcc/cp/
* decl.c (name_unnamed_type): Check DECL identity, not IDENTIFIER
identity.
gcc/testsuite/
* g++.dg/modules/pr99208_a.C: New.
* g++.dg/modules/pr99208_b.C: New.

--
Nathan Sidwell
diff --git c/gcc/cp/decl.c w/gcc/cp/decl.c
index 7fa8f52d667..1742e286d9f 100644
--- c/gcc/cp/decl.c
+++ w/gcc/cp/decl.c
@@ -11081,21 +11081,18 @@ name_unnamed_type (tree type, tree decl)
 {
   gcc_assert (TYPE_UNNAMED_P (type));
 
-  /* Replace the anonymous name with the real name everywhere.  */
+  /* Replace the anonymous decl with the real decl.  Be careful not to
+ rename other typedefs (such as the self-reference) of type.  */
+  tree orig = TYPE_NAME (type);
   for (tree t = TYPE_MAIN_VARIANT (type); t; t = TYPE_NEXT_VARIANT (t))
-if (IDENTIFIER_ANON_P (TYPE_IDENTIFIER (t)))
-  /* We do not rename the debug info representing the unnamed
-	 tagged type because the standard says in [dcl.typedef] that
-	 the naming applies only for linkage purposes.  */
-  /*debug_hooks->set_name (t, decl);*/
+if (TYPE_NAME (t) == orig)
   TYPE_NAME (t) = decl;
 
   /* If this is a typedef within a template class, the nested
  type is a (non-primary) template.  The name for the
  template needs updating as well.  */
   if (TYPE_LANG_SPECIFIC (type) && CLASSTYPE_TEMPLATE_INFO (type))
-DECL_NAME (CLASSTYPE_TI_TEMPLATE (type))
-  = TYPE_IDENTIFIER (type);
+DECL_NAME (CLASSTYPE_TI_TEMPLATE (type)) = DECL_NAME (decl);
 
   /* Adjust linkage now that we aren't unnamed anymore.  */
   reset_type_linkage (type);
diff --git c/gcc/testsuite/g++.dg/modules/pr99208_a.C w/gcc/testsuite/g++.dg/modules/pr99208_a.C
new file mode 100644
index 000..427c7f1b04c
--- /dev/null
+++ w/gcc/testsuite/g++.dg/modules/pr99208_a.C
@@ -0,0 +1,9 @@
+// PR 99208 typedef anonymous class
+// { dg-additional-options {-Wno-pedantic -fmodules-ts} }
+module;
+# 5 "pr99208_a.C" 1
+typedef struct {} __mbstate_t;
+# 7 "" 2
+export module hello:format;
+// { dg-module-cmi {hello:format} }
+export __mbstate_t v;
diff --git c/gcc/testsuite/g++.dg/modules/pr99208_b.C w/gcc/testsuite/g++.dg/modules/pr99208_b.C
new file mode 100644
index 000..0ed68d8069a
--- /dev/null
+++ w/gcc/testsuite/g++.dg/modules/pr99208_b.C
@@ -0,0 +1,4 @@
+// { dg-additional-options {-fmodules-ts} }
+export module hello;
+// { dg-module-cmi hello }
+export import :format;


[PATCH] libstdc++: Update baseline symbols for {aarch64,ia64,m68k,riscv64}-linux

2021-02-23 Thread Andreas Schwab
libstdc++-v3/
* config/abi/post/aarch64-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/ia64-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/m68k-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/riscv64-linux-gnu/baseline_symbols.txt: Update.
---
 .../aarch64-linux-gnu/baseline_symbols.txt| 104 +++
 .../post/ia64-linux-gnu/baseline_symbols.txt  | 121 ++
 .../post/m68k-linux-gnu/baseline_symbols.txt  | 121 ++
 .../riscv64-linux-gnu/baseline_symbols.txt| 116 +
 4 files changed, 462 insertions(+)

diff --git 
a/libstdc++-v3/config/abi/post/aarch64-linux-gnu/baseline_symbols.txt 
b/libstdc++-v3/config/abi/post/aarch64-linux-gnu/baseline_symbols.txt
index 089537d5ddf..45f1540ca11 100644
--- a/libstdc++-v3/config/abi/post/aarch64-linux-gnu/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/aarch64-linux-gnu/baseline_symbols.txt
@@ -199,6 +199,14 @@ 
FUNC:_ZNK11__gnu_debug16_Error_formatter17_M_get_max_lengthEv@@GLIBCXX_3.4.10
 FUNC:_ZNK11__gnu_debug16_Error_formatter8_M_errorEv@@GLIBCXX_3.4
 FUNC:_ZNK11__gnu_debug19_Safe_iterator_base11_M_singularEv@@GLIBCXX_3.4
 FUNC:_ZNK11__gnu_debug19_Safe_iterator_base14_M_can_compareERKS0_@@GLIBCXX_3.4
+FUNC:_ZNKRSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1115basic_stringbufIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1118basic_stringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1119basic_istringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.29
+FUNC:_ZNKRSt7__cxx1119basic_ostringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.29
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE11_M_disjunctEPKw@@GLIBCXX_3.4.5
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE11_M_disjunctEPKw@GLIBCXX_3.4
 FUNC:_ZNKSbIwSt11char_traitsIwESaIwEE12find_last_ofEPKwm@@GLIBCXX_3.4
@@ -856,19 +864,29 @@ 
FUNC:_ZNKSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE8capacityEv@@GLIBCXX_
 
FUNC:_ZNKSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE8max_sizeEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEcvSt17basic_string_viewIwS2_EEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEixEm@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE13get_allocatorEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE4viewEv@@GLIBCXX_3.4.29
+FUNC:_ZNKSt7__cxx1115basic_stringbufIwSt11char_traitsIwESaIwEE13get_allocatorEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1115basic_stringbufIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1115basic_stringbufIwSt11char_traitsIwESaIwEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEE5rdbufEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1118basic_stringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1118basic_stringstreamIwSt11char_traitsIwESaIwEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1118basic_stringstreamIwSt11char_traitsIwESaIwEE5rdbufEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEE5rdbufEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1119basic_istringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1119basic_istringstreamIwSt11char_traitsIwESaIwEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1119basic_istringstreamIwSt11char_traitsIwESaIwEE5rdbufEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEE5rdbufEv@@GLIBCXX_3.4.21
 
FUNC:_ZNKSt7__cxx1119basic_ostringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7__cxx1119basic_ostringstreamIwSt11char_traitsIwESaIwEE4viewEv@@GLIBCXX_3.4.29
 
FUNC:_ZNKSt7__cxx1119basic_ostringstreamIwSt11char_traitsIwESaIwEE5rdbufEv@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7__cxx117collateIcE10_M_compareEPKcS3_@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7__cxx117collateIcE10do_compareEPKcS3_S3_S3_@@GLIBCXX_3.4.21
@@ -1300,6 +1318,14 @@ FUNC:_ZNKSt9type_info10__do_catchEPKS_PPvj@@GLIBCXX_

PING [PATCH] avoid -Warray-bounds checks for vtable assignments (PR 98266)

2021-02-23 Thread Martin Sebor via Gcc-patches

[CC Jason for any further comments/clarification]

On 2/9/21 10:49 AM, Martin Sebor wrote:

On 2/8/21 4:11 PM, Jeff Law wrote:



On 2/8/21 3:44 PM, Martin Sebor wrote:

On 2/8/21 3:26 PM, Jeff Law wrote:



On 2/8/21 2:56 PM, Martin Sebor wrote:

On 2/8/21 12:59 PM, Jeff Law wrote:



On 1/19/21 5:56 PM, Martin Sebor via Gcc-patches wrote:

Similar to the problem reported for -Wstringop-overflow in pr98266
and already fixed, -Warray-bounds is also susceptible to false
positives in assignments and copies involving virtual inheritance.
Because the two warnings don't share code yet (hopefully in GCC 12)
the attached patch adds its own workaround for this problem to
gimple-array-bounds.cc, this one slightly more crude because of
the limited insight the array bounds checking has into the checked
expressions.

Tested on x86_64-linux.

Martin

gcc-98266.diff

PR middle-end/98266 - bogus array subscript is partly outside array
bounds on virtual inheritance

gcc/ChangeLog:

  PR middle-end/98266
  * gimple-array-bounds.cc
(array_bounds_checker::check_array_bounds):
  Avoid checking references involving artificial members.

gcc/testsuite/ChangeLog:

  PR middle-end/98266
  * g++.dg/warn/Warray-bounds-15.C: New test.

It seems to me that we've got the full statement at some point  and
thus
the full expression so at some point couldn't we detect when
TYPE_SIZE_UNIT!= DECL_SIZE_UNIT?  Or should we be using 
TYPE_SIZE_UNIT

rather than DECL_SIZE_UNIT in gimple-array-bounds.cc

Am I missing something?


The expression we're looking at when the false positive is issued
is the MEM_REF in the LHS of:

MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void
*)&_ZTC1E24_1D + 24B];

TREE_TYPE(LHS) is D, DECL_SIZE_UNIT (D.2652) is 24, and
TYPE_SIZE_UNIT(D) is also 24, so there's no discrepancy between
DECL_SIZE and TYPE_SIZE.

So that seems like it's a different issue then, unrelated to 97595.
Right?


I think the underlying problem is the same.  We're getting a size
that doesn't correspond to what's actually being accessed, and it
happens because of the virtual inheritance.  In pr97595 Jason
suggested to use the decl/type size inequality to identify this
case but I think we could have just as well used DECL_ARTIFICIAL
instead.  At least the test cases from pr97595 both pass with
this change.

But in the 98266 case the type and decl sizes are the same.  So to be
true that would mean that the underlying type we're using to access
memory differs from its actual type.  Is that the case in the IL?  And
does this have wider implications for diagnostics or optimizations that
rely on accurate type sizing?

I'm just trying to make sure I understand, not accepting or rejecting
the patch yet.


The part of the IL with the MEM_REF is this:

void g ()
{
   void * D.2789;
   struct E D.2652;

    [local count: 1073741824]:
   E::E (&D.2652, "");
   f (&D.2652);

    [local count: 1073741824]:
   MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void 
*)&_ZTC1E24_1D + 24B];

   ...

The access here is to the _vptr.D pointer member of D.2652 which is
just past the end of the parent object (as reflected by its SIZE):
it sets up the virtual table pointer.

The access in pr97595 is to the member subobject, which, as Jason
explained (and I accordingly documented under DECL_SIZE in tree.h),
is also laid out separately from the parent object.

These cases aren't exactly the same (which is also why the test
I added for -Warray-bounds in pr97595 didn't expose this bug) but
they are closely related.  The one here can be distinguished by
DECL_ARTIFICIAL.  The other by the DECL_SIZE != TYPE_SIZE member
inequality.

Might this impact other warnings?  I'd say so if they don't take
these things into account.  I just learned about this in pr97595
which was a -Wstringop-overflow false positive but I also saw
a similar instance of -Warray-bounds with my patch to improve
caching and enhance array bounds checking.  I dealt with that
instance of the warning in that patch but proactively added
a test case to the fix for pr97595.  But the test case is focused
on the subobject access and not on one to the virtual table so
(as I said above) it didn't expose this bug.

Might this also impact optimizations?  I can imagine someone
unaware of this "gotcha" making the same "naive" assumption
I did, but I'd also expect such an invalid assumption to be
found either in code review or quickly cause problems in
testing.


Jeff, does this answer your question?



Martin




Re: [PATCH 1/2] libstdc++: Robustify long double std::to_chars testcase [PR98384]

2021-02-23 Thread Patrick Palka via Gcc-patches
On Mon, 22 Feb 2021, Patrick Palka wrote:

> This makes the hexadecimal section of the long double std::to_chars
> testcase more robust by avoiding false-negative FAILs due to printf
> using a different leading hex digit than us, and by additionally
> verifying the correctness of the hexadecimal form via round-tripping
> through std::from_chars.
> 
> Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
> look OK for trunk?

The commit message could explain the issue better, so here's v2 with a
more detailed commit message.

-- >8 --

Subject: [PATCH] libstdc++: Robustify long double std::to_chars testcase
 [PR98384]

The long double std::to_chars testcase currently verifies the
correctness of its output by comparing it to that of printf, so if
there's a mismatch between to_chars and printf, the test FAILs.  This
works well for the scientific, fixed and general formatting modes,
because the corresponding printf conversion specifiers (%e, %f and %g)
are rigidly specified.

But this doesn't work so well for the hex formatting mode because the
corresponding printf conversion specifier %a is more flexibly specified.
For instance, the hexadecimal forms 0x1p+0, 0x2p-1, 0x4p-2 and 0x8p-3
are all equivalent and valid outputs of the %a specifier for the number
1.  The apparent freedom here is the choice of leading hex digit -- the
standard just requires that the leading hex digit is nonzero for
normalized numbers.

Currently, our hexadecimal formatting implementation uses 0/1/2 as the
leading hex digit for floating point types that have an implicit leading
mantissa bit, which in practice means all supported floating point types
except x86 long double.  The latter type has a 64 bit mantissa with an
explicit leading mantissa bit, and for this type our implementation uses
the most significant four bits of the mantissa as leading hex digit.
This seems to be consistent with most printf implementations, but not
all, as PR98384 illustrates.

In order to avoid false-positive FAILs due to arbitrary disagreement
between to_chars and printf about the choice of leading hex digit, this
patch makes the testcase's verification via printf conditional on the
leading hex digits first agreeing.  An additional verification step is
also added: round-tripping the output of to_chars through from_chars
should yield the original value.

Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
look OK for trunk?

libstdc++-v3/ChangeLog:

PR libstdc++/98384
* testsuite/20_util/to_chars/long_double.cc: Include <optional>.
(test01): Simplify verifying the nearby values by using a
2-iteration loop and a dedicated output buffer to check that the
nearby values are different.  Factor out the printf-based
verification into a local function, and check that the leading
hex digits agree before comparing with the output of printf.
Also verify the output by round-tripping it through from_chars.
---
 .../testsuite/20_util/to_chars/long_double.cc | 73 ---
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc 
b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
index 4f72cb65400..da847ae5401 100644
--- a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
+++ b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -50,6 +51,38 @@ namespace detail
 void
 test01()
 {
+  // Verifies correctness of the hexadecimal form [BEGIN,END) for VALUE by
+  // round-tripping it through from_chars (if available).
+  auto verify_via_from_chars = [] (char *begin, char *end, long double value) {
+#if __cpp_lib_to_chars >= 201611L || _GLIBCXX_HAVE_USELOCALE
+long double roundtrip;
+auto result = from_chars(begin, end, roundtrip, chars_format::hex);
+VERIFY( result.ec == errc{} );
+VERIFY( result.ptr == end );
+VERIFY( roundtrip == value );
+#endif
+  };
+
+  // Verifies correctness of the null-terminated hexadecimal form at BEGIN
+  // for VALUE and PRECISION by comparing it with the output of printf's %La
+  // conversion specifier.
+  auto verify_via_printf = [] (char *begin, long double value,
+  optional precision = nullopt) {
+char printf_buffer[1024] = {};
+if (precision.has_value())
+  sprintf(printf_buffer, "%.*La", precision.value(), value);
+else
+  sprintf(printf_buffer, "%La", value);
+
+// Only compare with the output of printf if the leading hex digits agree.
+// If the leading hex digit of our form doesn't agree with that of printf,
+// then the two forms may still be equivalent (e.g. 1.1p+0 vs 8.8p-3).  But
+// if the leading hex digits do agree, then we do expect the two forms to 
be
+// the same.
+if (printf_buffer[strlen("0x")] == begin[0])
+  VERIFY( !strcmp(begin, printf_buffer+strlen("0x")) );
+  };

[PATCH, 0 of 3], Fix PowerPC test suite for IEEE 128-bit long double

2021-02-23 Thread Michael Meissner via Gcc-patches
As I mentioned in the patch for adding _Float128 <-> Decimal conversions, there
are two test cases that fail if you configure the compiler to use IEEE 128-bit
long double or 64-bit long double.  That is because these tests are explicitly
testing that the long double is a pair of doubles (i.e. IBM 128-bit long
double).

In previous iterations of this patch, I set the target condition to run the
test only if long double uses the IBM 128-bit format.  Segher suggested that
instead I add target support tests and options to force the long double format
to IBM 128-bit long double.  I rewrote the patches to do this.

The last time I posted this patch, Joseph Myers noticed that I misspelled
'override' in two of the functions.  I have fixed this in this patch.  Thanks.

There are 3 patches in this set:

1)  The patch to target-supports.exp to add 3 new functions that return
true based on what the current long double format is.  And there are
three functions that return true if you can set the long double type at
compile time and the GLIBC supports this option.  The remaining 2
patches need this patch to be installed before they can be installed.

2)  The patch to gcc/testsuite/gcc.target/powerpc/pr70117.c that forces
this test to use IBM 128-bit long double.

3)  The patch to gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c that
forces this test to use IBM 128-bit long double.

I have done bootstrap builds on a power9 little endian system with the compiler
configured for the default IBM 128-bit long double and the IEEE 128-bit long
double.  The 2 tests now pass when I add these patches.  I also have built
bootstrap compilers on a power8 big endian system, and there were no
regressions.  Can I check these patches into the master branch?

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Re: [PATCH 1/3] PowerPC: Add long double target-supports.

2021-02-23 Thread Michael Meissner via Gcc-patches
On Fri, Jan 15, 2021 at 06:16:43PM +, Joseph Myers wrote:
> On Thu, 14 Jan 2021, Michael Meissner via Gcc-patches wrote:
> 
> > +return [check_runtime_nocache ppc_long_double_ovveride_ibm128 {
> 
> > +return [check_runtime_nocache ppc_long_double_ovveride_ieee128 {
> 
> > +return [check_runtime_nocache ppc_long_double_ovveride_64bit {
> 
> All these places have the typo "ovveride".

Thanks.  I have fixed these in the next version of the patch.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


[PATCH 1/3] Add long double target-supports on PowerPC.

2021-02-23 Thread Michael Meissner via Gcc-patches
[PATCH 1/3] Add long double target-supports on PowerPC.

This patch adds several more selections to target-supports.exp:

*   3 selections for the current long double format;
*   3 selections if we can change the long double format to a particular
value.
*   3 functions to return the options needed to switch the long double
format for use with dg-add-options.

I have run tests on a little endian power9 system with 3 compilers.  There
were no regressions with these patches, and the two tests in the following
patches now work if the default long double is not IBM 128-bit:

*   One compiler using the default IBM 128-bit format;
*   One compiler using the IEEE 128-bit format; (and)
*   One compiler using 64-bit long doubles.

I have also tested compilers on a big endian power8 system with a compiler
defaulting to power8 code generation and another with the default cpu
set.  There were no regressions.

Can I check this patch into the master branch?

gcc/testsuite/
2021-02-23  Michael Meissner  

* lib/target-supports.exp
(check_effective_target_ppc_long_double_ibm128): New function.
(check_effective_target_ppc_long_double_ieee128): New function.
(check_effective_target_ppc_long_double_64bit): New function.
(add_options_for_ppc_long_double_override_ibm128): New function.
(check_effective_target_ppc_long_double_override_ibm128): New
function.
(add_options_for_ppc_long_double_override_ieee128): New function.
(check_effective_target_ppc_long_double_override_ieee128): New
function.
(add_options_for_ppc_long_double_override_64bit): New function.
(check_effective_target_ppc_long_double_override_64bit): New
function.
---
 gcc/testsuite/lib/target-supports.exp | 155 ++
 1 file changed, 155 insertions(+)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index af46c779214..50302973b62 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2354,6 +2354,161 @@ proc check_effective_target_ppc_ieee128_ok { } {
 }]
 }
 
+# See if the target is a powerpc with the long double format that uses the IBM
+# extended double format.
+
+proc check_effective_target_ppc_long_double_ibm128 { } {
+return [check_cached_effective_target ppc_long_double_ibm {
+   int main()
+   {
+ #if !defined(_ARCH_PPC) || !defined(__LONG_DOUBLE_IBM128__)
+   return 1;
+ #else
+   return 0;
+ #endif
+   }
+}]
+}
+
+# See if the target is a powerpc with the long double format that uses the IEEE
+# 128-bit format.
+
+proc check_effective_target_ppc_long_double_ieee128 { } {
+return [check_cached_effective_target ppc_long_double_ieee {
+   int main()
+   {
+ #if !defined(_ARCH_PPC) || !defined(__LONG_DOUBLE_IEEE128__)
+   return 1;
+ #else
+   return 0;
+ #endif
+   }
+}]
+}
+
+# See if the target is a powerpc with the long double format that is 64-bit.
+
+proc check_effective_target_ppc_long_double_64bit { } {
+return [check_cached_effective_target ppc_long_double_64bit {
+   int main()
+   {
+ #ifndef _ARCH_PPC
+   return 1;
+ #else
+   return sizeof (long double) != 8;
+ #endif
+   }
+}]
+}
+
+# Like check_effective_target_ppc_long_double_ibm128, but check if we can
+# explicitly override the long double format to use the IBM 128-bit extended
+# double format, and GLIBC supports doing this override by switching the
+# sprintf to handle long double.
+
+proc add_options_for_ppc_long_double_override_ibm128 { flags } {
+if { [istarget powerpc*-*-*] } {
+   return "$flags -mlong-double-128 -Wno-psabi -mabi=ibmlongdouble"
+}
+return "$flags"
+}
+
+proc check_effective_target_ppc_long_double_override_ibm128 { } {
+return [check_runtime_nocache ppc_long_double_override_ibm128 {
+   #include 
+   #include 
+   volatile __ibm128 a = (__ibm128) 3.0;
+   volatile long double one = 1.0L;
+   volatile long double two = 2.0L;
+   volatile long double b;
+   char buffer[20];
+   int main()
+   {
+ #if !defined(_ARCH_PPC) || !defined(__LONG_DOUBLE_IBM128__)
+   return 1;
+ #else
+   b = one + two;
+   if (memcmp ((void *)&a, (void *)&b, sizeof (long double)) != 0)
+ return 1;
+   sprintf (buffer, "%lg", b);
+   return strcmp (buffer, "3") != 0;
+ #endif
+   }
+} [add_options_for_ppc_long_double_override_ibm128 ""]]
+}
+
+# Like check_effective_target_ppc_long_double_ieee, but check if we can
+# explicitly override the long double format to use the IEEE 128-bit format,
+# and GLIBC supports doing this override by switching the sprintf to handle
+# long double.
+
+proc add_options_for_ppc_long_double_override

[PATCH 2/3] Force long double to be IBM 128-bit on PowerPC test, PR target/70117

2021-02-23 Thread Michael Meissner via Gcc-patches
[PATCH 2/3] Force long double to be IBM 128-bit on PowerPC test, PR 
target/70117.

This patch fixes the pr70117 test to use IBM 128-bit long double.

I have run tests on a little endian power9 system with 3 compilers.  There
were no regressions with these patches, and the two tests in the following
patches now work if the default long double is not IBM 128-bit:

*   One compiler using the default IBM 128-bit format;
*   One compiler using the IEEE 128-bit format; (and)
*   One compiler using 64-bit long doubles.

I have also tested compilers on a big endian power8 system with a compiler
defaulting to power8 code generation and another with the default cpu
set.  There were no regressions.

Can I check this patch into the master branch?

gcc/testsuite/
2021-02-23  Michael Meissner  

PR target/70117
* gcc.target/powerpc/pr70117.c: Force the long double type to use
the IBM 128-bit format.
---
 gcc/testsuite/gcc.target/powerpc/pr70117.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c 
b/gcc/testsuite/gcc.target/powerpc/pr70117.c
index 3bbd2c595e0..8a5fad1dee0 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr70117.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
@@ -1,5 +1,7 @@
-/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* 
rs6000-*-* } } } */
-/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
+/* { dg-do run } */
+/* { dg-require-effective-target ppc_long_double_override_ibm128 } */
+/* { dg-options "-std=c99 -O2" } */
+/* { dg-add-options ppc_long_double_override_ibm128 } */
 
 #include 
 
-- 
2.22.0


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


[PATCH 3/3] Force IBM long double for conversion test on PowerPC.

2021-02-23 Thread Michael Meissner via Gcc-patches
[PATCH 3/3] Force IBM long double for conversion test on PowerPC.

The test c-c++-common/dfp/convert-bfp-11.c explicitly expects long double to use
the IBM 128-bit extended double format.  In particular, some of the tests
expect an infinity to be created if the decimal values being converted are
too large for IBM extended double.  However, the numbers do fit in the range
for IEEE 128-bit format, since it has a larger exponent than the IBM 128-bit
format.  The test fails because an infinity is not generated.

This patch explicitly sets the long double type to IBM extended double.

I have run tests on a little endian power9 system with 3 compilers.  There
were no regressions with these patches, and the two tests in the following
patches now work if the default long double is not IBM 128-bit:

*   One compiler using the default IBM 128-bit format;
*   One compiler using the IEEE 128-bit format; (and)
*   One compiler using 64-bit long doubles.

I have also tested compilers on a big endian power8 system with a compiler
defaulting to power8 code generation and another with the default cpu
set.  There were no regressions.

Can I check this patch into the master branch?

gcc/testsuite/
2021-02-22  Michael Meissner  

* c-c++-common/dfp/convert-bfp-11.c: Force using IBM 128-bit long
double.  Remove check for 64-bit long double.
---
 .../c-c++-common/dfp/convert-bfp-11.c  | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c 
b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
index 95c433d2c24..35da07d1fa4 100644
--- a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
+++ b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
@@ -1,9 +1,14 @@
-/* { dg-skip-if "" { ! "powerpc*-*-linux*" } } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target ppc_long_double_override_ibm128 } */
+/* { dg-add-options ppc_long_double_override_ibm128 } */
 
-/* Test decimal float conversions to and from IBM 128-bit long double. 
-   Checks are skipped at runtime if long double is not 128 bits.
-   Don't force 128-bit long doubles because runtime support depends
-   on glibc.  */
+/* We force the long double type to be IBM 128-bit because the CONVERT_TO_PINF
+   tests will fail if we use IEEE 128-bit floating point.  This is due to IEEE
+   128-bit having a larger exponent range than IBM 128-bit extended double.  So
+   tests that would generate an infinity with IBM 128-bit will generate a
+   normal number with IEEE 128-bit.  */
+
+/* Test decimal float conversions to and from IBM 128-bit long double.   */
 
 #include "convert.h"
 
@@ -36,9 +41,6 @@ CONVERT_TO_PINF (312, tf, sd, 1.6e+308L, d32)
 int
 main ()
 {
-  if (sizeof (long double) != 16)
-return 0;
-
   convert_101 ();
   convert_102 ();
 
-- 
2.22.0


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


arm: Fix CMSE support detection in libgcc (PR target/99157)

2021-02-23 Thread Christophe Lyon via Gcc-patches
As discussed in the PR, the Makefile fragment lacks a double '$' to
get the return-code from GCC invocation, resulting in CMSE support
missing from multilibs.

I checked that the simple patch proposed in the PR fixes the problem.

2021-02-23  Christophe Lyon  
Hau Hsu  

PR target/99157
libgcc/
* config/arm/t-arm: Fix cmse support detection.

diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
index 364f40e..3625a25 100644
--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -4,7 +4,7 @@ LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi
_thumb1_case_shi \

 HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell
$(gcc_compile_bare) -dM -E - /dev/null
2>/dev/null; echo $?),0)
+ifeq ($(shell $(gcc_compile_bare) -E -mcmse - /dev/null
2>/dev/null; echo $$?),0)
 CMSE_OPTS:=-mcmse
 endif
arm: Fix CMSE support detection in libgcc (PR target/99157)

As discussed in the PR, the Makefile fragment lacks a double '$' to
get the return-code from GCC invocation, resulting in CMSE support
missing from multilibs.

The simple patch proposed in the PR fixes the problem.

2021-02-23  Christophe Lyon  
Hau Hsu  

PR target/99157
libgcc/
* config/arm/t-arm: Fix cmse support detection.

diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
index 364f40e..3625a25 100644
--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -4,7 +4,7 @@ LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi 
_thumb1_case_shi \
 
 HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell $(gcc_compile_bare) -dM -E 
- /dev/null 
2>/dev/null; echo $?),0)
+ifeq ($(shell $(gcc_compile_bare) -E -mcmse - /dev/null 
2>/dev/null; echo $$?),0)
 CMSE_OPTS:=-mcmse
 endif
 


Re: [Patch, fortran] PR99124 - [9/10/11 Regression] ICE in gfc_get_class_from_expr, at fortran/trans-expr.c:541

2021-02-23 Thread Paul Richard Thomas via Gcc-patches
Hi Tobias,

> Can you also add a testcase which triggers the error message you
> see in the unpatched  class_assign_4.f90?
> > I was unable to find a way to use a typebound operator with a polymorphic
> > result


> I am confused – the attach testcase does seem to work fine with current
> GCC. (And if we don't have such a testcase, it should be added.)


> Can you elaborate?
>



The polymorphic result must be allocatable or pointer for the dynamic type
to be transmitted. This means that the function cannot be elemental. If the
result of the non-elemental function is an array, gfc responds with:
"Error: Passed-object dummy argument of ‘f’ at (1) must be scalar"
If the procedure declaration is made nopass, the response is:
"Type-bound operator at (1) cannot be NOPASS"

See the attached elemental_result_2.f90, which tests the new error message.

From these points, I concluded that a typebound operator could not provide
the required polymorphic array result. If I am wrong about this, please let
me know and I will change the patch accordingly.

The interface operator does not have these constraints and so was
implemented in class_assign_4.f90.

The patch itself LGTM, except for testing the newly shown error message
> and for the confusion about the type-bound operator.
>

 All done. Note that the patch has changed slightly in resolve.c because
(1) it was the wrong version and (2) it sporadically segfaulted at line
13240.

Thanks

Paul
! { dg-do compile }
!
! Test part of the fix for PR99124 which adds errors for class results
! That violate F2018, C15100.
!
! Contributed by Gerhard Steinmetz  
!
module m
   type t
  integer :: i
   contains
  procedure :: f
  generic :: operator(+) => f
   end type
contains
   elemental function f(a, b) &
   result(c) ! { dg-error "shall not have an ALLOCATABLE or POINTER attribute" }
  class(t), intent(in) :: a, b
  class(t), allocatable :: c
  c = t(a%i + b%i)
   end
   elemental function g(a, b) &
   result(c) ! { dg-error "shall not have an ALLOCATABLE or POINTER attribute" }
  class(t), intent(in) :: a, b
  class(t), pointer :: c
  c => null ()
   end
   elemental function h(a, b) &  ! { dg-error "must have a scalar result" }
   result(c) ! { dg-error "must be dummy, allocatable or pointer" }
  class(t), intent(in) :: a, b
  class(t) :: c(2)
   end
end
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 11b5dbc7a03..de62266e96b 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -13051,6 +13051,7 @@ static bool
 resolve_fl_procedure (gfc_symbol *sym, int mp_flag)
 {
   gfc_formal_arglist *arg;
+  bool allocatable_or_pointer;
 
   if (sym->attr.function
   && !resolve_fl_var_and_proc (sym, mp_flag))
@@ -13235,8 +13236,15 @@ resolve_fl_procedure (gfc_symbol *sym, int mp_flag)
   /* F2018, C15100: "The result of an elemental function shall be scalar,
  and shall not have the POINTER or ALLOCATABLE attribute."  The scalar
  pointer is tested and caught elsewhere.  */
+  if (sym->result)
+allocatable_or_pointer = sym->result->ts.type == BT_CLASS && CLASS_DATA (sym->result) ?
+			 (CLASS_DATA (sym->result)->attr.allocatable
+			  || CLASS_DATA (sym->result)->attr.pointer) :
+			 (sym->result->attr.allocatable
+			  || sym->result->attr.pointer);
+
   if (sym->attr.elemental && sym->result
-  && (sym->result->attr.allocatable || sym->result->attr.pointer))
+  && allocatable_or_pointer)
 {
   gfc_error ("Function result variable %qs at %L of elemental "
 		 "function %qs shall not have an ALLOCATABLE or POINTER "
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index c346183e129..c6725659093 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -1167,8 +1167,11 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype)
   && rhs_ss->info->expr->ts.type == BT_CLASS
   && rhs_ss->info->data.array.descriptor)
 {
-  rhs_class_expr
-	= gfc_get_class_from_expr (rhs_ss->info->data.array.descriptor);
+  if (rhs_ss->info->expr->expr_type != EXPR_VARIABLE)
+	rhs_class_expr
+	  = gfc_get_class_from_expr (rhs_ss->info->data.array.descriptor);
+  else
+	rhs_class_expr = gfc_get_class_from_gfc_expr (rhs_ss->info->expr);
   unlimited_rhs = UNLIMITED_POLY (rhs_ss->info->expr);
   if (rhs_ss->info->expr->expr_type == EXPR_FUNCTION)
 	rhs_function = true;
diff --git a/gcc/testsuite/gfortran.dg/class_assign_4.f90 b/gcc/testsuite/gfortran.dg/class_assign_4.f90
index 517e3121cc8..c6c54bbaed2 100644
--- a/gcc/testsuite/gfortran.dg/class_assign_4.f90
+++ b/gcc/testsuite/gfortran.dg/class_assign_4.f90
@@ -11,17 +11,21 @@ module m
   type :: t1
 integer :: i
   CONTAINS
-PROCEDURE :: add_t1
-GENERIC :: OPERATOR(+) => add_t1
+!PROCEDURE :: add_t1
+!GENERIC :: OPERATOR(+) => add_t1
   end type
   type, extends(t1) :: t2
   

Re: [Patch, fortran] PR99124 - [9/10/11 Regression] ICE in gfc_get_class_from_expr, at fortran/trans-expr.c:541

2021-02-23 Thread Tobias Burnus

Hi Paul,

On 23.02.21 18:39, Paul Richard Thomas via Fortran wrote:

Can you elaborate?

The polymorphic result must be allocatable or pointer for the dynamic type
to be transmitted. This means that the function cannot be elemental. If the
result of the non-elemental function is an array, gfc responds with:
"Error: Passed-object dummy argument of ‘f’ at (1) must be scalar"


Ok, I think I understood the issue: The problem is that with 'pass', you
get a scalar as argument; and with scalars, you'd need 'elemental',
which is not permitted.

Sorry, I missed the 'pass' → 'scalar' step. I did get the 'elemental' →
nonalloc step.


All done. Note that the patch has changed slightly in resolve.c because
(1) it was the wrong version and (2) it sporadically segfaulted at line
13240.


Ups!

LGTM. Thanks for the patch!

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf


[PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)

2021-02-23 Thread Tamar Christina via Gcc-patches
Hi Richi,

The attached testcase shows a bug where two nodes end up with the same pointer.
During the loop that analyzes all the instances
in optimize_load_redistribution_1 we do

  if (value)
{
  SLP_TREE_REF_COUNT (value)++;
  SLP_TREE_CHILDREN (root)[i] = value;
  vect_free_slp_tree (node);
}

when doing a replacement.  When this is done and the refcount for the node
reaches 0, the node is removed, which allows the libc to return the pointer
again in the next call to new, which it does.

First instance

note:   node 0x5325f48 (max_nunits=1, refcnt=2)
note:   op: VEC_PERM_EXPR
note:   { }
note:   lane permutation { 0[0] 1[1] 0[2] 1[3] }
note:   children 0x5325db0 0x5325200

Second instance

note:   node 0x5325f48 (max_nunits=1, refcnt=1)
note:   op: VEC_PERM_EXPR
note:   { }
note:   lane permutation { 0[0] 1[1] }
note:   children 0x53255b8 0x5325530

This will end up with the illegal construction of

note:   node 0x53258e8 (max_nunits=2, refcnt=2)
note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
note:   stmt 0 _16 = _14 - _15;
note:   stmt 1 _23 = _17 + _22;
note:   children 0x53257d8 0x5325d28
note:   node 0x53257d8 (max_nunits=2, refcnt=3)
note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
note:   load permutation { 0 1 }
note:   node 0x5325d28 (max_nunits=2, refcnt=8)
note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
note:   stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
note:   stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
note:   load permutation { 0 1 0 1 }

To prevent this my initial thought was to add the temporary VEC_PERM_EXPR nodes
to the bst_map cache and increase their refcnt one more.  However since bst_map
is gated on scalar statements and these nodes have none we can't do that.

Instead I realized that load_map is really only a visited list at the top level.
So instead of returning the reference, we should return NULL.

What this means is that no replacement was found at that level.  This is
fine since these VEC_PERM_EXPR nodes are single use.  So while any other node is
an indication to use the cache, a VEC_PERM_EXPR is an indication to avoid it.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/99220
* tree-vect-slp.c (optimize_load_redistribution_1): Don't use
VEC_PERM_EXPR in cache.

gcc/testsuite/ChangeLog:

PR tree-optimization/99220
* g++.dg/vect/pr99220.cc: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc 
b/gcc/testsuite/g++.dg/vect/pr99220.cc
new file mode 100755
index 
..ff3058832b742414202a8ada0a9dafc72c9a54aa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } 
} } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator-(a d) { return a(b - d.b, c - d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+};
+long f;
+a *g;
+class {
+  a *h;
+  long i;
+  a *j;
+
+public:
+  void k() {
+a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
+g[i] = m + n;
+g[i + 1] = m - n;
+j[f] = o;
+  }
+} p;
+main() { p.k(); }
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 
605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c
 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 
(scalar_stmts_to_slp_tree_map_t *bst_map,
slp_tree root)
 {
   if (slp_tree *leader = load_map->get (root))
-return *leader;
+{
+  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+   return NULL;
+  else
+   return *leader;
+}
 
   load_map->put (root, NULL);
 


-- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc b/gcc/testsuite/g++.dg/vect/pr99220.cc
new file mode 100755
index ..ff3058832b742414202a8ada0a9dafc72c9a54aa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* } } } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator-(a d) { return a(b - d.b, c - d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+

[PATCH] clear more front end VLA bounds from IL (PR 97172)

2021-02-23 Thread Martin Sebor via Gcc-patches

Adding attribute access to declarations of functions that take
VLA arguments relies on the front end adding attribute "arg spec"
to each VLA parameter.  Like the VLA bounds in attribute access,
the same VLA bounds in attribute "arg spec" can cause trouble
during LTO streaming which expects front end trees to have been
either gimplified or removed.  For some reason, with arg spec
the LTO abort happens only with -fpic/-shared and so it escaped
testing in the prior fixes.

The attached patch clears the bounds from attribute "arg spec."

Martin
PR middle-end/97172 - ICE: tree code ‘ssa_name’ is not supported in LTO streams

gcc/ChangeLog:
	PR middle-end/97172
	* attribs.c (attr_access::free_lang_data): Clear attribute arg spec
	from function arguments.

gcc/c/ChangeLog:

	PR middle-end/97172
	* c-decl.c (free_attr_access_data): Clear attribute arg spec.

gcc/testsuite/ChangeLog:

	PR middle-end/97172
	* gcc.dg/pr97172-2.c: New test.


diff --git a/gcc/attribs.c b/gcc/attribs.c
index 60933d20810..16c6b12d477 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -2265,6 +2265,14 @@ attr_access::free_lang_data (tree attrs)
 	  *pvbnd = NULL_TREE;
 	}
 }
+
+  for (tree argspec = attrs; (argspec = lookup_attribute ("arg spec", argspec));
+   argspec = TREE_CHAIN (argspec))
+{
+  /* Same as above.  */
+  tree *pvblist = &TREE_VALUE (argspec);
+  *pvblist = NULL_TREE;
+}
 }
 
 /* Defined in attr_access.  */
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index a5852a3bae7..b559ed5d76a 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -12166,6 +12166,10 @@ free_attr_access_data ()
   /* Iterate over all functions declared in the translation unit.  */
   FOR_EACH_FUNCTION (n)
 {
+  for (tree parm = DECL_ARGUMENTS (n->decl); parm; parm = TREE_CHAIN (parm))
+	if (tree attrs = DECL_ATTRIBUTES (parm))
+	  attr_access::free_lang_data (attrs);
+
   tree fntype = TREE_TYPE (n->decl);
   if (!fntype)
 	continue;
diff --git a/gcc/testsuite/gcc.dg/pr97172-2.c b/gcc/testsuite/gcc.dg/pr97172-2.c
new file mode 100644
index 000..6f355bb9ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97172-2.c
@@ -0,0 +1,8 @@
+/* PR middle-end/97172 - ICE: tree code ‘ssa_name’ is not supported in LTO
+   streams
+   { dg-do link }
+   { dg-options "-Wall -flto -fpic -shared" }
+   { dg-require-effective-target fpic }
+   { dg-require-effective-target lto } */
+
+#include "pr97172.c"


Re: add rv64im{,c,fc} multilibs

2021-02-23 Thread Jim Wilson
On Tue, Feb 23, 2021 at 2:17 AM Alexandre Oliva  wrote:

> I take your response as confirming my expectation that the defaults are
> to remain unchanged for now, and I will thus proceed under this
> assumption.
>

If we add default multilibs for you, then to be fair, we need to add
default multilibs for other people that ask, and before long we are trying
to build hundreds or maybe even thousands of multilibs by default which is
unworkable.  There are simply too many different extensions, and too many
different valid combinations of them.  A problem that is quickly getting
worse, as there are a slew of extensions that are planned for final
approval this year.  The current set was defined before I started doing
RISC-V work over 3 years ago, and I've been saying no to everyone that
wants to change the default set.  The current set is tractable for newbies
to try to build and use.  People that want a different set can define their
own, and we have made it easy for people to define their own sets of
multilibs as Kito pointed out.

I do think that when the architecture profiles are adopted and implemented
it would make sense to add them to the default set, and maybe eventually
replace the default set.

Jim


Re: [PATCH, constexpr, coroutines ] Generic lambda coroutines cannot be constexpr [PR96251].

2021-02-23 Thread Jason Merrill via Gcc-patches

On 2/23/21 8:20 AM, Iain Sandoe wrote:

Hi Jason,

Jason Merrill  wrote:


On 2/22/21 3:59 PM, Iain Sandoe wrote:



* I was not able to see any way in which the instantiation process
   could be made to bail in this case and re-try for non-constexpr.


Many of the other places that set cp_function_chain->invalid_constexpr 
condition their errors on !is_instantiation_of_constexpr, which should also fix 
this testcase.


Thanks!
(FWIW, there only seem to be three instances of this in the FE and two of those 
are in constexpr.c).

so like this?
(tested on x86_64-darwin, regtest running x86_64 linux)


OK.


thanks
iain


  [PATCH] coroutines : Adjust error handling for type-dependent coroutines 
[PR96251].

Although coroutines are not permitted to be constexpr, generic lambdas
are implicitly constexpr from C++17 and, because of this, a generic coroutine lambda
can be marked as potentially constexpr. As per the PR, this then fails when
type substitution is attempted because the check disallowing constexpr in
the coroutines code was overly restrictive.

This changes the error handling to mark the function as 'invalid_constexpr'
but suppresses the error in the case that we are instantiating a constexpr.

gcc/cp/ChangeLog:

PR c++/96251
* coroutines.cc (coro_common_keyword_context_valid_p): Suppress
error reporting when instantiating for a constexpr.

gcc/testsuite/ChangeLog:

PR c++/96251
* g++.dg/coroutines/pr96251.C: New test.
---
  gcc/cp/coroutines.cc  | 11 +---
  gcc/testsuite/g++.dg/coroutines/pr96251.C | 32 +++
  2 files changed, 39 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/coroutines/pr96251.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index e61de1fac01..abfe8d08192 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -683,11 +683,14 @@ coro_common_keyword_context_valid_p (tree fndecl, 
location_t kw_loc,
  
if (DECL_DECLARED_CONSTEXPR_P (fndecl))

  {
-  /* [dcl.constexpr] 3.3 it shall not be a coroutine.  */
-  error_at (kw_loc, "%qs cannot be used in a % function",
-   kw_name);
cp_function_chain->invalid_constexpr = true;
-  return false;
+  if (!is_instantiation_of_constexpr (fndecl))
+   {
+ /* [dcl.constexpr] 3.3 it shall not be a coroutine.  */
+ error_at (kw_loc, "%qs cannot be used in a % function",
+   kw_name);
+ return false;
+   }
  }
  
if (FNDECL_USED_AUTO (fndecl))

diff --git a/gcc/testsuite/g++.dg/coroutines/pr96251.C 
b/gcc/testsuite/g++.dg/coroutines/pr96251.C
new file mode 100644
index 000..3f435044e41
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr96251.C
@@ -0,0 +1,32 @@
+#include 
+
+struct coroutine {
+  struct promise_type {
+auto get_return_object() { return coroutine(); }
+auto initial_suspend() { return std::suspend_always(); }
+auto yield_value(int) { return std::suspend_always(); }
+void return_void() {}
+auto final_suspend() noexcept { return std::suspend_always(); }
+void unhandled_exception() {}
+  };
+};
+
+int main() {
+  auto f = [](auto max) -> coroutine {
+for (int i = 0; i < max; ++i) {
+   co_yield i;
+}
+  };
+
+  f(10);
+
+  // From PR98976
+  auto foo = [](auto&&) -> coroutine {
+switch (42) {
+  case 42:
+co_return;
+}
+  };
+  foo(1);
+
+}





Re: [PATCH] handle bad __dynamic_cast more gracefully (PR 99074)

2021-02-23 Thread Martin Sebor via Gcc-patches

On 2/22/21 7:03 PM, Jason Merrill wrote:

On 2/22/21 8:00 PM, Martin Sebor wrote:

On 2/22/21 4:08 PM, Jason Merrill wrote:

On 2/13/21 7:31 PM, Martin Sebor wrote:

The test case in PR 99074 invokes dynamic_cast with the this pointer
in a non-static member function called on a null pointer.  The call
is, of course, undefined and under different circumstances would be
diagnosed by -Wnonnull.   Unfortunately, in the test case, the null
pointer is the result of inlining and constant propagation and so
detected neither by the front end -Wnonnull nor by the middle end.
The program ends up passing it to __dynamic_cast() which then
crashes at runtime (again, not surprising for undefined behavior).

However, the reporter says the code behaved gracefully (didn't crash)
when compiled with GCC 4.8, and in my tests it also doesn't crash
when compiled with Clang or ICC.  I looked to see if it's possible
to do better and it seems it is.

The attached patch improves things by changing __dynamic_cast to
fail by returning null when the first argument is null, and also


This hunk is OK.


by declaring __dynamic_cast with attribute nonnull so that invalid
calls to it with a constant null pointer can be detected at compile
time.


This is not; dynamic_cast is specified to return null for a null 
operand.


"If v is a null pointer value, the result is a null pointer value."

The undefined behavior is the call to _to_object, not the dynamic_cast.


Yes, of course.  Just to be clear, in case it's not from the patch,
it adds nonnull to the __dynamic_cast() function in libsupc++ which
(if I read the comment right) documents nonnull-ness as its
precondition.  The function should never be called with a null
pointer except in buggy code like in the PR.


True.  So the attribute is technically correct, but the resulting 
warning is misleading, since the user shouldn't need to know anything 
about the internal implementation of dynamic_cast, and the user-visible 
feature doesn't have that constraint.


I agree.  Let's revisit this in stage 1 and see if we can do better
(see below).




I don't think this
is the most elegant way to diagnose the user bug but I also couldn't
think of anything better.  Do you have any suggestions?  (I ask in
part because for GCC 12 I'd like to see about issuing the warning
requested on PR 12277.)


If we can manage to warn about a null argument to __dynamic_cast, I'd 
think we should also be able to warn about a null 'this' argument to 
_to_object.


The -Wnonnull on the __dynamic_cast is readily detectable because
the null constant is propagated into the call after it has been
inlined into _to_object() by the time -Wnonnull runs (just after
early inlining).

The null this pointer isn't diagnosed for the same reason: the call
to _to_object() is inlined before the null is fully propagated into
the this argument.  This can be fixed by enhancing the warning to
look through use-def chains rather than just rely on constant
propagation.  But I also recently submitted a patch for PR 87489
to run -Wnonnull later, after FRE, to avoid a class of false
positives and negatives.  If that goes forward (in GCC 12) this
solution won't work.




WRT to the documentation of __dynamic_cast(), I didn't remove
the comment in the patch that mentions the precondition because
of the new warning.  If we want to consider null pointers valid
input to the function it seems we should update the comment.  Do
you agree?


The comment is from the ABI 
(https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dynamic_cast-algorithm), 
we shouldn't change it.


Ack.




The comment is below.  I assume SUB refers to SRC_PTR.

/* sub: source address to be adjusted; nonnull, and since the
  *  source object is polymorphic, *(void**)sub is a virtual pointer.
  * src: static type of the source object.
  * dst: destination type (the "T" in "dynamic_cast(v)").
  * src2dst_offset: a static hint about the location of the
  *    source subobject with respect to the complete object;
  *    special negative values are:
  *   -1: no hint
  *   -2: src is not a public base of dst
  *   -3: src is a multiple public base type but never a
  *   virtual base type
  *    otherwise, the src type is a unique public nonvirtual
  *    base type of dst at offset src2dst_offset from the
  *    origin of dst.  */
extern "C" void *
__dynamic_cast (const void *src_ptr,    // object started from
 const __class_type_info *src_type, // type of the 
starting object
 const __class_type_info *dst_type, // desired target 
type

 ptrdiff_t src2dst) // how src and dst are related



Since the test case is undefined it seems borderline whether this
can strictly be considered a regression, even if some previous
releases handled it more gracefully.


Indeed.  But handling the null case in __dynamic_cast as well as in 
the compiler seems harmless enough.


Okay.


I guess it's a question of which behavior is pref

Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-23 Thread Kwok Cheung Yeung

On 19/02/2021 7:12 pm, Kwok Cheung Yeung wrote:
I have included the current state of my patch. All task-detach-* tests pass when 
executed without offloading or with offloading to GCN, but with offloading to 
Nvidia, task-detach-6.* hangs consistently but everything else passes (probably 
because of the missing gomp_team_barrier_done?).




It looks like the hang has nothing to do with the detach patch - this hangs 
consistently for me when offloaded to NVPTX:


#include 

int main (void)
{
#pragma omp target
  #pragma omp parallel
#pragma omp task
  ;
}

This doesn't hang when offloaded to GCN or the host device, or if num_threads(1) 
is specified on the omp parallel.


Kwok


[PATCH] PR libfortran/99218 - [8/9/10/11 Regression] matmul on temporary array accesses invalid memory

2021-02-23 Thread Harald Anlauf via Gcc-patches
Dear all,

under certain circumstances a call to MATMUL for rank-2 times rank-1
would invoke a highly tuned rank-2 times rank-2 algorithm which could
lead to invalid reads and writes.  The solution is to check the rank
of the second argument to matmul and fall back to a regular algorithm
for rank-1.  The invalid accesses did show up with valgrind.

I have not been able to create a testcase that gives wrong results.

Regtested on x86_64-pc-linux-gnu, and verified with valgrind.

OK for master?

As this affects all open branches down to 8, ok for backports?

Thanks,
Harald


PR libfortran/99218 - matmul on temporary array accesses invalid memory

Do not invoke tuned rank-2 times rank-2 matmul if rank(b) == 1.

libgfortran/ChangeLog:

PR libfortran/99218
* m4/matmul_internal.m4: Invoke tuned matmul only for rank(b)>1.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Likewise.
* generated/matmul_c4.c: Likewise.
* generated/matmul_c8.c: Likewise.
* generated/matmul_i1.c: Likewise.
* generated/matmul_i16.c: Likewise.
* generated/matmul_i2.c: Likewise.
* generated/matmul_i4.c: Likewise.
* generated/matmul_i8.c: Likewise.
* generated/matmul_r10.c: Likewise.
* generated/matmul_r16.c: Likewise.
* generated/matmul_r4.c: Likewise.
* generated/matmul_r8.c: Likewise.
* generated/matmulavx128_c10.c: Likewise.
* generated/matmulavx128_c16.c: Likewise.
* generated/matmulavx128_c4.c: Likewise.
* generated/matmulavx128_c8.c: Likewise.
* generated/matmulavx128_i1.c: Likewise.
* generated/matmulavx128_i16.c: Likewise.
* generated/matmulavx128_i2.c: Likewise.
* generated/matmulavx128_i4.c: Likewise.
* generated/matmulavx128_i8.c: Likewise.
* generated/matmulavx128_r10.c: Likewise.
* generated/matmulavx128_r16.c: Likewise.
* generated/matmulavx128_r4.c: Likewise.
* generated/matmulavx128_r8.c: Likewise.

diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index 3e81b491ea1..b8172e8845d 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -276,7 +276,8 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
@@ -844,7 +845,8 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
@@ -1412,7 +1414,8 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
@@ -1994,7 +1997,8 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
@@ -2635,7 +2639,8 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c
index 61a9a70b5e4..a97e06f0155 100644
--- a/libgfortran/generated/matmul_c16.c
+++ b/libgfortran/generated/matmul_c16.c
@@ -276,7 +276,8 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1)
 {
   /* This block of code implements a tuned matmul, derived from
  Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
@@ -844,7 +845,8 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
 	}
 }

-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (rxstride == 1 && axstride == 1 && bxstride == 1
+  && GFC_DESCRIPTOR_RANK (b) != 1

Re: PING [PATCH] avoid -Warray-bounds checks for vtable assignments (PR 98266)

2021-02-23 Thread Jason Merrill via Gcc-patches

On 2/23/21 11:02 AM, Martin Sebor wrote:

[CC Jason for any further comments/clarification]

On 2/9/21 10:49 AM, Martin Sebor wrote:

On 2/8/21 4:11 PM, Jeff Law wrote:



On 2/8/21 3:44 PM, Martin Sebor wrote:

On 2/8/21 3:26 PM, Jeff Law wrote:



On 2/8/21 2:56 PM, Martin Sebor wrote:

On 2/8/21 12:59 PM, Jeff Law wrote:



On 1/19/21 5:56 PM, Martin Sebor via Gcc-patches wrote:

Similar to the problem reported for -Wstringop-overflow in pr98266
and already fixed, -Warray-bounds is also susceptible to false
positives in assignments and copies involving virtual inheritance.
Because the two warnings don't share code yet (hopefully in GCC 12)
the attached patch adds its own workaround for this problem to
gimple-array-bounds.cc, this one slightly more crude because of
the limited insight the array bounds checking has into the checked
expressions.

Tested on x86_64-linux.

Martin

gcc-98266.diff

PR middle-end/98266 - bogus array subscript is partly outside array
bounds on virtual inheritance

gcc/ChangeLog:

  PR middle-end/98266
  * gimple-array-bounds.cc
(array_bounds_checker::check_array_bounds):
  Avoid checking references involving artificial members.

gcc/testsuite/ChangeLog:

  PR middle-end/98266
  * g++.dg/warn/Warray-bounds-15.C: New test.

It seems to me that we've got the full statement at some point  and
thus
the full expression so at some point couldn't we detect when
TYPE_SIZE_UNIT!= DECL_SIZE_UNIT?  Or should we be using 
TYPE_SIZE_UNIT

rather than DECL_SIZE_UNIT in gimple-array-bounds.cc

Am I missing something?


The expression we're looking at when the false positive is issued
is the MEM_REF in the LHS of:

MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void
*)&_ZTC1E24_1D + 24B];

TREE_TYPE(LHS) is D, DECL_SIZE_UNIT (D.2652) is 24, and
TYPE_SIZE_UNIT(D) is also 24, so there's no discrepancy between
DECL_SIZE and TYPE_SIZE.

So that seems like it's a different issue then, unrelated to 97595.
Right?


I think the underlying problem is the same.  We're getting a size
that doesn't correspond to what's actually being accessed, and it
happens because of the virtual inheritance.  In pr97595 Jason
suggested to use the decl/type size inequality to identify this
case but I think we could have just as well used DECL_ARTIFICIAL
instead.  At least the test cases from pr97595 both pass with
this change.

But in the 98266 case the type and decl sizes are the same.  So to be
true that would mean that the underlying type we're using to access
memory differs from its actual type.  Is that the case in the IL?  And
does this have wider implications for diagnostics or optimizations that
rely on accurate type sizing?

I'm just trying to make sure I understand, not accepting or rejecting
the patch yet.


The part of the IL with the MEM_REF is this:

void g ()
{
   void * D.2789;
   struct E D.2652;

    [local count: 1073741824]:
   E::E (&D.2652, "");
   f (&D.2652);

    [local count: 1073741824]:
   MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void 
*)&_ZTC1E24_1D + 24B];

   ...

The access here is to the _vptr.D pointer member of D.2652 which is
just past the end of the parent object (as reflected by its SIZE):
it sets up the virtual table pointer.

The access in pr97595 is to the member subobject, which, as Jason
explained (and I accordingly documented under DECL_SIZE in tree.h),
is also laid out separately from the parent object.

These cases aren't exactly the same (which is also why the test
I added for -Warray-bounds in pr97595 didn't expose this bug) but
they are closely related.  The one here can be distinguished by
DECL_ARTIFICIAL.  The other by the DECL_SIZE != TYPE_SIZE member
inequality.

Might this impact other warnings?  I'd say so if they don't take
these things into account.  I just learned about this in pr97595
which was a -Wstringop-overflow false positive but I also saw
a similar instance of -Warray-bounds with my patch to improve
caching and enhance array bounds checking.  I dealt with that
instance of the warning in that patch but proactively added
a test case to the fix for pr97595.  But the test case is focused
on the subobject access and not on one to the virtual table so
(as I said above) it didn't expose this bug.

Might this also impact optimizations?  I can imagine someone
unaware of this "gotcha" making the same "naive" assumption
I did, but I'd also expect such an invalid assumption to be
found either in code review or quickly cause problems in
testing.


Jeff, does this answer your question?


I don't see how the issue here depends on the artificiality of the vptr; 
I'd expect to see the same problem with a data member.  The problem is 
that a D base subobject is smaller than a complete D object, and in this 
case the base subobject is allocated such that if it were a full D 
object, it would overlap the end of E.  And we're checking the MEM_REF 
as though accessing a full D object, so we get a warning.


The general issue about the confusion b

Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-23 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 23, 2021 at 09:43:51PM +, Kwok Cheung Yeung wrote:
> On 19/02/2021 7:12 pm, Kwok Cheung Yeung wrote:
> > I have included the current state of my patch. All task-detach-* tests
> > pass when executed without offloading or with offloading to GCN, but
> > with offloading to Nvidia, task-detach-6.* hangs consistently but
> > everything else passes (probably because of the missing
> > gomp_team_barrier_done?).
> > 
> 
> It looks like the hang has nothing to do with the detach patch - this hangs
> consistently for me when offloaded to NVPTX:
> 
> #include 
> 
> int main (void)
> {
> #pragma omp target
>   #pragma omp parallel
> #pragma omp task
>   ;
> }
> 
> This doesn't hang when offloaded to GCN or the host device, or if
> num_threads(1) is specified on the omp parallel.

Then it can be solved separately, I'll try to have a look if I see something
bad from the dumps, but I admit I don't have much experience with debugging
NVPTX offloaded code...

Jakub



[PATCH] [libstdc++] Refactor/cleanup of atomic wait implementation

2021-02-23 Thread Thomas Rodgers
From: Thomas Rodgers 

* This revises the previous version to fix std::__condvar::wait_until() usage.

This is a substantial rewrite of the atomic wait/notify (and timed wait
counterparts) implementation.

The previous __platform_wait looped on EINTR; however, this behavior is
not required by the standard. A new _GLIBCXX_HAVE_PLATFORM_WAIT macro
now controls whether wait/notify are implemented using a platform
specific primitive or with a platform agnostic mutex/condvar. This
patch only supplies a definition for Linux futexes. A future update
could add support for __ulock_wait/wake on Darwin, for instance.

The members of __waiters were lifted to a new base class. The members
are now arranged such that overall sizeof(__waiters_base) fits in two
cache lines (on platforms with at least 64 byte cache lines). The
definition will also use destructive_interference_size for this if it
is available.

The __waiters type is now specific to untimed waits. Timed waits have a
corresponding __timed_waiters type. Much of the code has been moved from
the previous __atomic_wait() free function to the __waiter_base template
and a __waiter derived type is provided to implement the un-timed wait
operations. A similar change has been made to the timed wait
implementation.

The __atomic_spin code has been extended to take a spin policy which is
invoked after the initial busy wait loop. The default policy is to
return from the spin. The timed wait code adds a timed backoff spinning
policy. The code from <thread> which implements this_thread::sleep_for,
sleep_until has been moved to a new <bits/std_thread_sleep.h> header
which allows the thread sleep code to be consumed without pulling in the
whole of <thread>.

The entry points into the wait/notify code have been restructured to
support either -
   * Testing the current value of the atomic stored at the given address
 and waiting on a notification.
   * Applying a predicate to determine if the wait was satisfied.
The entry points were renamed to make it clear that the wait and wake
operations operate on addresses. The first variant takes the expected
value and a function which returns the current value that should be used
in comparison operations; these operations are named with a _v suffix
(i.e. 'value'). All atomic<_Tp> wait/notify operations use the first
variant. Barriers, latches and semaphores use the predicate variant.

This change also centralizes what it means to compare values for the
purposes of atomic::wait rather than scattering through individual
predicates.

This change also centralizes the repetitive code which adjusts for
different user supplied clocks (this should be moved elsewhere
and all such adjustments should use a common implementation).

libstdc++-v3/ChangeLog:
* include/Makefile.am: Add new <bits/std_thread_sleep.h> header.
* include/Makefile.in: Regenerate.
* include/bits/atomic_base.h: Adjust all calls
to __atomic_wait/__atomic_notify for new call signatures.
* include/bits/atomic_wait.h: Extensive rewrite.
* include/bits/atomic_timed_wait.h: Likewise.
* include/bits/semaphore_base.h: Adjust all calls
to __atomic_wait/__atomic_notify for new call signatures.
* include/bits/std_thread_sleep.h: New file.
* include/std/atomic: Likewise.
* include/std/barrier: Likewise.
* include/std/latch: Likewise.
* testsuite/29_atomics/atomic/wait_notify/bool.cc: Simplify
test.
* testsuite/29_atomics/atomic/wait_notify/generic.cc: Likewise.
* testsuite/29_atomics/atomic/wait_notify/pointers.cc: Likewise.
* testsuite/29_atomics/atomic_flag/wait_notify.cc: Likewise.
* testsuite/29_atomics/atomic_float/wait_notify.cc: Likewise.
* testsuite/29_atomics/atomic_integral/wait_notify.cc: Likewise.
* testsuite/29_atomics/atomic_ref/wait_notify.cc: Likewise.
---
 libstdc++-v3/include/Makefile.am  |   1 +
 libstdc++-v3/include/Makefile.in  |   1 +
 libstdc++-v3/include/bits/atomic_base.h   |  36 +-
 libstdc++-v3/include/bits/atomic_timed_wait.h | 410 +++---
 libstdc++-v3/include/bits/atomic_wait.h   | 400 +++--
 libstdc++-v3/include/bits/semaphore_base.h|  73 +---
 libstdc++-v3/include/bits/std_thread_sleep.h  | 119 +
 libstdc++-v3/include/std/atomic   |  15 +-
 libstdc++-v3/include/std/barrier  |   4 +-
 libstdc++-v3/include/std/latch|   4 +-
 libstdc++-v3/include/std/thread   |  68 +--
 .../29_atomics/atomic/wait_notify/bool.cc |  37 +-
 .../29_atomics/atomic/wait_notify/generic.cc  |  19 +-
 .../29_atomics/atomic/wait_notify/pointers.cc |  36 +-
 .../29_atomics/atomic_flag/wait_notify/1.cc   |  37 +-
 .../29_atomics/atomic_float/wait_notify.cc|  26 +-
 .../29_atomics/atomic_integral/wait_notify.cc |  73 ++--
 .../29_atomics/atomic_ref/wait_notify.cc  |  74 +---
 18 files changed, 802 insertions(+), 631 deletions(-)
 create mode 100644 libstdc++-v3/include/bits/

Re: [PATCH] rs6000: Fix MMA API - Add support for compatibility built-ins

2021-02-23 Thread Peter Bergner via Gcc-patches
On 2/5/21 12:28 PM, Segher Boessenkool wrote:
> On Fri, Feb 05, 2021 at 04:11:30PM +0100, Florian Weimer wrote:
>> * Peter Bergner:
>>> On 2/5/21 4:28 AM, Florian Weimer wrote:
>>>> Maybe add a check that the compatibility builtins are flagged as
>>>> available using __has_builtin?
>>>
>>> Do you mean add a test in the testsuite for this?  I can check on
>>> adding that to the test case.
>>
>> Right, in the test case.  Given that it's a new kind of built-in.
>> (Not sure if it makes sense.)
> 
> There aren't many such tests yet, so it will be helpful just because of
> that.  But __has_builtin should work for any builtin function
> whatsoever, and there is nothing special about these compatibility
> builtins (it is just a name, it is defined as any other).

__has_builtin does work for the compat builtins, so I added tests using it.
Here is the updated patch given the review comments:



rs6000: Add support for compatibility built-ins

The LLVM and GCC teams agreed to rename the __builtin_mma_assemble_pair and
__builtin_mma_disassemble_pair built-ins to __builtin_vsx_assemble_pair and
__builtin_vsx_disassemble_pair respectively.  It's too late to remove the
old names, so this patch renames the built-ins to the new names and then
adds support for creating compatibility built-ins (ie, multiple built-in
functions generate the same code) and then creates compatibility built-ins
using the old names.

This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
Ok for mainline?

This will need backporting to GCC 10.  Ok there too once it's baked on
trunk for a little while?

Peter

2021-02-23  Peter Bergner  

gcc/
* config/rs6000/mma.md (mma_assemble_pair): Rename from this...
(vsx_assemble_pair): ...to this.
(*mma_assemble_pair): Rename from this...
(*vsx_assemble_pair): ...to this.
(mma_disassemble_pair): Rename from this...
(vsx_disassemble_pair): ...to this.
(*mma_disassemble_pair): Rename from this...
(*vsx_disassemble_pair): ...to this.
* gcc/config/rs6000/rs6000-builtin.def (BU_MMA_V2, BU_MMA_V3,
BU_COMPAT): New macros.
(mma_assemble_pair): Rename from this...
(vsx_assemble_pair): ...to this.
(mma_disassemble_pair): Rename from this...
(vsx_disassemble_pair): ...to this.
(mma_assemble_pair): Add compatibility built-in.
(mma_disassemble_pair): Likewise.
* config/rs6000/rs6000-call.c (struct builtin_compatibility): New.
(RS6000_BUILTIN_COMPAT): Define.
(bdesc_compat): New.
(mma_expand_builtin): Use VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL.
(rs6000_gimple_fold_mma_builtin): Use MMA_BUILTIN_DISASSEMBLE_PAIR
and VSX_BUILTIN_ASSEMBLE_PAIR.
(rs6000_init_builtins): Register compatibility built-ins.
(mma_init_builtins): USE VSX_BUILTIN_ASSEMBLE_PAIR,
VSX_BUILTIN_ASSEMBLE_PAIR_INTERNAL, VSX_BUILTIN_DISASSEMBLE_PAIR and
VSX_BUILTIN_DISASSEMBLE_PAIR_INTERNAL.
* doc/extend.texi (__builtin_mma_assemble_pair): Rename from this...
(__builtin_vsx_assemble_pair): ...to this.
(__builtin_mma_disassemble_pair): Rename from this...
(__builtin_vsx_disassemble_pair): ...to this.

gcc/testsuite/
* gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c: Add tests for
__builtin_vsx_assemble_pair and __builtin_vsx_disassemble_pair.
Add __has_builtin tests for built-ins.
Update expected instruction counts.

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 87569f1c31d..c40501f2e09 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -321,7 +321,7 @@
(set_attr "length" "*,*,16")
(set_attr "max_prefixed_insns" "2,2,*")])
 
-(define_expand "mma_assemble_pair"
+(define_expand "vsx_assemble_pair"
   [(match_operand:OO 0 "vsx_register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
(match_operand:V16QI 2 "mma_assemble_input_operand")]
@@ -334,7 +334,7 @@
   DONE;
 })
 
-(define_insn_and_split "*mma_assemble_pair"
+(define_insn_and_split "*vsx_assemble_pair"
   [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
@@ -351,7 +351,7 @@
   DONE;
 })
 
-(define_expand "mma_disassemble_pair"
+(define_expand "vsx_disassemble_pair"
   [(match_operand:V16QI 0 "mma_disassemble_output_operand")
(match_operand:OO 1 "vsx_register_operand")
(match_operand 2 "const_0_to_1_operand")]
@@ -366,7 +366,7 @@
   DONE;
 })
 
-(define_insn_and_split "*mma_disassemble_pair"
+(define_insn_and_split "*vsx_disassemble_pair"
   [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
(unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
  (match_operand 2 "const_0_to_1_operand")]
diff --git a/gcc/config/rs600

[PATCH, rs6000] Rename variable for clarity

2021-02-23 Thread Pat Haugen via Gcc-patches
Rename next_insn_prefixed_p for improved clarity.

Bootstrap/regtest on powerpc64le with no new regressions. Ok for trunk?

-Pat


2021-02-22  Pat Haugen  

gcc/
* config/rs6000/rs6000.c (next_insn_prefixed_p): Rename.
(rs6000_final_prescan_insn): Adjust.
(rs6000_asm_output_opcode): Likewise.


diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..4e608073358 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -26191,7 +26191,7 @@ prefixed_paddi_p (rtx_insn *insn)
 
 /* Whether the next instruction needs a 'p' prefix issued before the
instruction is printed out.  */
-static bool next_insn_prefixed_p;
+static bool prepend_p_to_next_insn;
 
 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
outputting the assembler code.  On the PowerPC, we remember if the current
@@ -26202,7 +26202,7 @@ static bool next_insn_prefixed_p;
 void
 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
 {
-  next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
+  prepend_p_to_next_insn = (get_attr_prefixed (insn) != PREFIXED_NO);
   return;
 }
 
@@ -26212,7 +26212,7 @@ rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
 void
 rs6000_asm_output_opcode (FILE *stream)
 {
-  if (next_insn_prefixed_p)
+  if (prepend_p_to_next_insn)
 fprintf (stream, "p");
 
   return;


Re: [PATCH 4/4] libstdc++: More efficient last day of month.

2021-02-23 Thread Matthias Kretz
I like the idea.

On Dienstag, 23. Februar 2021 14:25:10 CET Cassio Neri via Libstdc++ wrote:
> ((__m ^ (__m >> 3)) & 1) | 30

Note that you can drop the `& 1` part. 30 in binary is 0b11110. ORing with a 
value in [0, 0b01101] will only toggle the last bit.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 std::experimental::simd  https://github.com/VcDevel/std-simd
──


Re: [PATCH] doc: c: c++: Document the C/C++ extended asm empty input constraints

2021-02-23 Thread Segher Boessenkool
Hi!

On Tue, Feb 23, 2021 at 12:05:34AM +, Neven Sajko wrote:
> On Mon, 22 Feb 2021 at 16:30, Segher Boessenkool
>  wrote:
> > On Mon, Feb 15, 2021 at 11:22:52PM +, Neven Sajko via Gcc-patches wrote:
> > > There is a long-standing, but undocumented GCC inline assembly feature
> > > that's part of the extended asm GCC extension to C and C++: extended
> > > asm empty input constraints.
> >
> > There is no such thing.  *All* empty constraints have the same
> > semantics: anything whatsoever will do.  Any register, any constant, any
> > memory.
> 
> What I was trying to express is that input operand constraints are
> unlike output operand constraints in that they can be empty. I now
> realize I ended up being slightly confusing, though.

Ah, that is actually true for inline asm, yes; as long as you use only
a single alternative.  If you have more alternatives you can use an
empty constraint in an output just fine (with just one constraint you
will run into the "operand is not directly addressable" check, which
actually should *pass* for empty constraints).  Also, all this (except
that check, which is for inline asm only) is identical for the RTL that
GCC uses internally, and there empty constraints are quite common (for
situations where we really do not care what the operand is; almost
always the operands are constrained some other way already then).

So
  int f(void) { int x; asm("" : "=,r"(x)); }
works fine.

> > A length zero string is allowed as well.  This could be made more
> > explicit sure; OTOH, it isn't very often useful.  So your example
> > (using it for making a dependency) is certainly useful to have.  But
> > it is not a special case at all.
> 
> Syntactically, it's not a special case; but I definitely think the
> semantics could be better documented. Proof:
> 
> * There's a relevant Stack Overflow question.

There are SO questions for anything not obvious to some.  And you cannot
trust most answers there, either.

> If I didn't know better
> I'd conclude from the discussion there that empty input constraints
> are undocumented and unsupported,

They are not *usable* for almost anything.

> and there would surely be an answer
> if the documentation on the GCC side was a bit better:

Sure.  On the other hand, this is just one case of a much more general
issue: it is easy to write incorrect inline asm.

> * Clang erroneously doesn't support empty constraints for many years
> now (even though their internal documentation still says empty input
> constraints are supported, and external documentation says they
> support all the same constraints as GCC does).

Yes, they do not support many other features of inline asm either, and
they do not implement the same semantics for basic constructs.

> > > An empty input
> > > +constraint can be used to create an artificial dependency on a C or C++
> > > +variable (the variable that appears in the expression associated with the
> > > +constraint) without incurring unnecessary costs to performance.
> >
> > It still needs a register (or memory) reserved there (or sometimes a
> > constant can be used, but you have no dependency in that case!)
> 
> Yeah, this is a bit more complicated than I perhaps implied. An asm
> volatile can tell the compiler "I need this value calculated at this
> point",

No, that is not what asm volatile is or does.

"asm volatile" means the asm has an unspecified needed side effect.  In
other words, it has to be executed on the real machine exactly like on
the abstract machine; as often, and in the same order.

A volatile asm can be moved out of loops, or even out of functions, and
other similar things, just fine.

(All inline asms without outputs are always volatile (they could just
always be deleted if that was not true), you do not often need to say
"asm volatile" explicitly; mostly if the asm changes some machine state
the compiler does not know about, which you do not see in user code a
lot, just in OS code etc.)

> but the compiler may still choose to eliminate the calculation
> from the generated code if it can perform it itself at compilation
> time.

The compiler can *never* do what an asm does.  The compiler can never
know what a piece of assembler code (if that is what it is!) means!

If the compiler knows it will not need any outputs from the asm, and
there are no other side effects needed either, then it can delete the
asm.

> > > Specific applications may include direct
> > > +interaction with hardware features; or things like testing, fuzzing and
> > > +benchmarking.
> >
> > What does this mean?
> 
> The manual already has examples for "direct interaction with hardware 
> features".

I could not find anything with non-trivial length substrings of that in
the manual.  Do you have an URL maybe?  Something under
  https://gcc.gnu.org/onlinedocs/gcc/
preferably.

> > Here is a simple example showing why this isn't as simple to use as
> > you imply here:
> >
> > ===
> > void f(int x)
> > {
> > asm volatile("" :

Re: [PATCH] rs6000: Fix MMA API - Add support for compatibility built-ins

2021-02-23 Thread Segher Boessenkool
Hi!

On Tue, Feb 23, 2021 at 04:00:42PM -0600, Peter Bergner wrote:
>   (mma_assemble_pair): Add compatibility built-in.
s/Add/New/ is better (it makes clear you do not add something to the
(already existing) mma_assemble_pair, that it is in fact new here).

>   (mma_init_builtins): USE VSX_BUILTIN_ASSEMBLE_PAIR,

s/USE/Use/

The patch is okay for trunk and for 10.  Thank you!


Segher


Re: [PATCH, rs6000] Rename variable for clarity

2021-02-23 Thread Segher Boessenkool
Hi!

On Tue, Feb 23, 2021 at 04:12:42PM -0600, Pat Haugen wrote:
> gcc/
>   * config/rs6000/rs6000.c (next_insn_prefixed_p): Rename.
>   (rs6000_final_prescan_insn): Adjust.
>   (rs6000_asm_output_opcode): Likewise.

Excellent.  Okay for trunk and 10.  Thank you!


Segher


Re: PING [PATCH] avoid -Warray-bounds checks for vtable assignments (PR 98266)

2021-02-23 Thread Martin Sebor via Gcc-patches

On 2/23/21 2:52 PM, Jason Merrill wrote:

On 2/23/21 11:02 AM, Martin Sebor wrote:

[CC Jason for any further comments/clarification]

On 2/9/21 10:49 AM, Martin Sebor wrote:

On 2/8/21 4:11 PM, Jeff Law wrote:



On 2/8/21 3:44 PM, Martin Sebor wrote:

On 2/8/21 3:26 PM, Jeff Law wrote:



On 2/8/21 2:56 PM, Martin Sebor wrote:

On 2/8/21 12:59 PM, Jeff Law wrote:



On 1/19/21 5:56 PM, Martin Sebor via Gcc-patches wrote:

Similar to the problem reported for -Wstringop-overflow in pr98266
and already fixed, -Warray-bounds is also susceptible to false
positives in assignments and copies involving virtual inheritance.
Because the two warnings don't share code yet (hopefully in GCC 
12)

the attached patch adds its own workaround for this problem to
gimple-array-bounds.cc, this one slightly more crude because of
the limited insight the array bounds checking has into the checked
expressions.

Tested on x86_64-linux.

Martin

gcc-98266.diff

PR middle-end/98266 - bogus array subscript is partly outside 
array

bounds on virtual inheritance

gcc/ChangeLog:

  PR middle-end/98266
  * gimple-array-bounds.cc
(array_bounds_checker::check_array_bounds):
  Avoid checking references involving artificial members.

gcc/testsuite/ChangeLog:

  PR middle-end/98266
  * g++.dg/warn/Warray-bounds-15.C: New test.

It seems to me that we've got the full statement at some point  and
thus
the full expression so at some point couldn't we detect when
TYPE_SIZE_UNIT!= DECL_SIZE_UNIT?  Or should we be using 
TYPE_SIZE_UNIT

rather than DECL_SIZE_UNIT in gimple-array-bounds.cc

Am I missing something?


The expression we're looking at when the false positive is issued
is the MEM_REF in the LHS of:

MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void
*)&_ZTC1E24_1D + 24B];

TREE_TYPE(LHS) is D, DECL_SIZE_UNIT (D.2652) is 24, and
TYPE_SIZE_UNIT(D) is also 24, so there's no discrepancy between
DECL_SIZE and TYPE_SIZE.

So that seems like it's a different issue then, unrelated to 97595.
Right?


I think the underlying problem is the same.  We're getting a size
that doesn't correspond to what's actually being accessed, and it
happens because of the virtual inheritance.  In pr97595 Jason
suggested to use the decl/type size inequality to identify this
case but I think we could have just as well used DECL_ARTIFICIAL
instead.  At least the test cases from pr97595 both pass with
this change.

But in the 98266 case the type and decl sizes are the same.  So to be
true that would mean that the underlying type we're using to access
memory differs from its actual type.  Is that the case in the IL?  And
does this have wider implications for diagnostics or optimizations that
rely on accurate type sizing?

I'm just trying to make sure I understand, not accepting or rejecting
the patch yet.


The part of the IL with the MEM_REF is this:

void g ()
{
   void * D.2789;
   struct E D.2652;

    [local count: 1073741824]:
   E::E (&D.2652, "");
   f (&D.2652);

    [local count: 1073741824]:
   MEM[(struct D *)&D.2652 + 24B]._vptr.D = &MEM  [(void 
*)&_ZTC1E24_1D + 24B];

   ...

The access here is to the _vptr.D pointer member of D.2652 which is
just past the end of the parent object (as reflected by its SIZE):
it sets up the virtual table pointer.

The access in pr97595 is to the member subobject, which, as Jason
explained (and I accordingly documented under DECL_SIZE in tree.h),
is also laid out separately from the parent object.

These cases aren't exactly the same (which is also why the test
I added for -Warray-bounds in pr97595 didn't expose this bug) but
they are closely related.  The one here can be distinguished by
DECL_ARTIFICIAL.  The other by the DECL_SIZE != TYPE_SIZE member
inequality.

Might this impact other warnings?  I'd say so if they don't take
these things into account.  I just learned about this in pr97595
which was a -Wstringop-overflow false positive but I also saw
a similar instance of -Warray-bounds with my patch to improve
caching and enhance array bounds checking.  I dealt with that
instance of the warning in that patch but proactively added
a test case to the fix for pr97595.  But the test case is focused
on the subobject access and not on one to the virtual table so
(as I said above) it didn't expose this bug.

Might this also impact optimizations?  I can imagine someone
unaware of this "gotcha" making the same "naive" assumption
I did, but I'd also expect such an invalid assumption to be
found either in code review or quickly cause problems in
testing.


Jeff, does this answer your question?


I don't see how the issue here depends on the artificiality of the vptr; 
I'd expect to see the same problem with a data member.  The problem is 
that a D base subobject is smaller than a complete D object, and in this 
case the base subobject is allocated such that if it were a full D 
object, it would overlap the end of E.  And we're checking the MEM_REF 
as though accessing a full D object, so we get a warn

Re: [PATCH] rs6000: Fix MMA API - Add support for compatibility built-ins

2021-02-23 Thread Peter Bergner via Gcc-patches
On 2/23/21 4:53 PM, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Feb 23, 2021 at 04:00:42PM -0600, Peter Bergner wrote:
>>  (mma_assemble_pair): Add compatibility built-in.
> s/Add/New/ is better (it makes clear you do not add something to the
> (already existing) mma_assemble_pair, that it is in fact new here).
> 
>>  (mma_init_builtins): USE VSX_BUILTIN_ASSEMBLE_PAIR,
> 
> s/USE/Use/
> 
> The patch is okay for trunk and for 10.  Thank you!

Fixed and pushed to trunk.  I'll push the backport after a day or two
of burn in on trunk.  Thanks!

Peter




[PATCH v2] rs6000: Convert the vector element register to SImode [PR98914]

2021-02-23 Thread Xionghu Luo via Gcc-patches
vec_insert defines the element argument type to be signed int by ELFv2
ABI.  When expanding a vector with a variable rtx, convert the rtx type
to SImode.

gcc/ChangeLog:

2021-02-24  Xionghu Luo  

PR target/98914
* config/rs6000/rs6000.c (rs6000_expand_vector_set): Convert
elt_rtx to SImode.
(rs6000_expand_vector_set_var_p9): Remove assert.
(rs6000_expand_vector_set_var_p8): Likewise.

gcc/testsuite/ChangeLog:

2021-02-24  Xionghu Luo  

* gcc.target/powerpc/pr98914.c: New test.
---
 gcc/config/rs6000/rs6000.c | 17 ++---
 gcc/testsuite/gcc.target/powerpc/pr98914.c | 11 +++
 2 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr98914.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..9f7f8da56c6 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7000,8 +7000,6 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx 
idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
 
   rtx tmp = gen_reg_rtx (GET_MODE (idx));
@@ -7047,8 +7045,6 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx 
idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
   HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
 
@@ -7144,7 +7140,7 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx 
elt_rtx)
   machine_mode mode = GET_MODE (target);
   machine_mode inner_mode = GET_MODE_INNER (mode);
   rtx reg = gen_reg_rtx (mode);
-  rtx mask, mem, x;
+  rtx mask, mem, x, elt_si;
   int width = GET_MODE_SIZE (inner_mode);
   int i;
 
@@ -7154,16 +7150,23 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx 
elt_rtx)
 {
   if (!CONST_INT_P (elt_rtx))
{
+ /* elt_rtx should be SImode from ELFv2 ABI.  */
+ elt_si = gen_reg_rtx (E_SImode);
+ if (GET_MODE (elt_rtx) != E_SImode)
+   convert_move (elt_si, elt_rtx, 0);
+ else
+   elt_si = elt_rtx;
+
  /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
 when elt_rtx is variable.  */
  if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
{
- rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
+ rs6000_expand_vector_set_var_p9 (target, val, elt_si);
  return;
}
  else if (TARGET_P8_VECTOR && TARGET_DIRECT_MOVE_64BIT)
{
- rs6000_expand_vector_set_var_p8 (target, val, elt_rtx);
+ rs6000_expand_vector_set_var_p8 (target, val, elt_si);
  return;
}
}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr98914.c 
b/gcc/testsuite/gcc.target/powerpc/pr98914.c
new file mode 100644
index 000..e4d78e3e6b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr98914.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-Og -mvsx" } */
+
+vector int
+foo (vector int v)
+{
+  for (long k = 0; k < 1; ++k)
+v[k] = 0;
+  return v;
+}
-- 
2.25.1



Re: PR 96391? Can we fix it for gcc11?

2021-02-23 Thread Richard Biener
On Tue, 23 Feb 2021, Qing Zhao wrote:

> Hi, Richard,
> 
> > On Feb 9, 2021, at 11:36 AM, Richard Biener  wrote:
> > 
> > On Tue, 9 Feb 2021, Qing Zhao wrote:
> >> 
> >> Yes, I understand that without a working testing case to repeat the error, 
> >> it’s very hard to debug and fix the issue. 
> >> 
> >> However, providing a testing case for this bug is really challenging from 
> >> our side due to multiple reasons…
> >> 
> >> 
> > 
> > Note you can try reducing a proprietary testcase with tools like
> > cvise or creduce.  Does your case also happen in a mingw/windows
> > environment?
> 
> We are trying to install creduce on our system, and noticed that it depends 
> on LLVM. I am wondering whether there is
> a similar tool that depends on GCC? 

There's 'delta'; it doesn't rely on GCC internals either, but it avoids the
LLVM dependency.  Note that while using the clang parser lets the reduction
work way better, it's still GCC that is used to see if the reduction is valid.

Richard.

> Qing
> > 
> > Richard.
> > 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH v2 0/5] RISC-V big endian support

2021-02-23 Thread Kito Cheng via Gcc-patches
Hi Marcus:

I just spent some time on those two testcases; I think those two
testcases could just be skipped on big-endian.

> FAIL: gcc.target/riscv/shift-and-1.c scan-assembler-not andi
> FAIL: gcc.target/riscv/shift-and-2.c scan-assembler-not andi

However, it seems like rv32be still has some strange failures there;
do you mind taking a look at that?

../configure --prefix=$PREFIX --with-arch=rv32gc
--with-multilib-generator=rv32gc-ilp32--


diff --git a/gcc/testsuite/gcc.target/riscv/shift-and-1.c
b/gcc/testsuite/gcc.target/riscv/shift-and-1.c
index d1f3a05db2c..6f4dccc709f 100644
--- a/gcc/testsuite/gcc.target/riscv/shift-and-1.c
+++ b/gcc/testsuite/gcc.target/riscv/shift-and-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv32gc -mabi=ilp32 -O" } */
+/* { dg-options "-march=rv32gc -mabi=ilp32 -O -mlittle-endian" } */

/* Test for si3_mask.  */
int
diff --git a/gcc/testsuite/gcc.target/riscv/shift-and-2.c
b/gcc/testsuite/gcc.target/riscv/shift-and-2.c
index 2c98e50101b..19ce5a60b30 100644
--- a/gcc/testsuite/gcc.target/riscv/shift-and-2.c
+++ b/gcc/testsuite/gcc.target/riscv/shift-and-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { riscv64*-*-* } } } */
-/* { dg-options "-march=rv64gc -mabi=lp64 -O" } */
+/* { dg-options "-march=rv64gc -mabi=lp64 -O -mlittle-endian" } */

/* Test for si3_mask_1.  */
extern int k;
On Tue, Feb 23, 2021 at 3:23 PM Marcus Comstedt  wrote:
>
>
> Hi Kito,
>
> Kito Cheng  writes:
>
> > FAIL: gcc.c-torture/execute/string-opt-5.c
> > FAIL: gcc.target/riscv/shift-and-1.c scan-assembler-not andi
> > FAIL: gcc.target/riscv/shift-and-2.c scan-assembler-not andi
>
> string-opt-5.c is one of the newlib issues I mentioned (handcoded
> assembler for strcmp which assumed LE (it was intended to #error out
> on BE, but used "BYTE_ORDER" instead of "__BYTE_ORDER__", so the check
> never worked)).  I'll send the fixes later today.
>
> The shift-and tests don't generate incorrect code or anything, but
> it's still puzzling why the generated code is different from with
> -mlittle-endian.
>
>
>   // Marcus
>
>