date:20220823

[PATCH] middle-end: Fix unexpected warnings for RISC-V port.

2022-08-23 Thread juzhe . zhong

From: zhongjuzhe 

gcc/ChangeLog:

* tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Simply 
initialize const_vf to 0.

---
 gcc/tree-vect-loop-manip.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 86d2264054a..c3a24e6c39a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1977,8 +1977,9 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
tree niters,
 }
   else
 ni_minus_gap = niters;
-
-  unsigned HOST_WIDE_INT const_vf;
+  
+  /* To silence some unexpected warnings, simply initialize to 0. */
+  unsigned HOST_WIDE_INT const_vf = 0;
   if (vf.is_constant (&const_vf)
   && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
 {
-- 
2.36.1

Re: [PATCH] middle-end: Fix unexpected warnings for RISC-V port.

2022-08-23 Thread Richard Biener via Gcc-patches

On Tue, 23 Aug 2022, juzhe.zh...@rivai.ai wrote:

> From: zhongjuzhe 

OK.

> gcc/ChangeLog:
> 
> * tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Simply 
> initialize const_vf to 0.
> 
> ---
>  gcc/tree-vect-loop-manip.cc | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 86d2264054a..c3a24e6c39a 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1977,8 +1977,9 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
> tree niters,
>  }
>else
>  ni_minus_gap = niters;
> -
> -  unsigned HOST_WIDE_INT const_vf;
> +  
> +  /* To silence some unexpected warnings, simply initialize to 0. */
> +  unsigned HOST_WIDE_INT const_vf = 0;
>if (vf.is_constant (&const_vf)
>&& !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
>  {
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Re: [PATCH] Enhance final_value_replacement_loop to handle bitop with an invariant induction.[PR105735]

2022-08-23 Thread Richard Biener via Gcc-patches

On Thu, Aug 18, 2022 at 8:48 AM Kong, Lingling via Gcc-patches
 wrote:
>
> Hi,
>
> This patch is for pr105735/pr101991. It will enable below optimization:
> {
> -  long unsigned int bit;
> -
> -   [local count: 32534376]:
> -
> -   [local count: 1041207449]:
> -  # tmp_10 = PHI 
> -  # bit_12 = PHI 
> -  tmp_7 = bit2_6(D) & tmp_10;
> -  bit_8 = bit_12 + 1;
> -  if (bit_8 != 32)
> -goto ; [96.97%]
> -  else
> -goto ; [3.03%]
> -
> -   [local count: 1009658865]:
> -  goto ; [100.00%]
> -
> -   [local count: 32534376]:
> -  # tmp_11 = PHI 
> -  return tmp_11;
> +  tmp_11 = tmp_4(D) & bit2_6(D);
> +  return tmp_11;
>
> }
>
> Ok for master ?
>
> gcc/ChangeLog:
>
> PR middle-end/105735
> * match.pd (bitop_with_inv_p): New match.
> * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare.
> (analyze_and_compute_bitop_with_inv_effect): New function.
> (final_value_replacement_loop): Enhanced to handle bitop
> with inv induction.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr105735-1.c: New test.
> * gcc.target/i386/pr105735-2.c: New test.
> ---
>  gcc/match.pd   |  4 +
>  gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++  
> gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++
>  gcc/tree-scalar-evolution.cc   | 59 +++
>  4 files changed, 179 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8050,6 +8050,10 @@ and,
>   (bit_not
>(nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3
>
> +(for bit_op (bit_and bit_ior bit_xor)
> + (match (bitop_with_inv_p @0 @1)
> +  (bit_op:c @0 @1)))
> +

That's a single-stmt match, you shouldn't use match.pd matching for
this.  Instead
just do

  if (is_gimple_assign (stmt)
  && ((code = gimple_assign_rhs_code (stmt)), true)
  && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
   ..

and pick gimple_assign_rhs{1,2} (stmt) as the operands.  The :c in
bit_op:c is redundant
btw. - while the name suggests "with invariant" you don't actually
check for that.  But again,
given canonicalization rules the invariant will be rhs2 so above add

&& TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST

for example.

>  /* n - (((n > C1) ? n : C1) & -C2) ->  n & C1 for unsigned case.
> n - (((n > C1) ? n : C1) & -C2) ->  (n <= C1) ? n : (n & C1) for signed 
> case.  */  (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c 
> b/gcc/testsuite/gcc.target/i386/pr105735-1.c
> new file mode 100644
> index 000..8d2123ed351
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c
> @@ -0,0 +1,88 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp"
> +} } */
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo (unsigned long long tmp, unsigned long long bit2) {

you probably need dg-require-effective-target longlong,
but is it necessary to use long long for the testcases in the first place?
The IV seems to be unused, if it should match the variables bit size
use sizeof (type) * 8

> +  for (int bit = 0; bit < 64; bit++)
> +tmp &= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo1 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 63; bit >= 0; bit -=3)
> +tmp &= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo2 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 64; bit++)
> +tmp |= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo3 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 63; bit >= 0; bit -=3)
> +tmp |= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo4 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 64; bit++)
> +tmp ^= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo5 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 63; bit++)
> +tmp ^= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +f (unsigned long long tmp, long long bit, unsigned long long bit2) {
> +  unsigned long long res = tmp;
> +  for (long long i = 0; i < bit; i++)
> +res &= bit2;
> +  return res;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) {
> +  unsigned long long res = tmp;
> +  for (long long i = 0; i < bit; i++)
> +res |= bit2;
> +  return res;
> +}
> +
> +unsigned long long
>

Patch ping (Re: [PATCH] Implement __builtin_issignaling)

2022-08-23 Thread Jakub Jelinek via Gcc-patches

On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote:
> I'm OK with the rest of the patch if Joseph doesn't have comments
> on the actual issignaling lowerings (which I didn't review for
> correctness due to lack of knowledge).

I'd like to ping this patch.
Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html
with
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html
incremental additions.
gcc/testsuite/gcc.dg/torture/builtin-issignaling-1.c's testcase f4-f7
functions (i.e. functions which just return __builtin_issignaling of
the argument which is float, double, long double and _Float128 on
x86_64-linux) with -O2 are in assembly:

.p2align 4
.globl  f4
.type   f4, @function
f4:
.LFB0:
.cfi_startproc
movd%xmm0, %eax
xorl$4194304, %eax
andl$2147483647, %eax
cmpl$2143289344, %eax
seta%al
movzbl  %al, %eax
ret
.cfi_endproc
.LFE0:
.size   f4, .-f4
.p2align 4
.globl  f5
.type   f5, @function
f5:
.LFB1:
.cfi_startproc
movabsq $9221120237041090560, %rdx
movq%xmm0, %rax
btcq$51, %rax
btrq$63, %rax
cmpq%rax, %rdx
setb%al
movzbl  %al, %eax
ret
.cfi_endproc
.LFE1:
.size   f5, .-f5
.p2align 4
.globl  f6
.type   f6, @function
f6:
.LFB2:
.cfi_startproc
fldt8(%rsp)
fstpt   -24(%rsp)
movl-20(%rsp), %edx
movl-24(%rsp), %eax
movl%edx, %ecx
negl%eax
orl -24(%rsp), %eax
notl%edx
xorl$1073741824, %ecx
shrl$31, %eax
orl %ecx, %eax
movzwl  -16(%rsp), %ecx
cmpl$-1073741824, %eax
movl%ecx, %esi
seta%al
notl%ecx
andl$32767, %esi
movzbl  %al, %eax
negl%esi
testw   $32767, %cx
sete%cl
andl%esi, %edx
movzbl  %cl, %ecx
shrl$31, %edx
andl%ecx, %eax
orl %edx, %eax
ret
.cfi_endproc
.LFE2:
.size   f6, .-f6
.p2align 4
.globl  f7
.type   f7, @function
f7:
.LFB3:
.cfi_startproc
movaps  %xmm0, -24(%rsp)
movq-24(%rsp), %rsi
movq-16(%rsp), %rdi
movq%rsi, %rdx
movq%rdi, %rax
negq%rdx
btcq$47, %rax
orq %rsi, %rdx
shrq$63, %rdx
orq %rdx, %rax
movabsq $9223231299366420480, %rdx
btrq$63, %rax
cmpq%rax, %rdx
setb%al
movzbl  %al, %eax
ret
.cfi_endproc
.LFE3:
.size   f7, .-f7

Thanks

Jakub

Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple

2022-08-23 Thread Jonathan Wakely via Gcc-patches

On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++
 wrote:
>
> P2321R2 adds new conditionally explicit constructors to std::tuple which
> we'll concisely implement in a subsequent patch using explicit(bool), like
> in our C++20 std::pair implementation.  But before we can do that, this
> patch first adds members to _TupleConstraints that test for constructibility
> and convertibility separately; we'll use the first in the new constructors'
> constraints, and the second in their explicit specifier.
>
> In passing, this patch also redefines the existing predicates
> __is_ex/implicitly_constructible in terms of these new members.  This
> seems to reduce compile time and memory usage by about 10% for large

Nice.

> tuples when using the relevant constructors constrained by
> _Explicit/_ImplicitCtor (since we no longer have to redundantly expand
> and process is_constructible<_Types, _UTypes>... twice for each pair of
> such constructors).  In order to retain maximal short circuiting, do
> this only when constexpr if is available.

Would we get similar benefits for C++11 and C++14 by doing:

   return __and_<__and_...>,
 __and_...>
 >::value;

This is slightly more work in total, but if we have __and_ and
__and_> then the A and B instantiations will be cached and
can be reused.


> Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

Yes, thanks.

Re: [PATCH] RISC-V: Add runtime invariant support

2022-08-23 Thread Richard Biener via Gcc-patches

On Mon, Aug 22, 2022 at 8:15 AM Richard Biener via Gcc-patches
 wrote:
>
> On Sat, 20 Aug 2022, Andrew Pinski wrote:
>
> > On Sat, Aug 20, 2022 at 3:34 PM Andreas Schwab  
> > wrote:
> > >
> > > This breaks bootstrap:
> > >
> > > ../../gcc/tree-vect-loop-manip.cc: In function 'void 
> > > vect_gen_vector_loop_niters(loop_vec_info, tree, tree_node**, 
> > > tree_node**, bool)':
> > > ../../gcc/tree-vect-loop-manip.cc:1981:26: error: 'const_vf' may be used 
> > > uninitialized [-Werror=maybe-uninitialized]
> > >  1981 |   unsigned HOST_WIDE_INT const_vf;
> > >   |  ^~~~
> > > cc1plus: all warnings being treated as errors
> > > make[3]: *** [Makefile:1146: tree-vect-loop-manip.o] Error 1
> > > make[2]: *** [Makefile:4977: all-stage2-gcc] Error 2
> > > make[1]: *** [Makefile:30363: stage2-bubble] Error 2
> > > make: *** [Makefile:1065: all] Error 2
> >
> >
> > This looks like a real uninitialized variable issue.
> > I even can't tell if the paths that lead to using const_vf will be
> > always set so how we expect GCC to do the same.
> > The code that uses const_vf was added with r11-5820-cdcbef3c3310,
> > CCing the author there.
>
> The key is
>
>   tree log_vf = NULL_TREE;
> ...
>   unsigned HOST_WIDE_INT const_vf;
>   if (vf.is_constant (&const_vf)
>   && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
> {
> ...
>   log_vf = build_int_cst (type, exact_log2 (const_vf));
> ...
> }
> ...
>   if (stmts != NULL && log_vf)
> {
> ... use const_vf ...
>
> so it's uninit analysis little mind that is confused.  There is code
> that's supposed to handle the situation (setting flag under condition,
> testing that flag instead of condition) but maybe it's too twisted
> here.  One could refector this as
>
>  bool const_vf_p = vf.is_constant (&const_vf);
>  if (const_vf_p
>  && ...)
> ...
>if (stmts != NULL && const_vf_p)
> ...
>
> and hope uninits mind is good enough to see log_vf is not used
> uninitialized.
>
> I can also look into why uninit doesn't get it, but preprocessed
> source would be handy then.

Btw, the riscv speciality is

gcc/config/riscv/riscv.h:#define LOGICAL_OP_NON_SHORT_CIRCUIT 0

with --param logical-op-non-short-circuit=1 the diagnostic does not occur.

>
> Richard.

[PATCH] Add newline when checking path profitability.

2022-08-23 Thread Aldy Hernandez via Gcc-patches

It looks like we're missing a newline for cases where we don't print
anything.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.cc (possibly_profitable_path_p): Always
add newline.
(profitable_path_p): Same.
---
 gcc/tree-ssa-threadbackward.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc
index 3218ad931ef..9725f50e639 100644
--- a/gcc/tree-ssa-threadbackward.cc
+++ b/gcc/tree-ssa-threadbackward.cc
@@ -719,7 +719,11 @@ back_threader_profitability::possibly_profitable_path_p
  gimple *stmt = gsi_stmt (gsi);
  if (gimple_call_internal_p (stmt, IFN_UNIQUE)
  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
-   return false;
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fputc ('\n', dump_file);
+ return false;
+   }
  /* Do not count empty statements and labels.  */
  if (gimple_code (stmt) != GIMPLE_NOP
  && !is_gimple_debug (stmt))
@@ -821,6 +825,8 @@ back_threader_profitability::possibly_profitable_path_p
&& (m_n_insns * param_fsm_scale_path_stmts
>= param_max_jump_thread_duplication_stmts));
 
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fputc ('\n', dump_file);
   return true;
 }
 
@@ -947,6 +953,8 @@ back_threader_profitability::profitable_path_p (const 
vec &m_path,
 "non-empty latch\n");
   return false;
 }
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fputc ('\n', dump_file);
   return true;
 }
 
-- 
2.37.1

[COMMITTED] Copy range from op2 in foperator_equal::op1_range.

2022-08-23 Thread Aldy Hernandez via Gcc-patches

Like the integer version, when op1 == op2 is known to be true the
ranges are also equal.

gcc/ChangeLog:

* range-op-float.cc (foperator_equal::op1_range): Set range to
range of op2.
---
 gcc/range-op-float.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index 4fbd96a7479..ad2fae578d2 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -252,7 +252,8 @@ foperator_equal::op1_range (frange &r, tree type,
   switch (get_bool_state (r, lhs, type))
 {
 case BRS_TRUE:
-  r.set_varying (type);
+  // If it's true, the result is the same as OP2.
+  r = op2;
   // The TRUE side of op1 == op2 implies op1 is !NAN.
   r.set_nan (fp_prop::NO);
   break;
-- 
2.37.1

[PATCH] Add set/get functions for negative infinity in real.*

2022-08-23 Thread Aldy Hernandez via Gcc-patches

For the frange implementation with endpoints I'm about to contribute,
we need to set REAL_VALUE_TYPEs with negative infinity.  The support
is already there in real.cc, but it is awkward to get at.  One could
call real_inf() and then negate the value, but I've added the ability
to pass the sign argument like many of the existing real.* functions.

I've declared the functions in such a way to avoid changes to the
existing code base:

// Unchanged function returning true for either +-INF.
bool real_isinf (const REAL_VALUE_TYPE *r);
// New overload to be able to specify the sign.
bool real_isinf (const REAL_VALUE_TYPE *r, int sign);
// Replacement function for setting INF, defaults to +INF.
void real_inf (REAL_VALUE_TYPE *, int sign = 0);

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* real.cc (real_isinf): New overload.
(real_inf): Add sign argument.
* real.h (real_isinf): New overload.
(real_inf): Add sign argument.
---
 gcc/real.cc | 14 +++---
 gcc/real.h  |  5 -
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/gcc/real.cc b/gcc/real.cc
index 4e63b1449c5..f570ca8e85b 100644
--- a/gcc/real.cc
+++ b/gcc/real.cc
@@ -1234,6 +1234,14 @@ real_isinf (const REAL_VALUE_TYPE *r)
   return (r->cl == rvc_inf);
 }
 
+/* Determine whether a floating-point value X is infinite with SIGN.  */
+
+bool
+real_isinf (const REAL_VALUE_TYPE *r, int sign)
+{
+  return real_isinf (r) && r->sign == sign;
+}
+
 /* Determine whether a floating-point value X is a NaN.  */
 
 bool
@@ -2484,12 +2492,12 @@ dconst_sqrt2_ptr (void)
   return &value;
 }
 
-/* Fills R with +Inf.  */
+/* Fills R with Inf with SIGN.  */
 
 void
-real_inf (REAL_VALUE_TYPE *r)
+real_inf (REAL_VALUE_TYPE *r, int sign)
 {
-  get_inf (r, 0);
+  get_inf (r, sign);
 }
 
 /* Fills R with a NaN whose significand is described by STR.  If QUIET,
diff --git a/gcc/real.h b/gcc/real.h
index 845ef29e3a4..76360b603fb 100644
--- a/gcc/real.h
+++ b/gcc/real.h
@@ -277,6 +277,9 @@ extern bool real_compare (int, const REAL_VALUE_TYPE *, 
const REAL_VALUE_TYPE *)
 /* Determine whether a floating-point value X is infinite.  */
 extern bool real_isinf (const REAL_VALUE_TYPE *);
 
+/* Determine whether a floating-point value X is infinite with SIGN.  */
+extern bool real_isinf (const REAL_VALUE_TYPE *, int sign);
+
 /* Determine whether a floating-point value X is a NaN.  */
 extern bool real_isnan (const REAL_VALUE_TYPE *);
 
@@ -331,7 +334,7 @@ extern long real_to_target (long *, const REAL_VALUE_TYPE 
*, format_helper);
 extern void real_from_target (REAL_VALUE_TYPE *, const long *,
  format_helper);
 
-extern void real_inf (REAL_VALUE_TYPE *);
+extern void real_inf (REAL_VALUE_TYPE *, int sign = 0);
 
 extern bool real_nan (REAL_VALUE_TYPE *, const char *, int, format_helper);
 
-- 
2.37.1

[committed] gfortran.dg/gomp/depend-4.f90: Minor fix

2022-08-23 Thread Tobias Burnus


Not really crucial as it is a compile-time test, but there was an off-by-one 
typo:

  !$omp depobj(elem(5)) depend(in: daaa(2))
  !$omp depobj(elem(6)) depend(in: daap(2))
-  !$omp depobj(elem(6)) depend(in: doaa(2))
+  !$omp depobj(elem(7)) depend(in: doaa(2))
  !$omp depobj(elem(8)) depend(in: doaaa(2))

Committed as 
https://gcc.gnu.org/r13-2151-g6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4

Tobias


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4
Author: Tobias Burnus 
Date:   Tue Aug 23 12:31:43 2022 +0200

gfortran.dg/gomp/depend-4.f90: Minor fix

gcc/testsuite/

* gfortran.dg/gomp/depend-4.f90: Fix array index use for
depobj var + update scan-tree-dump-times.
---
 gcc/testsuite/gfortran.dg/gomp/depend-4.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/depend-4.f90 b/gcc/testsuite/gfortran.dg/gomp/depend-4.f90
index f6cf2fd2dd4..edc30fe7291 100644
--- a/gcc/testsuite/gfortran.dg/gomp/depend-4.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/depend-4.f90
@@ -52,7 +52,7 @@ subroutine foo(dss, dsp, dsa, daa, daaa, daap, doss, dosp, dosa, doaa, doaaa, do
   !$omp depobj(elem(4)) depend(in: daa(2))
   !$omp depobj(elem(5)) depend(in: daaa(2))
   !$omp depobj(elem(6)) depend(in: daap(2))
-  !$omp depobj(elem(6)) depend(in: doaa(2))
+  !$omp depobj(elem(7)) depend(in: doaa(2))
   !$omp depobj(elem(8)) depend(in: doaaa(2))
   !$omp depobj(elem(9)) depend(in: doaap(2))
 
@@ -199,7 +199,7 @@ end
 ! { dg-final { scan-tree-dump-times "&elem\\\[3\\\] = &\\(\\*daa\\)\\\[1\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[4\\\] = &\\(\\*\\(integer.kind=4.\\\[0:\\\] \\* restrict\\) daaa->data\\)\\\[daaa->offset \\+ 2\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = \\(integer.kind=4. \\*\\) \\(daap->data \\+ .sizetype. \\(\\(daap->offset \\+ daap->dim\\\[0\\\].stride \\* 2\\) \\* daap->span\\)\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } }
+! { dg-final { scan-tree-dump-times "&elem\\\[6\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[7\\\] = &\\(\\*\\(integer.kind=4.\\\[0:\\\] \\* restrict\\) doaaa->data\\)\\\[doaaa->offset \\+ 2\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[8\\\] = \\(integer.kind=4. \\*\\) \\(doaap->data \\+ .sizetype. \\(\\(doaap->offset \\+ doaap->dim\\\[0\\\].stride \\* 2\\) \\* doaap->span\\)\\);" 1 "original" } }

Re: [committed] gfortran.dg/gomp/depend-4.f90: Minor fix

2022-08-23 Thread Tobias Burnus


And the same for depend-6.f90, which was obviously copied from depend-4.f90.

Committed as 
https://gcc.gnu.org/r13-2152-gf05e3b2c63f3307ba405900f1a80c25b2e87b0a3

On 23.08.22 12:34, Tobias Burnus wrote:

Not really crucial as it is a compile-time test, but there was an off-by-one 
typo:

  !$omp depobj(elem(5)) depend(in: daaa(2))
  !$omp depobj(elem(6)) depend(in: daap(2))
-  !$omp depobj(elem(6)) depend(in: doaa(2))
+  !$omp depobj(elem(7)) depend(in: doaa(2))
  !$omp depobj(elem(8)) depend(in: doaaa(2))

Committed as 
https://gcc.gnu.org/r13-2151-g6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit f05e3b2c63f3307ba405900f1a80c25b2e87b0a3
Author: Tobias Burnus 
Date:   Tue Aug 23 13:14:19 2022 +0200

gfortran.dg/gomp/depend-6.f90: Minor fix

Exactly the same as previous commit for depend-4.f90, r13-2151.

gcc/testsuite/

* gfortran.dg/gomp/depend-6.f90: Fix array index use for
depobj var + update scan-tree-dump-times.
---
 gcc/testsuite/gfortran.dg/gomp/depend-6.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/depend-6.f90 b/gcc/testsuite/gfortran.dg/gomp/depend-6.f90
index b6c1afee127..fbac14b1b94 100644
--- a/gcc/testsuite/gfortran.dg/gomp/depend-6.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/depend-6.f90
@@ -52,7 +52,7 @@ subroutine foo(dss, dsp, dsa, daa, daaa, daap, doss, dosp, dosa, doaa, doaaa, do
   !$omp depobj(elem(4)) depend(in: daa(2))
   !$omp depobj(elem(5)) depend(in: daaa(2))
   !$omp depobj(elem(6)) depend(in: daap(2))
-  !$omp depobj(elem(6)) depend(in: doaa(2))
+  !$omp depobj(elem(7)) depend(in: doaa(2))
   !$omp depobj(elem(8)) depend(in: doaaa(2))
   !$omp depobj(elem(9)) depend(in: doaap(2))
 
@@ -199,7 +199,7 @@ end
 ! { dg-final { scan-tree-dump-times "&elem\\\[3\\\] = &\\(\\*daa\\)\\\[1\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[4\\\] = &\\(\\*\\(void \\*\\\[0:\\\] \\* restrict\\) daaa->data\\)\\\[daaa->offset \\+ 2\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = \\(void \\* \\*\\) \\(daap->data \\+ .sizetype. \\(\\(daap->offset \\+ daap->dim\\\[0\\\].stride \\* 2\\) \\* daap->span\\)\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } }
+! { dg-final { scan-tree-dump-times "&elem\\\[6\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[7\\\] = &\\(\\*\\(void \\*\\\[0:\\\] \\* restrict\\) doaaa->data\\)\\\[doaaa->offset \\+ 2\\\];" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "&elem\\\[8\\\] = \\(void \\* \\*\\) \\(doaap->data \\+ .sizetype. \\(\\(doaap->offset \\+ doaap->dim\\\[0\\\].stride \\* 2\\) \\* doaap->span\\)\\);" 1 "original" } }

[PATCH] Add support for floating point endpoints to frange.

2022-08-23 Thread Aldy Hernandez via Gcc-patches

The current implementation of frange is just a type with some bits to
represent NAN and INF.  We can do better and represent endpoints to
ultimately solve longstanding PRs such as PR24021.  This patch adds
these endpoints.  In follow-up patches I will add support for
relational operators using them, as well as a bare bones PLUS_EXPR
range-op-float entry to solve the PR.

I have chosen to use REAL_VALUE_TYPEs for the endpoints, since that's
what we use underneath the trees.  This will be somewhat analogous to
our eventual use of wide-ints in the irange.  No sense going through
added levels of indirection if we can avoid it.  That, plus real.*
already has a nice API for dealing with floats.

With this patch, ranges will be closed float point intervals, which
make the implementation simpler, since we don't have to keep track of
open/closed intervals.  This is conservative enough for use in the
ranger world, as we'd rather err on the side of more elements in a
range, than less.

For example, even though we cannot precisely represent the open
interval (3.0, 5.0) with this approach, it is perfectably reasonable
to represent it as [3.0, 5.0] since the closed interval is a super set
of the open one.  In the VRP/ranger world, it is always better to
err on the side of more information in a range, than not.  After all,
when we don't know anything about a range, we just use VARYING which
is a fancy term for a range spanning the entire domain.

Since REAL_VALUE_TYPEs have properly defined infinity and NAN
semantics, all the math can be made to work:

[-INF, 3.0] !NAN=> Numbers <= 3.0 (NAN cannot happen)
[3.0, +INF] => Numbers >= 3.0 (NAN is possible)
[-INF, +INF]=> VARYING (NAN is possible)
[-INF, +INF] !NAN   => Entire domain.  NAN cannot happen.

Also, since REAL_VALUE_TYPEs can represent the minimum and maximum
representable values of a TYPE_MODE, we can disambiguate between them
and negative and positive infinity (see get_max_float in real.cc).

This also makes the math all work.  For example, suppose we know
nothing about x and y (VARYING).  On the TRUE side of x > y, we can
deduce that:

(a) x cannot be NAN
(b) y cannot be NAN
(c) y cannot be +INF.

(c) means that we can drop the upper bound of "y" from +INF to the
maximum representable value for its type.

Having endpoints with different representation for infinity and the
maximum representable values, means we can drop the +-INF properties
we currently have in the frange.  I will do this as a follow-up patch,
as well as contributing more detailed relational operators.

I am still tweaking some regressed tests because of the usual premature
threading and VRP in the presence of smarter ranges.  I figured I'd post
now to give others a chance to comment in the meantime.

gcc/ChangeLog:

* value-range-pretty-print.cc (vrange_printer::visit): Adapt for
endpoints.
* value-range.cc (frange::set): Same.
(frange::normalize_kind): Same.
(frange::union_): Same.
(frange::intersect): Same.
(frange::operator=): Same.
(frange::verify_range): Same.
(frange::contains_p): New.
(frange::singleton_p): New.
(frange_float): New.
(real_max_representable): New.
(real_min_representable): New.
(range_tests_floats): New.
(range_tests): Call range_tests_floats.
* value-range.h (frange::lower_bound): New.
(frange::upper_bound): New.
(vrp_val_min): Use real_inf with a sign argument.
(frange::frange): New.
(frange::set_varying): Adapt for endpoints.
(frange::set_undefined): Adapt for endpoints.
---
 gcc/value-range-pretty-print.cc |  12 +-
 gcc/value-range.cc  | 259 ++--
 gcc/value-range.h   |  53 ++-
 3 files changed, 312 insertions(+), 12 deletions(-)

diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc
index cbf50d3d854..1a026c22f99 100644
--- a/gcc/value-range-pretty-print.cc
+++ b/gcc/value-range-pretty-print.cc
@@ -122,19 +122,29 @@ vrange_printer::print_irange_bitmasks (const irange &r) 
const
 void
 vrange_printer::visit (const frange &r) const
 {
+  tree type = r.type ();
+
   pp_string (pp, "[frange] ");
   if (r.undefined_p ())
 {
   pp_string (pp, "UNDEFINED");
   return;
 }
-  dump_generic_node (pp, r.type (), 0, TDF_NONE, false);
+  dump_generic_node (pp, type, 0, TDF_NONE, false);
   pp_string (pp, " ");
   if (r.varying_p ())
 {
   pp_string (pp, "VARYING");
   return;
 }
+  pp_character (pp, '[');
+  dump_generic_node (pp,
+build_real (type, r.lower_bound ()), 0, TDF_NONE, false);
+  pp_string (pp, ", ");
+  dump_generic_node (pp,
+build_real (type, r.upper_bound ()), 0, TDF_NONE, false);
+  pp_string (pp, "] ");
+
   print_frange_prop ("NAN", r.get_nan ());
   print_frange_prop ("INF", r.get_inf ())

[PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

2022-08-23 Thread Surya Kumari Jangala via Gcc-patches

sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

In schedule_region(), a basic block that does not contain any real insns
is not scheduled and the dfa state at the entry of the bb is not copied
to the fallthru basic block. However a DEBUG insn is treated as a real
insn, and if a bb contains non-real insns and a DEBUG insn, it's dfa
state is copied to the fallthru bb. This was resulting in
-fcompare-debug failure as the incoming dfa state of the fallthru block
is different with -g. We should always copy the dfa state of a bb to
it's fallthru bb even if the bb does not contain real insns.

2022-08-22  Surya Kumari Jangala  

gcc/
PR rtl-optimization/105586
* sched-rgn.cc (schedule_region): Always copy dfa state to
fallthru block.

gcc/testsuite/
PR rtl-optimization/105586
* gcc.target/powerpc/pr105586.c: New test.


diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index 0dc2a8f2851..6c9202c0b2b 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -3082,6 +3082,24 @@ free_bb_state_array (void)
   bb_state = NULL;
 }
 
+static void
+save_state_for_fallthru_edge (basic_block last_bb, state_t state)
+{
+  edge f = find_fallthru_edge (last_bb->succs);
+  if (f
+  && (!f->probability.initialized_p ()
+ || (f->probability.to_reg_br_prob_base () * 100
+ / REG_BR_PROB_BASE
+ >= param_sched_state_edge_prob_cutoff)))
+  {
+memcpy (bb_state[f->dest->index], state,
+   dfa_state_size);
+if (sched_verbose >= 5)
+  fprintf (sched_dump, "saving state for edge %d->%d\n",
+  f->src->index, f->dest->index);
+  }
+}
+
 /* Schedule a region.  A region is either an inner loop, a loop-free
subroutine, or a single basic block.  Each bb in the region is
scheduled after its flow predecessors.  */
@@ -3155,6 +3173,7 @@ schedule_region (int rgn)
   if (no_real_insns_p (head, tail))
{
  gcc_assert (first_bb == last_bb);
+ save_state_for_fallthru_edge (last_bb, bb_state[first_bb->index]);
  continue;
}
 
@@ -3173,26 +3192,13 @@ schedule_region (int rgn)
   curr_bb = first_bb;
   if (dbg_cnt (sched_block))
 {
- edge f;
  int saved_last_basic_block = last_basic_block_for_fn (cfun);
 
  schedule_block (&curr_bb, bb_state[first_bb->index]);
  gcc_assert (EBB_FIRST_BB (bb) == first_bb);
  sched_rgn_n_insns += sched_n_insns;
  realloc_bb_state_array (saved_last_basic_block);
- f = find_fallthru_edge (last_bb->succs);
- if (f
- && (!f->probability.initialized_p ()
- || (f->probability.to_reg_br_prob_base () * 100
- / REG_BR_PROB_BASE
- >= param_sched_state_edge_prob_cutoff)))
-   {
- memcpy (bb_state[f->dest->index], curr_state,
- dfa_state_size);
- if (sched_verbose >= 5)
-   fprintf (sched_dump, "saving state for edge %d->%d\n",
-f->src->index, f->dest->index);
-   }
+ save_state_for_fallthru_edge (last_bb, curr_state);
 }
   else
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr105586.c 
b/gcc/testsuite/gcc.target/powerpc/pr105586.c
new file mode 100644
index 000..bd397f58bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr105586.c
@@ -0,0 +1,19 @@
+/* { dg-options "-mdejagnu-tune=power4 -O2 -fcompare-debug -fno-if-conversion 
-fno-guess-branch-probability" } */
+
+extern int bar(int i);
+
+typedef unsigned long u64;
+int g;
+
+__int128 h;
+
+void
+foo(int a, int b) {
+  int i;
+  char u8_1 = g, u8_3 = a;
+  u64 u64_1 = bar(0), u64_3 = u8_3 * u64_1;
+  __int128 u128_1 = h ^ __builtin_expect(i, 0);
+  u64 u64_4 = u64_1 << ((__builtin_sub_overflow_p(0, u8_1, (u64)0) ^ u128_1) & 
8);
+  u64 u64_r = b + u64_3 + u64_4;
+  bar((short)u64_r + u8_3);
+}

Re: [PATCH 2/3] libstdc++: Implement std::pair/tuple/misc enhancements from P2321R2

2022-08-23 Thread Jonathan Wakely via Gcc-patches

On Tue, 23 Aug 2022 at 02:36, Patrick Palka via Libstdc++
 wrote:
> --- a/libstdc++-v3/include/bits/stl_pair.h
> +++ b/libstdc++-v3/include/bits/stl_pair.h
> @@ -212,6 +212,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> swap(second, __p.second);
>}
>
> +#if __cplusplus > 202002L
> +  /// Swap the first members and then the second members.
> +  constexpr void
> +  swap(const pair& __p) const
> +  noexcept(__and_<__is_nothrow_swappable,
> + __is_nothrow_swappable>::value)

This could use __and_v (which is just __and_::value today, but could
theoretically be optimized to use a requires expression and avoid
instantiating __and_ one day).

Is consistency with the C++11 overload more important? I *hope* we
won't need to make many changes to these noexcept-specifiers, so the
maintenance burden of using __ad_::value in one and __and_v in the
other shouldn't be too high.

> @@ -710,6 +792,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  noexcept(noexcept(__x.swap(__y)))
>  { __x.swap(__y); }
>
> +#if __cplusplus > 202002L
> +  template
> +requires is_swappable::value && is_swappable::value

is_swappable_v instead of ::value here ... this is already using a
requires-clause and so is substantially different to the old overload
anyway.

> +
>// tuple swap
>_GLIBCXX20_CONSTEXPR
>void
>swap(tuple& __in)
>noexcept(__and_<__is_nothrow_swappable<_Elements>...>::value)
>{ _Inherited::_M_swap(__in); }
> +
> +#if __cplusplus > 202002L
> +  constexpr void
> +  swap(const tuple& __in) const
> +  noexcept(__and_<__is_nothrow_swappable...>::value)

__and_v ?

>_GLIBCXX20_CONSTEXPR
>void
>swap(tuple& __in)
>noexcept(__and_<__is_nothrow_swappable<_T1>,
>   __is_nothrow_swappable<_T2>>::value)
>{ _Inherited::_M_swap(__in); }
> +
> +#if __cplusplus > 202002L
> +  constexpr void
> +  swap(const tuple& __in) const
> +  noexcept(__and_<__is_nothrow_swappable,
> + __is_nothrow_swappable>::value)

__and_v ?

Thanks for doing this, those changes looked tedious to implement and test!

If you agree with the suggestions to use _v variable templates, this
is OK for trunk with those changes. I am willing to be persuaded to
not use the variable templates if there's a good reason I've missed.

Re: Ping [PATCH V2] libcpp: Optimize #pragma once with a hash table [PR58770]

2022-08-23 Thread Nathan Sidwell via Gcc-patches


On 8/22/22 13:39, Paul Hollinsky wrote:

On Mon, Aug 22, 2022 at 09:19:29AM -0400, Nathan Sidwell wrote:

On 8/19/22 16:27, Paul Hollinsky wrote:

Hi all,

Would love some feedback on this patch!

Thanks,
Paul

On Mon, Aug 01, 2022 at 05:18:40AM +, Paul Hollinsky wrote:

Rather than traversing the all_files linked list for every include,
this factors out the quick idempotency checks (modification time
and size) to be the keys in a hash table so we can find matching
files quickly.


I'm probably confused, but why is the existing idempotency logic
insufficiently optimal?  Can it be improved directly?


The idempotency logic breaks down to a couple integer comparisons, I doubt
it can be made much faster. Usually a file will be skipped after the very
first idempotency check. The reason it's suboptimal right now is that it's
O(n^2) behavior -- for each include that's seen we follow the entire linked
list and do those integeger comparisons.


Traversing a list is O(N) -- why are you encountering N^2 here?  (Are 
you summing over all includes?)




The cost of hashing, performing a lookup against the hash table, and then
traversing the list of results (any hash collisions) is much lower than
traversing the full list of includes. I suspect that has something to do
with cache locality, or lack thereof, when accessing each member of the
linked list. It's likely also just far fewer instructions executed.


I don't see why a hash table cannot be applied to all idempotent files, 
not just #import/pragma once.






WRT using modification time as a key.  I don't see how that provides
sufficient additional randomization to file size?  Groups of headers are
often installed with the same mtime.


This is a fair point, and I was going to say something here about the
codebases being compiled, but git also doesn't preserve mtime.

The real reason mtime is because that's what was done in the previous
implementation, and I felt it best not to mess with.

But, if there are better attributes that could be used without adding
much overhead we could use those instead. I was personally worried that
reading the file to perform any sort of hash would be expensive.


doing anything other than compare the pathname is going to mean going to 
disk.  That's likely cached by the OS from the first read.  I suppose 
one could encounter horrible LRU behaviour, but you're probably going to 
lose in that case anyway.  Hm, why not just key on the inode/drive 
number? that way one even avoids the inode read (oh, I think stat isn't 
that fine grained is it)


I suppose one could have two hashtables -- one keyed by pathname, which 
one could check before the inode, which would deal with the majority of 
non-symlink-farm cases.  Then one keyed by inode/m_time or whatever to 
deal with symlinks.


nathan



In any case, thank you very much for the feedback!

--Paul





The hash table value type is a linked list, in case more than one
file matches the quick check.

The table is only built if a once-only file is seen, so include
guard performance is not affecte

My laptop would previously complete Ricardo's benchmark from the
PR in ~1.1s using #pragma once, and ~0.35s using include guards.

After this change, both benchmarks now complete in ~0.35s. I did
have to randomize the modification dates on the benchmark headers
so the files did not all end up in the same hash table list, but
that would likely not come up outside of the contrived benchmark.

I bootstrapped and ran the testsuite on x86_64 Darwin, as well as
ppc64le and aarch64 Linux.

libcpp/ChangeLog:

PR preprocessor/58770
* internal.h: Add hash table for #pragma once
* files.cc: Optimize #pragma once with the hash table

Signed-off-by: Paul Hollinsky 
---
   libcpp/files.cc   | 116 +++---
   libcpp/internal.h |   3 ++
   2 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/libcpp/files.cc b/libcpp/files.cc
index 24208f7b0f8..d4ffd77578e 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -167,6 +167,33 @@ struct file_hash_entry_pool
 struct cpp_file_hash_entry pool[FILE_HASH_POOL_SIZE];
   };
+/* A set of attributes designed to quickly identify obviously different files
+   in a hashtable.  Just in case there are collisions, we still maintain a
+   list.  These sub-lists can then be checked for #pragma once rather than
+   interating through all_files.  */
+struct file_quick_idempotency_attrs
+{
+  file_quick_idempotency_attrs(const _cpp_file *f)
+: mtime(f->st.st_mtime), size(f->st.st_size) {}
+
+  time_t mtime;
+  off_t size;
+
+  static hashval_t hash (/* _cpp_file* */ const void *p);
+};
+
+/* Sub-list of very similar files kept in a hashtable to check for #pragma
+   once.  */
+struct file_sublist
+{
+  _cpp_file *f;
+  file_sublist *next;
+
+  static int eq (/* _cpp_file* */ const void *p,
+/* file_sublist* */ const void *q);
+  static void del (/* file_sublist* */ void *p);
+};
+

[PATCH] Speedup path discovery in predicate::use_cannot_happen

2022-08-23 Thread Richard Biener via Gcc-patches

The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that
made compute_control_dep_chain start from function entry rather
than the immediate dominator of the source block of the edge with
the undefined value on the PHI node.  Reverting at that point
does not reveal any testsuite FAIL, in particular the added
testcase still passes.  The following adjusts this to the other
function that computes predicates that hold on the PHI incoming
edges with undefined values, predicate::init_from_phi_def, which
starts at the immediate dominator of the PHI.  That's much less
likely to run into the CFG walking limit.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Aldy - you did this change, do you remember anything here?  In
fact the whole thing that's now called predicate::use_cannot_happen
seems to be redundant - the two testcases attributed to its history
do not fail when that's disabled, nor did they fail when it was
introduced.  In principle whats now called predicate::superset_of
should cover this (but different implementation limits might apply).

OK?

* gimple-predicate-analysis.cc (predicate::use_cannot_happen):
Start the compute_control_dep_chain walk from the immediate
dominator of the PHI.
---
 gcc/gimple-predicate-analysis.cc | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc
index a4291657d0b..ea81daacd4f 100644
--- a/gcc/gimple-predicate-analysis.cc
+++ b/gcc/gimple-predicate-analysis.cc
@@ -1293,6 +1293,12 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds)
   use_guard = &phi_use_guard_intersection;
 }
 
+  basic_block phi_bb = gimple_bb (phi);
+  /* Find the closest dominating bb to be the control dependence root.  */
+  basic_block cd_root = get_immediate_dominator (CDI_DOMINATORS, phi_bb);
+  if (!cd_root)
+return false;
+
   /* Look for the control dependencies of all the interesting operands
  and build guard predicates describing them.  */
   unsigned n = gimple_phi_num_args (phi);
@@ -1308,7 +1314,7 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds)
   unsigned num_calls = 0;
 
   /* Build the control dependency chain for the PHI argument...  */
-  if (!compute_control_dep_chain (ENTRY_BLOCK_PTR_FOR_FN (cfun),
+  if (!compute_control_dep_chain (cd_root,
  e->src, dep_chains, &num_chains,
  cur_chain, &num_calls))
{
-- 
2.35.3

[PATCH] New uninit testcase

2022-08-23 Thread Richard Biener via Gcc-patches

I've reduced the following which doesn't seem covered in a good enough
way in the testsuite.

Pushed.

* gcc.dg/uninit-pred-10.c: New testcase.
---
 gcc/testsuite/gcc.dg/uninit-pred-10.c | 36 +++
 1 file changed, 36 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/uninit-pred-10.c

diff --git a/gcc/testsuite/gcc.dg/uninit-pred-10.c 
b/gcc/testsuite/gcc.dg/uninit-pred-10.c
new file mode 100644
index 000..468b4d3ad4b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uninit-pred-10.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuninitialized" } */
+
+enum demangle_component_type {
+  DEMANGLE_COMPONENT_NAME,
+  DEMANGLE_COMPONENT_REFERENCE
+};
+struct demangle_component {
+  enum demangle_component_type type;
+} d_print_comp_inner_mod_inner, *d_print_comp_inner_dc;
+struct d_print_mod {
+  struct d_print_mod *next;
+};
+struct d_print_info {
+  int templates;
+  struct d_print_mod *modifiers;
+};
+void d_print_comp_inner(struct d_print_info *dpi)
+{
+  int saved_templates, need_template_restore = 0;
+  switch (d_print_comp_inner_dc->type) {
+  case DEMANGLE_COMPONENT_NAME:
+goto modifier;
+  case DEMANGLE_COMPONENT_REFERENCE:
+saved_templates = dpi->templates;
+need_template_restore = 1;
+  modifier:
+struct d_print_mod dpm;
+dpm.next = dpi->modifiers;
+d_print_comp_inner_mod_inner = *d_print_comp_inner_dc;
+d_print_comp_inner(dpi);
+dpi->modifiers = dpm.next;
+if (need_template_restore)
+  dpi->templates = saved_templates; /* { dg-bogus "uninitialized" } */
+  }
+}
-- 
2.35.3

Re: [PATCH] Add newline when checking path profitability.

2022-08-23 Thread Richard Biener via Gcc-patches

On Tue, Aug 23, 2022 at 12:12 PM Aldy Hernandez  wrote:
>
> It looks like we're missing a newline for cases where we don't print
> anything.
>
> OK?

OK

> gcc/ChangeLog:
>
> * tree-ssa-threadbackward.cc (possibly_profitable_path_p): Always
> add newline.
> (profitable_path_p): Same.
> ---
>  gcc/tree-ssa-threadbackward.cc | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc
> index 3218ad931ef..9725f50e639 100644
> --- a/gcc/tree-ssa-threadbackward.cc
> +++ b/gcc/tree-ssa-threadbackward.cc
> @@ -719,7 +719,11 @@ back_threader_profitability::possibly_profitable_path_p
>   gimple *stmt = gsi_stmt (gsi);
>   if (gimple_call_internal_p (stmt, IFN_UNIQUE)
>   || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
> -   return false;
> +   {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> +   fputc ('\n', dump_file);
> + return false;
> +   }
>   /* Do not count empty statements and labels.  */
>   if (gimple_code (stmt) != GIMPLE_NOP
>   && !is_gimple_debug (stmt))
> @@ -821,6 +825,8 @@ back_threader_profitability::possibly_profitable_path_p
> && (m_n_insns * param_fsm_scale_path_stmts
> >= param_max_jump_thread_duplication_stmts));
>
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +fputc ('\n', dump_file);
>return true;
>  }
>
> @@ -947,6 +953,8 @@ back_threader_profitability::profitable_path_p (const 
> vec &m_path,
>  "non-empty latch\n");
>return false;
>  }
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +fputc ('\n', dump_file);
>return true;
>  }
>
> --
> 2.37.1
>

Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

2022-08-23 Thread Peter Bergner via Gcc-patches

On 8/23/22 6:49 AM, Surya Kumari Jangala wrote:
> sched1: Fix -fcompare-debug issue in schedule_region [PR105586]
> 
> In schedule_region(), a basic block that does not contain any real insns
> is not scheduled and the dfa state at the entry of the bb is not copied
> to the fallthru basic block. However a DEBUG insn is treated as a real
> insn, and if a bb contains non-real insns and a DEBUG insn, it's dfa
> state is copied to the fallthru bb. This was resulting in
> -fcompare-debug failure as the incoming dfa state of the fallthru block
> is different with -g. We should always copy the dfa state of a bb to
> it's fallthru bb even if the bb does not contain real insns.
> 
> 2022-08-22  Surya Kumari Jangala  
> 
> gcc/
>   PR rtl-optimization/105586
>   * sched-rgn.cc (schedule_region): Always copy dfa state to
>   fallthru block.


It looks good to me, but I cannot approve it.  That said, you're missing
a ChangeLog entry for your new helper function.  The ChangeLog mentions
what changed, not why it changed.  Maybe something like the following?


gcc/
PR rtl-optimization/105586
* sched-rgn.cc (save_state_for_fallthru_edge): New function.
(schedule_region): Use it for all blocks.


Peter

[PATCH] tree-optimization/106722 - uninit analysis with long def -> use path

2022-08-23 Thread Richard Biener via Gcc-patches

The following applies similar measures as r13-2133-ge66cf626c72d58
to the computation of the use predicate when the path from PHI def
to use is too long and we run into compute_control_dep_chain limits.

It also moves the preprocessor define limits internal.

This resolves the reduced testcase but not the original one.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/106722
* gimple-predicate-analysis.h (MAX_NUM_CHAINS, MAX_CHAIN_LEN,
MAX_POSTDOM_CHECK, MAX_SWITCH_CASES): Move ...
* gimple-predicate-analysis.cc: ... here and document.
(simple_control_dep_chain): New function, factored from
predicate::use_cannot_happen.
(predicate::use_cannot_happen): Adjust.
(predicate::predicate): Use simple_control_dep_chain as fallback.

* g++.dg/uninit-pr106722-1.C: New testcase.
---
 gcc/gimple-predicate-analysis.cc | 68 ++--
 gcc/gimple-predicate-analysis.h  |  4 --
 gcc/testsuite/g++.dg/uninit-pr106722-1.C | 65 ++
 3 files changed, 117 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/uninit-pr106722-1.C

diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc
index e8e2dbf7034..a4291657d0b 100644
--- a/gcc/gimple-predicate-analysis.cc
+++ b/gcc/gimple-predicate-analysis.cc
@@ -46,6 +46,19 @@
 
 #define DEBUG_PREDICATE_ANALYZER 1
 
+/* In our predicate normal form we have MAX_NUM_CHAINS or predicates
+   and in those MAX_CHAIN_LEN (inverted) and predicates.  */
+#define MAX_NUM_CHAINS 8
+#define MAX_CHAIN_LEN 5
+
+/* When enumerating paths between two blocks this limits the number of
+   post-dominator skips between two edges possibly defining a predicate.  */
+#define MAX_POSTDOM_CHECK 8
+
+/* The limit for the number of switch cases when we do the linear search
+   for the case corresponding to an edge.  */
+#define MAX_SWITCH_CASES 40
+
 /* Return true if, when BB1 is postdominating BB2, BB1 is a loop exit.  */
 
 static bool
@@ -1034,6 +1047,36 @@ is_degenerate_phi (gimple *phi, pred_info *pred)
   return true;
 }
 
+/* If compute_control_dep_chain bailed out due to limits this routine
+   tries to build a partial sparse path using dominators.  Returns
+   path edges whose predicates are always true when reaching E.  */
+
+static void
+simple_control_dep_chain (vec& chain, basic_block from, basic_block to)
+{
+  if (!dominated_by_p (CDI_DOMINATORS, to, from))
+return;
+
+  basic_block src = to;
+  while (src != from
+&& chain.length () <= MAX_CHAIN_LEN)
+{
+  basic_block dest = src;
+  src = get_immediate_dominator (CDI_DOMINATORS, src);
+  edge pred_e;
+  if (single_pred_p (dest)
+ && (pred_e = find_edge (src, dest)))
+   chain.safe_push (pred_e);
+}
+}
+
+static void
+simple_control_dep_chain (vec& chain, basic_block from, edge to)
+{
+  chain.safe_push (to);
+  simple_control_dep_chain (chain, from, to->src);
+}
+
 /* Recursively compute the control dependence chains (paths of edges)
from the dependent basic block, DEP_BB, up to the dominating basic
block, DOM_BB (the head node of a chain should be dominated by it),
@@ -1273,20 +1316,8 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds)
  /* If compute_control_dep_chain bailed out due to limits
 build a partial sparse path using dominators.  Collect
 only edges whose predicates are always true when reaching E.  */
- cur_chain.truncate (0);
- cur_chain.quick_push (e);
- basic_block src = e->src;
- while (src->index != ENTRY_BLOCK
-&& cur_chain.length () <= MAX_CHAIN_LEN)
-   {
- basic_block dest = src;
- src = get_immediate_dominator (CDI_DOMINATORS, src);
- edge pred_e;
- if (single_pred_p (dest)
- && (pred_e = find_edge (src, dest)))
-   cur_chain.quick_push (pred_e);
-   }
- dep_chains[0] = cur_chain.copy ();
+ simple_control_dep_chain (dep_chains[0],
+   ENTRY_BLOCK_PTR_FOR_FN (cfun), e);
  num_chains++;
}
 
@@ -1737,8 +1768,13 @@ predicate::predicate (basic_block def_bb, basic_block 
use_bb, func_t &eval)
   auto_vec dep_chains[MAX_NUM_CHAINS];
   auto_vec cur_chain;
 
-  compute_control_dep_chain (cd_root, use_bb, dep_chains, &num_chains,
-cur_chain, &num_calls);
+  if (!compute_control_dep_chain (cd_root, use_bb, dep_chains, &num_chains,
+ cur_chain, &num_calls))
+{
+  gcc_assert (num_chains == 0);
+  simple_control_dep_chain (dep_chains[0], cd_root, use_bb);
+  num_chains++;
+}
 
   if (DEBUG_PREDICATE_ANALYZER && dump_file)
 {
diff --git a/gcc/gimple-predicate-analysis.h b/gcc/gimple-predicate-analysis.h
index 204cdbccfc7..77672291c36 100644
--- a/gcc/gimple-predicate-

Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]

2022-08-23 Thread Segher Boessenkool

Hi!

On Tue, Aug 23, 2022 at 07:55:22AM -0500, Peter Bergner wrote:
> It looks good to me, but I cannot approve it.

Same here (both statements).

> That said, you're missing
> a ChangeLog entry for your new helper function.  The ChangeLog mentions
> what changed, not why it changed.

And that is correct!  Changelogs should not say that, that isn't their
purpose (in GCC), not what they are used for.  Explanations like that go
in the email and/or the commit message.

The main remaining usefulness of changelogs is to spot unintended
commmits.

> Maybe something like the following?
> 
> gcc/
>   PR rtl-optimization/105586
>   * sched-rgn.cc (save_state_for_fallthru_edge): New function.
>   (schedule_region): Use it for all blocks.

That looks perfect, it doesn't say "why" either :-)

Segher

Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple

2022-08-23 Thread Patrick Palka via Gcc-patches

On Tue, 23 Aug 2022, Jonathan Wakely wrote:

> On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++
>  wrote:
> >
> > P2321R2 adds new conditionally explicit constructors to std::tuple which
> > we'll concisely implement in a subsequent patch using explicit(bool), like
> > in our C++20 std::pair implementation.  But before we can do that, this
> > patch first adds members to _TupleConstraints that test for constructibility
> > and convertibility separately; we'll use the first in the new constructors'
> > constraints, and the second in their explicit specifier.
> >
> > In passing, this patch also redefines the existing predicates
> > __is_ex/implicitly_constructible in terms of these new members.  This
> > seems to reduce compile time and memory usage by about 10% for large
> 
> Nice.
> 
> > tuples when using the relevant constructors constrained by
> > _Explicit/_ImplicitCtor (since we no longer have to redundantly expand
> > and process is_constructible<_Types, _UTypes>... twice for each pair of
> > such constructors).  In order to retain maximal short circuiting, do
> > this only when constexpr if is available.
> 
> Would we get similar benefits for C++11 and C++14 by doing:
> 
>return __and_<__and_...>,
>  __and_...>
>  >::value;
> 
> This is slightly more work in total, but if we have __and_ and
> __and_> then the A and B instantiations will be cached and
> can be reused.

Good idea, it seems we get pretty much the same 10% improvement for
C++11/14 with this approach.  I reckon we might as well then define
__convertible and __constructible as alias templates instead of as
variable templates and use them unconditionally in
__is_im/explicitly_constructible for benefit of all language versions.
Using constexpr if instead of the outer __and_ seems to yield a marginal
improvement at best and __and_v is currently just __and_::value, so it
doesn't seem worth it to have different definitions for C++17 at least
for now.

What do you think about the following?

-- >8 --

libstdc++-v3/ChangeLog:

* include/std/tuple (_TupleConstraints::__convertible): Define.
(_TupleConstraints::__constructible): Likewise.
(_TupleConstraints::__is_explicitly_constructible): Redefine this
in terms of __convertible and __constructible.
(_TupleConstraints::__is_implicitly_constructible): Likewise.
---
 libstdc++-v3/include/std/tuple | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index 6d0060a191c..f8f48ccc370 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -553,15 +553,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct _TupleConstraints
 {
+  template
+   using __constructible = __and_...>;
+
+  template
+   using __convertible = __and_...>;
+
   // Constraint for a non-explicit constructor.
   // True iff each Ti in _Types... can be constructed from Ui in _UTypes...
   // and every Ui is implicitly convertible to Ti.
   template
static constexpr bool __is_implicitly_constructible()
{
- return __and_...,
-   is_convertible<_UTypes, _Types>...
-   >::value;
+ return __and_<__constructible<_UTypes...>,
+   __convertible<_UTypes...>>::value;
}
 
   // Constraint for a non-explicit constructor.
@@ -570,9 +575,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
static constexpr bool __is_explicitly_constructible()
{
- return __and_...,
-   __not_<__and_...>>
-   >::value;
+ return __and_<__constructible<_UTypes...>,
+   __not_<__convertible<_UTypes...>>>::value;
}
 
   static constexpr bool __is_implicitly_default_constructible()
-- 
2.37.2.382.g795ea8776b

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-23 Thread Segher Boessenkool

Hi!

On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote:
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
> @@ -1,7 +1,8 @@
>  /* { dg-do compile { target { powerpc*-*-* } } } */
> -/* { dg-require-effective-target lp64 } */
> -/* { dg-require-effective-target powerpc_p9vector_ok } */
>  /* { dg-options "-mdejagnu-cpu=power9" } */
> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && 
> ilp32 } } } */

You can add this always.  It is default on 64-bit systems, but it is
simpler to just always add it:
/* { dg-additional-options "-mpowerpc64" } */

Or are there subtargets that will error on this?

> +/* { dg-require-effective-target has_arch_ppc64 } */

This is redundant, the previous line makes this always pass.

Segher

Re: [PATCH v3] rs6000: Rework ELFv2 support for -fpatchable-function-entry* [PR99888]

2022-08-23 Thread Segher Boessenkool

On Fri, Aug 19, 2022 at 10:40:10AM +0800, Kewen.Lin wrote:
> Since you proposed to update the documentation, I'm thinking if we can
> reconsider Fangrui's proposal in the PR which Alan seconded: Put preceding
> nops before GEP and succeeding nops after LEP.  Previously I had the concern
> that the nops inserted doesn't respect to a same function entry, it looks
> inconsistent to the documentation, and you also noted that "The nops have
> to be consecutive".  If we want to update the documentation, could we reword
> it for PowerPC ELFv2 ABI?
> 
> What's your opinion?

I'm not sure what the question is, sorry.

If you want different semantics for ELFv2 (which might well be useful),
we need some new command line option for that.

I suggested here to just describe in the existing doc what is done for
global and local entry points on ELFv2.

Segher

LoongArch: add model attribute

2022-08-23 Thread Xi Ruoyao via Gcc-patches

Another attempt to make kernel module happy.

One remaining issue: the patch cannot diagnostic some insane thing like

int x __attribute__((model("normal")));
int x __attribute__((model("extreme")));

It seems incredibly difficult to diagnose such thing: I can't figure out
any solution w/o invasion into the frontend.  IA-64 model() only accepts
small so this is not a problem for them, and M32R model() also cannot
diagnose such error.

Any suggestion is welcomed.

-- >8 --

A linker script and/or a section attribute may locate some object
specially, so we need to handle the code model for such objects
differently than the -mcmodel setting. This happens when the Linux
kernel loads a module with per-CPU variables.

Add an attribute to override the code model for a specific variable.

gcc/ChangeLog:

* config/loongarch/loongarch-protos.h (loongarch_symbol_type):
Add SYMBOL_PCREL32 and SYMBOL_PCREL64.
* config/loongarch/loongarch.cc (loongarch_attribute_table):
New attribute table.
(TARGET_ATTRIBUTE_TABLE): Define the target hook.
(loongarch_handle_model_attribute): New static function.
(loongarch_classify_symbol): Return SYMBOL_PCREL{32,64} for
SYMBOL_REF_DECL with model attribute.
(loongarch_use_anchors_for_symbol_p): New static function.
(TARGET_USE_ANCHORS_FOR_SYMBOL_P): Define the target hook.
* doc/extend.texi (Variable Attributes): Document new
LoongArch specific attribute.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-test.c: New test.
* gcc.target/loongarch/attr-model-1.c: New test.
* gcc.target/loongarch/attr-model-2.c: New test.
* gcc.target/loongarch/attr-model-diag.c: New test.
---
 gcc/config/loongarch/loongarch-protos.h   |   8 +
 gcc/config/loongarch/loongarch.cc | 175 +-
 gcc/doc/extend.texi   |  16 ++
 .../gcc.target/loongarch/attr-model-1.c   |   6 +
 .../gcc.target/loongarch/attr-model-2.c   |   6 +
 .../gcc.target/loongarch/attr-model-diag.c|   7 +
 .../gcc.target/loongarch/attr-model-test.c|  25 +++
 7 files changed, 236 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-diag.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-test.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index cadaad7519c..4e925aa3876 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -30,6 +30,12 @@ along with GCC; see the file COPYING3.  If not see
SYMBOL_PCREL
The symbol's value will be loaded directly from data section.
 
+   SYMBOL_PCREL32
+   SYMBOL_PCREL64
+   Like SYMBOL_PCREL, but the PC-relative offset of the symbol is
+   assumed to be in the +/- 2GiB or +/- 8 EiB range, instead of the
+   code model specification.
+
SYMBOL_TLS
A thread-local symbol.
 
@@ -42,6 +48,8 @@ along with GCC; see the file COPYING3.  If not see
 enum loongarch_symbol_type {
   SYMBOL_GOT_DISP,
   SYMBOL_PCREL,
+  SYMBOL_PCREL32,
+  SYMBOL_PCREL64,
   SYMBOL_TLS,
   SYMBOL_TLS_IE,
   SYMBOL_TLS_LE,
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 207ac2762c6..3917c53dd27 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1643,6 +1643,32 @@ loongarch_classify_symbol (const_rtx x)
   && !loongarch_symbol_binds_local_p (x))
 return SYMBOL_GOT_DISP;
 
+  if (SYMBOL_REF_P (x))
+{
+  tree t = SYMBOL_REF_DECL (x);
+  if (!t)
+   return SYMBOL_PCREL;
+
+  t = lookup_attribute ("model", DECL_ATTRIBUTES (t));
+  if (!t)
+   return SYMBOL_PCREL;
+
+  t = TREE_VALUE (TREE_VALUE (t));
+
+  /* loongarch_handle_model_attribute should reject other values.  */
+  gcc_assert (TREE_CODE (t) == STRING_CST);
+
+  const char *model = TREE_STRING_POINTER (t);
+  if (strcmp (model, "normal") == 0)
+   return SYMBOL_PCREL32;
+  if (strcmp (model, "extreme") == 0)
+   return SYMBOL_PCREL64;
+
+  /* loongarch_handle_model_attribute should reject unknown model
+name.  */
+  gcc_unreachable ();
+}
+
   return SYMBOL_PCREL;
 }
 
@@ -1696,6 +1722,8 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
 case SYMBOL_TLSGD:
 case SYMBOL_TLSLDM:
 case SYMBOL_PCREL:
+case SYMBOL_PCREL32:
+case SYMBOL_PCREL64:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
@@ -1721,7 +1749,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, 
machine_mode mode)
 
   return 3;
 
-case SYMBOL_PCREL:
+case SYMBOL

Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple

2022-08-23 Thread Jonathan Wakely via Gcc-patches

On Tue, 23 Aug 2022 at 14:44, Patrick Palka  wrote:
>
> On Tue, 23 Aug 2022, Jonathan Wakely wrote:
>
> > On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++
> >  wrote:
> > >
> > > P2321R2 adds new conditionally explicit constructors to std::tuple which
> > > we'll concisely implement in a subsequent patch using explicit(bool), like
> > > in our C++20 std::pair implementation.  But before we can do that, this
> > > patch first adds members to _TupleConstraints that test for 
> > > constructibility
> > > and convertibility separately; we'll use the first in the new 
> > > constructors'
> > > constraints, and the second in their explicit specifier.
> > >
> > > In passing, this patch also redefines the existing predicates
> > > __is_ex/implicitly_constructible in terms of these new members.  This
> > > seems to reduce compile time and memory usage by about 10% for large
> >
> > Nice.
> >
> > > tuples when using the relevant constructors constrained by
> > > _Explicit/_ImplicitCtor (since we no longer have to redundantly expand
> > > and process is_constructible<_Types, _UTypes>... twice for each pair of
> > > such constructors).  In order to retain maximal short circuiting, do
> > > this only when constexpr if is available.
> >
> > Would we get similar benefits for C++11 and C++14 by doing:
> >
> >return __and_<__and_...>,
> >  __and_...>
> >  >::value;
> >
> > This is slightly more work in total, but if we have __and_ and
> > __and_> then the A and B instantiations will be cached and
> > can be reused.
>
> Good idea, it seems we get pretty much the same 10% improvement for
> C++11/14 with this approach.  I reckon we might as well then define
> __convertible and __constructible as alias templates instead of as
> variable templates and use them unconditionally in
> __is_im/explicitly_constructible for benefit of all language versions.

I had a similar thought after hitting send.


> Using constexpr if instead of the outer __and_ seems to yield a marginal
> improvement at best and __and_v is currently just __and_::value, so it
> doesn't seem worth it to have different definitions for C++17 at least
> for now.
>
> What do you think about the following?

OK for trunk - thanks.

Re: [PATCH 2/3] libstdc++: Implement std::pair/tuple/misc enhancements from P2321R2

2022-08-23 Thread Patrick Palka via Gcc-patches

On Tue, 23 Aug 2022, Jonathan Wakely wrote:

> On Tue, 23 Aug 2022 at 02:36, Patrick Palka via Libstdc++
>  wrote:
> > --- a/libstdc++-v3/include/bits/stl_pair.h
> > +++ b/libstdc++-v3/include/bits/stl_pair.h
> > @@ -212,6 +212,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > swap(second, __p.second);
> >}
> >
> > +#if __cplusplus > 202002L
> > +  /// Swap the first members and then the second members.
> > +  constexpr void
> > +  swap(const pair& __p) const
> > +  noexcept(__and_<__is_nothrow_swappable,
> > + __is_nothrow_swappable>::value)
> 
> This could use __and_v (which is just __and_::value today, but could
> theoretically be optimized to use a requires expression and avoid
> instantiating __and_ one day).
> 
> Is consistency with the C++11 overload more important? I *hope* we
> won't need to make many changes to these noexcept-specifiers, so the
> maintenance burden of using __ad_::value in one and __and_v in the
> other shouldn't be too high.

Makes sense.

> 
> > @@ -710,6 +792,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  noexcept(noexcept(__x.swap(__y)))
> >  { __x.swap(__y); }
> >
> > +#if __cplusplus > 202002L
> > +  template
> > +requires is_swappable::value && is_swappable > _T2>::value
> 
> is_swappable_v instead of ::value here ... this is already using a
> requires-clause and so is substantially different to the old overload
> anyway.
> 
> 
> 
> > +
> >// tuple swap
> >_GLIBCXX20_CONSTEXPR
> >void
> >swap(tuple& __in)
> >noexcept(__and_<__is_nothrow_swappable<_Elements>...>::value)
> >{ _Inherited::_M_swap(__in); }
> > +
> > +#if __cplusplus > 202002L
> > +  constexpr void
> > +  swap(const tuple& __in) const
> > +  noexcept(__and_<__is_nothrow_swappable...>::value)
> 
> __and_v ?
> 
> 
> 
> 
> >_GLIBCXX20_CONSTEXPR
> >void
> >swap(tuple& __in)
> >noexcept(__and_<__is_nothrow_swappable<_T1>,
> >   __is_nothrow_swappable<_T2>>::value)
> >{ _Inherited::_M_swap(__in); }
> > +
> > +#if __cplusplus > 202002L
> > +  constexpr void
> > +  swap(const tuple& __in) const
> > +  noexcept(__and_<__is_nothrow_swappable,
> > + __is_nothrow_swappable>::value)
> 
> __and_v ?
> 
> 
> Thanks for doing this, those changes looked tedious to implement and test!
> 
> If you agree with the suggestions to use _v variable templates, this
> is OK for trunk with those changes. I am willing to be persuaded to
> not use the variable templates if there's a good reason I've missed.

Agreed on all points!  Thanks a lot.

Re: [PATCH v2] ipa-visibility: Optimize TLS access [PR99619]

2022-08-23 Thread Alexander Monakov via Gcc-patches

Ping^3.

On Fri, 5 Aug 2022, Alexander Monakov wrote:

> Ping^2.
> 
> On Wed, 20 Jul 2022, Alexander Monakov wrote:
> 
> > 
> > Ping.
> > 
> > On Thu, 7 Jul 2022, Alexander Monakov via Gcc-patches wrote:
> > 
> > > From: Artem Klimov 
> > > 
> > > Fix PR99619, which asks to optimize TLS model based on visibility.
> > > The fix is implemented as an IPA optimization: this allows to take
> > > optimized visibility status into account (as well as avoid modifying
> > > all language frontends).
> > > 
> > > 2022-04-17  Artem Klimov  
> > > 
> > > gcc/ChangeLog:
> > > 
> > >   * ipa-visibility.cc (function_and_variable_visibility): Promote
> > >   TLS access model afer visibility optimizations.
> > >   * varasm.cc (have_optimized_refs): New helper.
> > >   (optimize_dyn_tls_for_decl_p): New helper. Use it ...
> > >   (decl_default_tls_model): ... here in place of 'optimize' check.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   * gcc.dg/tls/vis-attr-gd.c: New test.
> > >   * gcc.dg/tls/vis-attr-hidden-gd.c: New test.
> > >   * gcc.dg/tls/vis-attr-hidden.c: New test.
> > >   * gcc.dg/tls/vis-flag-hidden-gd.c: New test.
> > >   * gcc.dg/tls/vis-flag-hidden.c: New test.
> > >   * gcc.dg/tls/vis-pragma-hidden-gd.c: New test.
> > >   * gcc.dg/tls/vis-pragma-hidden.c: New test.
> > > 
> > > Co-Authored-By:  Alexander Monakov  
> > > Signed-off-by: Artem Klimov 
> > > ---
> > > 
> > > v2: run the new loop in ipa-visibility only in the whole-program IPA pass;
> > > in decl_default_tls_model, check if any referring function is 
> > > optimized
> > > when 'optimize == 0' (when running in LTO mode)
> > > 
> > > 
> > > Note for reviewers: I noticed there's a place which tries to avoid TLS
> > > promotion, but the comment seems wrong and I could not find a testcase.
> > > I'd suggest we remove it. The compiler can only promote general-dynamic
> > > to local-dynamic and initial-exec to local-exec. The comment refers to
> > > promoting x-dynamic to y-exec, but that cannot happen AFAICT:
> > > https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=8e1ba78f1b8eedd6c65c6f0e6d6d09a801de5d3d
> > > 
> > > 
> > >  gcc/ipa-visibility.cc | 19 +++
> > >  gcc/testsuite/gcc.dg/tls/vis-attr-gd.c| 12 +++
> > >  gcc/testsuite/gcc.dg/tls/vis-attr-hidden-gd.c | 13 
> > >  gcc/testsuite/gcc.dg/tls/vis-attr-hidden.c| 12 +++
> > >  gcc/testsuite/gcc.dg/tls/vis-flag-hidden-gd.c | 13 
> > >  gcc/testsuite/gcc.dg/tls/vis-flag-hidden.c| 12 +++
> > >  .../gcc.dg/tls/vis-pragma-hidden-gd.c | 17 ++
> > >  gcc/testsuite/gcc.dg/tls/vis-pragma-hidden.c  | 16 ++
> > >  gcc/varasm.cc | 32 ++-
> > >  9 files changed, 145 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-gd.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-hidden-gd.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-hidden.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-flag-hidden-gd.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-flag-hidden.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-pragma-hidden-gd.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tls/vis-pragma-hidden.c
> > > 
> > > diff --git a/gcc/ipa-visibility.cc b/gcc/ipa-visibility.cc
> > > index 8a27e7bcd..3ed2b7cf6 100644
> > > --- a/gcc/ipa-visibility.cc
> > > +++ b/gcc/ipa-visibility.cc
> > > @@ -873,6 +873,25 @@ function_and_variable_visibility (bool whole_program)
> > >   }
> > >  }
> > >  
> > > +  if (symtab->state >= IPA_SSA)
> > > +{
> > > +  FOR_EACH_VARIABLE (vnode)
> > > + {
> > > +   tree decl = vnode->decl;
> > > +
> > > +   /* Upgrade TLS access model based on optimized visibility status,
> > > +  unless it was specified explicitly or no references remain.  */
> > > +   if (DECL_THREAD_LOCAL_P (decl)
> > > +   && !lookup_attribute ("tls_model", DECL_ATTRIBUTES (decl))
> > > +   && vnode->ref_list.referring.length ())
> > > + {
> > > +   enum tls_model new_model = decl_default_tls_model (decl);
> > > +   gcc_checking_assert (new_model >= decl_tls_model (decl));
> > > +   set_decl_tls_model (decl, new_model);
> > > + }
> > > + }
> > > +}
> > > +
> > >if (dump_file)
> > >  {
> > >fprintf (dump_file, "\nMarking local functions:");
> > > diff --git a/gcc/varasm.cc b/gcc/varasm.cc
> > > index 4db8506b1..de149e82c 100644
> > > --- a/gcc/varasm.cc
> > > +++ b/gcc/varasm.cc
> > > @@ -6679,6 +6679,36 @@ init_varasm_once (void)
> > >  #endif
> > >  }
> > >  
> > > +/* Determine whether SYMBOL is used in any optimized function.  */
> > > +
> > > +static bool
> > > +have_optimized_refs (struct symtab_node *symbol)
> > > +{
> > > +  struct ipa_ref *ref;
> > > +
> > > +  for (int i = 0; symbol->iterate_referring (i, ref); i++)
> > > +{
> > > +  cgraph_node *cnode = dyn_cast  (ref->referring);
> > > +
> > > +  if (

Re: [PATCH] Speedup path discovery in predicate::use_cannot_happen

2022-08-23 Thread Aldy Hernandez via Gcc-patches

On Tue, Aug 23, 2022 at 2:16 PM Richard Biener  wrote:
>
> The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that
> made compute_control_dep_chain start from function entry rather
> than the immediate dominator of the source block of the edge with
> the undefined value on the PHI node.  Reverting at that point
> does not reveal any testsuite FAIL, in particular the added
> testcase still passes.  The following adjusts this to the other
> function that computes predicates that hold on the PHI incoming
> edges with undefined values, predicate::init_from_phi_def, which
> starts at the immediate dominator of the PHI.  That's much less
> likely to run into the CFG walking limit.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> Aldy - you did this change, do you remember anything here?  In
> fact the whole thing that's now called predicate::use_cannot_happen
> seems to be redundant - the two testcases attributed to its history
> do not fail when that's disabled, nor did they fail when it was
> introduced.  In principle whats now called predicate::superset_of
> should cover this (but different implementation limits might apply).

OMG, I'm drawing a complete blank here.  I have no recollection of
this.  I'm tempted to say either my account was hacked or that old
code was all wrong ;-).

Sorry.
Aldy

[committed] libstdc++: Fix visit(v) for non-void visitors [PR106589]

2022-08-23 Thread Jonathan Wakely via Gcc-patches

Tested powerpc64le-linux, pushed to trunk. Backport to gcc-12 needed.

-- >8 --

The optimization for the common case of std::visit forgot to handle the
edge case of passing zero variants to a non-void visitor and converting
the result to void.

libstdc++-v3/ChangeLog:

PR libstdc++/106589
* include/std/variant (__do_visit): Handle is_void for zero
argument case.
* testsuite/20_util/variant/visit_r.cc: Check std::visit(v).
---
 libstdc++-v3/include/std/variant  | 7 ++-
 libstdc++-v3/testsuite/20_util/variant/visit_r.cc | 8 
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index f8f15665433..c234b54421e 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1728,7 +1728,12 @@ namespace __variant
 {
   // Get the silly case of visiting no variants out of the way first.
   if constexpr (sizeof...(_Variants) == 0)
-   return std::forward<_Visitor>(__visitor)();
+   {
+ if constexpr (is_void_v<_Result_type>)
+   return (void) std::forward<_Visitor>(__visitor)();
+ else
+   return std::forward<_Visitor>(__visitor)();
+   }
   else
{
  constexpr size_t __max = 11; // "These go to eleven."
diff --git a/libstdc++-v3/testsuite/20_util/variant/visit_r.cc 
b/libstdc++-v3/testsuite/20_util/variant/visit_r.cc
index 712459f25e3..c77b259c386 100644
--- a/libstdc++-v3/testsuite/20_util/variant/visit_r.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/visit_r.cc
@@ -54,10 +54,18 @@ void test02()
   std::visit(Visitor(), v);
 }
 
+void test03()
+{
+  // PR libstdc++/106589 - visit rejects lambdas that do not return void
+  auto visitor = []{ return 0; };
+  std::visit(visitor);
+  std::visit(static_cast(visitor));
+}
 
 int
 main()
 {
   test01();
   test02();
+  test03();
 }
-- 
2.37.2

New template for 'gcc' made available

2022-08-23 Thread Translation Project Robot

Hello, gentle maintainer.

This is a message from the Translation Project robot.  (If you have
any questions, send them to .)

A new POT file for textual domain 'gcc' has been made available
to the language teams for translation.  It is archived as:

https://translationproject.org/POT-files/gcc-12.2.0.pot

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

Below is the URL which has been provided to the translators of your
package.  Please inform the translation coordinator, at the address
at the bottom, if this information is not current:

https://gcc.gnu.org/pub/gcc/releases/gcc-12.2.0/gcc-12.2.0.tar.xz

Translated PO files will later be automatically e-mailed to you.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.

[PATCH v4] RISC-V: Add support for inlining subword atomic operations

2022-08-23 Thread Palmer Dabbelt

From: Patrick O'Neill 

RISC-V has no support for subword atomic operations; code currently
generates libatomic library calls.

This patch changes the default behavior to inline subword atomic calls
(using the same logic as the existing library call).
Behavior can be specified using the -minline-atomics and
-mno-inline-atomics command line flags.

gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm.
This will need to stay for backwards compatibility and the
-mno-inline-atomics flag.

2022-04-19 Patrick O'Neill 

PR target/104338
* riscv-protos.h: Add helper function stubs.
* riscv.cc: Add helper functions for subword masking.
* riscv.opt: Add command-line flag.
* sync.md: Add masking logic and inline asm for fetch_and_op,
fetch_and_nand, CAS, and exchange ops.
* invoke.texi: Add blurb regarding command-line flag.
* inline-atomics-1.c: New test.
* inline-atomics-2.c: Likewise.
* inline-atomics-3.c: Likewise.
* inline-atomics-4.c: Likewise.
* inline-atomics-5.c: Likewise.
* inline-atomics-6.c: Likewise.
* inline-atomics-7.c: Likewise.
* inline-atomics-8.c: Likewise.
* atomic.c: Add reference to duplicate logic.

Signed-off-by: Patrick O'Neill 
Signed-off-by: Palmer Dabbelt 
---
This is pretty much the same as the v3, I just rebased it onto trunk and
adjusted the documentation to mention libatomic.  I think there's still
room to do a more optimized implemnetation here, but we still need to
sort out the memory model implications.  Given that this is causing a
headache for the distro folks I think it's best to just take the simple
version now rather than waiting for something better -- we can always do
something better, it's just probably going to miss the next release.

This alone is pretty safe: we're just inlining the same code sequences
that libatomic generates, and since we provide static libatomic that
doesn't add to any future binary compatibility issues.

This has no new failures on trunk.  I also backported it to gcc-10
(as that's what my buildroot is using, which itself is a problem I need
to fix) and it can build/boot a simple glibc-based userspace fine.

OK for trunk?
---
 gcc/config/riscv/riscv-protos.h   |   2 +
 gcc/config/riscv/riscv.cc |  52 ++
 gcc/config/riscv/riscv.opt|   4 +
 gcc/config/riscv/sync.md  | 318 ++
 gcc/doc/invoke.texi   |   8 +
 .../gcc.target/riscv/inline-atomics-1.c   |  18 +
 .../gcc.target/riscv/inline-atomics-2.c   |  19 +
 .../gcc.target/riscv/inline-atomics-3.c   | 569 ++
 .../gcc.target/riscv/inline-atomics-4.c   | 566 +
 .../gcc.target/riscv/inline-atomics-5.c   |  87 +++
 .../gcc.target/riscv/inline-atomics-6.c   |  87 +++
 .../gcc.target/riscv/inline-atomics-7.c   |  69 +++
 .../gcc.target/riscv/inline-atomics-8.c   |  69 +++
 libgcc/config/riscv/atomic.c  |   2 +
 14 files changed, 1870 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 20c2381c21a..14f3c8f0d4e 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
 extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
 extern bool riscv_gpr_save_operation_p (rtx);
+extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
+extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9d70974c893..1d8730d80e9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5791,6 +5791,58 @@ riscv_init_libfuncs (void)
   set_optab_libfunc (unord_optab, HFmode, NULL);
 }
 
+/* Helper function for extracting a subword from memory.  */
+
+void
+riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
+  rtx *not_mask)
+{
+  /* Align the memory addess to a word.  */
+  rtx addr = force_reg (Pmode, XEXP (mem, 0));
+
+  rtx aligned_addr = gen_reg_rtx (Pmode);
+  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmo

[PATCH] i386: avoid zero extension for crc32q

2022-08-23 Thread Alexander Monakov via Gcc-patches

The crc32q instruction takes 64-bit operands, but ignores high 32 bits
of the destination operand, and zero-extends the result from 32 bits.

Let's model this in the RTL pattern to avoid zero-extension when the
_mm_crc32_u64 intrinsic is used with a 32-bit type.

PR target/106453

gcc/ChangeLog:

* config/i386/i386.md (sse4_2_crc32di): Model that only low 32
bits of operand 0 are consumed, and the result is zero-extended
to 64 bits.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr106453.c: New test.
---
 gcc/config/i386/i386.md  |  6 +++---
 gcc/testsuite/gcc.target/i386/pr106453.c | 13 +
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106453.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 58fcc382f..b5760bb23 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23823,10 +23823,10 @@
 
 (define_insn "sse4_2_crc32di"
   [(set (match_operand:DI 0 "register_operand" "=r")
-   (unspec:DI
- [(match_operand:DI 1 "register_operand" "0")
+   (zero_extend:DI (unspec:SI
+ [(match_operand:SI 1 "register_operand" "0")
   (match_operand:DI 2 "nonimmediate_operand" "rm")]
- UNSPEC_CRC32))]
+ UNSPEC_CRC32)))]
   "TARGET_64BIT && TARGET_CRC32"
   "crc32{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog1")
diff --git a/gcc/testsuite/gcc.target/i386/pr106453.c 
b/gcc/testsuite/gcc.target/i386/pr106453.c
new file mode 100644
index 0..bab5b1cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106453.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-msse4.2 -O2 -fdump-rtl-final" } */
+/* { dg-final { scan-rtl-dump-not "zero_extendsidi" "final" } } */
+
+#include 
+#include 
+
+uint32_t f(uint32_t c, uint64_t *p, size_t n)
+{
+for (size_t i = 0; i < n; i++)
+c = _mm_crc32_u64(c, p[i]);
+return c;
+}
-- 
2.35.1

Re: [PATCH] libstdc++: Optimize operator+(string/char, string/char) equally

2022-08-23 Thread Jonathan Wakely via Gcc-patches

On Mon, 22 Aug 2022 at 19:15, Will Hawkins wrote:
>
> Until now operator+(char*, string) and operator+(string, char*) had
> different performance characteristics. The former required a single
> memory allocation and the latter required two. This patch makes the
> performance equal.

If you don't have a GCC copyright assignment on file with the FSF then
please follow the procedure described at https://gcc.gnu.org/dco.html


> libstdc++-v3/ChangeLog:
> * libstdc++-v3/include/bits/basic_string.h (operator+(string, char*)):
> Remove naive implementation.
> * libstdc++-v3/include/bits/basic_string.tcc (operator+(string, 
> char*)):
> Add single-allocation implementation.
> ---
>  libstdc++-v3/include/bits/basic_string.h   |  7 +--
>  libstdc++-v3/include/bits/basic_string.tcc | 21 +
>  2 files changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/basic_string.h 
> b/libstdc++-v3/include/bits/basic_string.h
> index b04fba95678..bc048fc0689 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -3523,12 +3523,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
>  _GLIBCXX20_CONSTEXPR
>  inline basic_string<_CharT, _Traits, _Alloc>

Please remove the 'inline' specifier here, since you're moving the
definition into the non-inline .tcc file.

There's a separate discussion to be had about whether these operator+
overloads *should* be inline. But for the purposes of this change, we
want these two operator+ overloads to be consistent, and so they
should both be non-inline.

>  operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> - const _CharT* __rhs)
> -{
> -  basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> -  __str.append(__rhs);
> -  return __str;
> -}
> + const _CharT* __rhs);
>
>/**
> *  @brief  Concatenate string and character.
> diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
> b/libstdc++-v3/include/bits/basic_string.tcc
> index 4563c61429a..95ba8e503e9 100644
> --- a/libstdc++-v3/include/bits/basic_string.tcc
> +++ b/libstdc++-v3/include/bits/basic_string.tcc
> @@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>return __str;
>  }
>
> +  template
> +_GLIBCXX20_CONSTEXPR
> +basic_string<_CharT, _Traits, _Alloc>
> +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> + const _CharT* __rhs)
> +{
> +  __glibcxx_requires_string(__rhs);
> +  typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> +  typedef typename __string_type::size_type  __size_type;
> +  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> +   rebind<_CharT>::other _Char_alloc_type;
> +  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> +  const __size_type __len = _Traits::length(__rhs);
> +  __string_type __str(_Alloc_traits::_S_select_on_copy(
> +  __lhs.get_allocator()));
> +  __str.reserve(__len + __lhs.size());
> +  __str.append(__lhs);
> +  __str.append(__rhs, __len);
> +  return __str;
> +}
> +
>template
>  _GLIBCXX_STRING_CONSTEXPR
>  typename basic_string<_CharT, _Traits, _Alloc>::size_type
> --
> 2.34.1
>

[PATCH v4] c++: Implement -Wself-move warning [PR81159]

2022-08-23 Thread Marek Polacek via Gcc-patches

On Sat, Aug 20, 2022 at 05:31:52PM -0400, Jason Merrill wrote:
> On 8/19/22 15:34, Marek Polacek wrote:
> > On Thu, Aug 18, 2022 at 08:33:47PM -0400, Jason Merrill wrote:
> > > On 8/18/22 13:19, Marek Polacek wrote:
> > > > On Mon, Aug 15, 2022 at 03:54:05PM -0400, Jason Merrill wrote:
> > > > > On 8/9/22 09:37, Marek Polacek wrote:
> > > > > > +  /* We're looking for *std::move ((T &) &arg), or
> > > > > > + *std::move ((T &) (T *) r) if the argument it a 
> > > > > > reference.  */
> > > > > > +  if (!REFERENCE_REF_P (rhs)
> > > > > > +  || TREE_CODE (TREE_OPERAND (rhs, 0)) != CALL_EXPR)
> > > > > > +return;
> > > > > > +  tree fn = TREE_OPERAND (rhs, 0);
> > > > > > +  if (!is_std_move_p (fn))
> > > > > > +return;
> > > > > > +  tree arg = CALL_EXPR_ARG (fn, 0);
> > > > > > +  if (TREE_CODE (arg) != NOP_EXPR)
> > > > > > +return;
> > > > > > +  /* Strip the (T &).  */
> > > > > > +  arg = TREE_OPERAND (arg, 0);
> > > > > > +  /* Strip the (T *) or &.  */
> > > > > > +  arg = TREE_OPERAND (arg, 0);
> > > > > 
> > > > > Are you sure these are the only two expressions that can make it 
> > > > > here? What
> > > > > if the argument to move is *Tptr?
> > > > 
> > > > Not 100% sure but I couldn't find any other form.  For *Tptr we get
> > > > *std::move ((int * &) &Tptr)
> > > 
> > > That likes like what you'd get when the argument is Tptr, not when it's
> > > *Tptr.  And indeed that's what I see in the testcase:
> > > 
> > > > +  Tptr = std::move (Tptr); // { dg-warning "moving a variable to 
> > > > itself" }
> > > 
> > > is missing the *
> > 
> > Duh, sorry.  The previous patch didn't handle the *Tptr case.  Further 
> > poking
> > revealed that we need special care to handle (*(Tptr)) and **Tptr etc.  So 
> > in
> > this patch I'm stripping all *s and V_C_Es.  Sigh.
> 
> Ah, I was just thinking that the old patch needed more checking of
> TREE_CODEs.  But I suppose it's also good to diagnose these cases as well.

Probably.  I notice that clang++ doesn't detect it, but that doesn't mean
we can't.
 
> > +  if (cp_tree_equal (lhs, arg))
> > +{
> > +  auto_diagnostic_group d;
> > +  if (warning_at (loc, OPT_Wself_move, "moving a variable to itself"))
> 
> Maybe only say "variable" when the original operand is in fact a variable?

Fixed by the print_var_p thingie I introduced.  I saw that clang++ points
out the type so I'm printing it too in this patch.
 
> > +// Test various other arguments to std::move.
> > +template
> > +void
> > +testargs (T *Tptr, T **Tpptr, T& Tref, T&& Trref, const T *Tcptr)
> > +{
> > +  Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" }
> > +  *Tptr = std::move (*Tptr); // { dg-warning "moving a variable to itself" 
> > }
> > +  *Tptr = std::move (*(Tptr)); // { dg-warning "moving a variable to 
> > itself" }
> > +  *(Tptr) = std::move (*Tptr); // { dg-warning "moving a variable to 
> > itself" }
> > +  *(Tptr + 1) = std::move (*Tptr + 1);
> > +  *(Tptr + 1) = std::move (*Tptr + 2);
> 
> Do you mean to have *(Tptr + 1) on both sides?  The difference in * operand
> is surprising.

Definitely not intended, good spotting!  Fixed (we emit a warning for the
+1 case but not the +2 case -- good).

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
About 5 years ago we got a request to implement -Wself-move, which
warns about useless moves like this:

  int x;
  x = std::move (x);

This patch implements that warning.

PR c++/81159

gcc/c-family/ChangeLog:

* c.opt (Wself-move): New option.

gcc/cp/ChangeLog:

* typeck.cc (maybe_warn_self_move): New.
(cp_build_modify_expr): Call maybe_warn_self_move.

gcc/ChangeLog:

* doc/invoke.texi: Document -Wself-move.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wself-move1.C: New test.
---
 gcc/c-family/c.opt  |   4 +
 gcc/cp/typeck.cc|  61 +++-
 gcc/doc/invoke.texi |  23 -
 gcc/testsuite/g++.dg/warn/Wself-move1.C | 125 
 4 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wself-move1.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index dfdebd596ef..f776efd39d8 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1229,6 +1229,10 @@ Wselector
 ObjC ObjC++ Var(warn_selector) Warning
 Warn if a selector has multiple methods.
 
+Wself-move
+C++ ObjC++ Var(warn_self_move) Warning LangEnabledBy(C++ ObjC++, Wall)
+Warn when a value is moved to itself with std::move.
+
 Wsequence-point
 C ObjC C++ ObjC++ Var(warn_sequence_point) Warning LangEnabledBy(C ObjC C++ 
ObjC++,Wall)
 Warn about possible violations of sequence point rules.
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 992ebfd99fb..194c403bd2b 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -8897,7 +8897,64 @@ cp_build_c_cast (location_t loc, tree type, tree expr,
 
   return error_mark_node;
 }
-
+
+/* Warn wh

New German PO file for 'gcc' (version 12.2.0)

2022-08-23 Thread Translation Project Robot

Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the German team of translators.  The file is available at:

https://translationproject.org/latest/gcc/de.po

(This file, 'gcc-12.2.0.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.

Re: Patch ping (Re: [PATCH] Implement __builtin_issignaling)

2022-08-23 Thread Joseph Myers

On Tue, 23 Aug 2022, Jakub Jelinek via Gcc-patches wrote:

> On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote:
> > I'm OK with the rest of the patch if Joseph doesn't have comments
> > on the actual issignaling lowerings (which I didn't review for
> > correctness due to lack of knowledge).
> 
> I'd like to ping this patch.
> Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html
> with
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html
> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html
> incremental additions.

One testsuite comment:

> +#ifdef __SIZEOF_FLOAT128__
> +  if (f7 (w) || !f7 (__builtin_nansf128 ("0x123")) || f7 (42.0Q) || f7 
> (__builtin_nanf128 ("0x234"))
> +  || f7 (__builtin_inff128 ()) || f7 (-__builtin_inff128 ()) || f7 
> (-42.0Q) || f7 (-0.0Q) || f7 (0.0Q))
> +__builtin_abort ();

__SIZEOF_FLOAT128__ is a target-specific macro for two targets.  I think 
it's better to model things on the existing _FloatN and _FloatNx tests, so 
have such a test for each such type, preferably with most of the test 
implementation type-generic (see e.g. floatn-builtin.h) and then with the 
individual tests including e.g.

/* { dg-add-options float128 } */
/* { dg-require-effective-target float128_runtime } */

to enable and test for the relevant support.  That would mean _Float128 
gets tested wherever supported - and also that the support is properly 
tested for _Float16, which doesn't look like it's covered by the tests in 
this patch at all at present.

-- 
Joseph S. Myers
jos...@codesourcery.com

[PATCH] v2: Implement __builtin_issignaling

2022-08-23 Thread Jakub Jelinek via Gcc-patches

On Tue, Aug 23, 2022 at 06:23:23PM +, Joseph Myers wrote:
> On Tue, 23 Aug 2022, Jakub Jelinek via Gcc-patches wrote:
> 
> > On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote:
> > > I'm OK with the rest of the patch if Joseph doesn't have comments
> > > on the actual issignaling lowerings (which I didn't review for
> > > correctness due to lack of knowledge).
> > 
> > I'd like to ping this patch.
> > Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html
> > with
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html
> > incremental additions.
> 
> One testsuite comment:
> 
> > +#ifdef __SIZEOF_FLOAT128__
> > +  if (f7 (w) || !f7 (__builtin_nansf128 ("0x123")) || f7 (42.0Q) || f7 
> > (__builtin_nanf128 ("0x234"))
> > +  || f7 (__builtin_inff128 ()) || f7 (-__builtin_inff128 ()) || f7 
> > (-42.0Q) || f7 (-0.0Q) || f7 (0.0Q))
> > +__builtin_abort ();
> 
> __SIZEOF_FLOAT128__ is a target-specific macro for two targets.  I think 
> it's better to model things on the existing _FloatN and _FloatNx tests, so 
> have such a test for each such type, preferably with most of the test 
> implementation type-generic (see e.g. floatn-builtin.h) and then with the 
> individual tests including e.g.
> 
> /* { dg-add-options float128 } */
> /* { dg-require-effective-target float128_runtime } */
> 
> to enable and test for the relevant support.  That would mean _Float128 
> gets tested wherever supported - and also that the support is properly 
> tested for _Float16, which doesn't look like it's covered by the tests in 
> this patch at all at present.

Ok, implemented in the updated patch.
Tested with
make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
dg-torture.exp=*builtin-issignaling* i386.exp=*builtin-issignaling*'
ok if it passes full bootstrap/regtest?

2022-08-23  Jakub Jelinek  

gcc/
* builtins.def (BUILT_IN_ISSIGNALING): New built-in.
* builtins.cc (expand_builtin_issignaling): New function.
(expand_builtin_signbit): Don't overwrite target.
(expand_builtin): Handle BUILT_IN_ISSIGNALING.
(fold_builtin_classify): Likewise.
(fold_builtin_1): Likewise.
* optabs.def (issignaling_optab): New.
* fold-const-call.cc (fold_const_call_ss): Handle
BUILT_IN_ISSIGNALING.
* config/i386/i386.md (issignalingxf2): New expander.
* doc/extend.texi (__builtin_issignaling): Document.
(__builtin_isinf, __builtin_isnan): Clarify behavior with
-ffinite-math-only.
* doc/md.texi (issignaling2): Likewise.
gcc/c-family/
* c-common.cc (check_builtin_function_arguments): Handle
BUILT_IN_ISSIGNALING.
gcc/c/
* c-typeck.cc (convert_arguments): Handle BUILT_IN_ISSIGNALING.
gcc/fortran/
* f95-lang.cc (gfc_init_builtin_functions): Initialize
BUILT_IN_ISSIGNALING.
gcc/testsuite/
* gcc.dg/torture/builtin-issignaling-1.c: New test.
* gcc.dg/torture/builtin-issignaling-2.c: New test.
* gcc.dg/torture/float16-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float32-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float32x-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float64-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float64x-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float128-builtin-issignaling-1.c: New test.
* gcc.dg/torture/float128x-builtin-issignaling-1.c: New test.
* gcc.target/i386/builtin-issignaling-1.c: New test.

--- gcc/builtins.def.jj 2022-08-17 16:58:47.666820640 +0200
+++ gcc/builtins.def2022-08-23 20:59:35.286564045 +0200
@@ -908,6 +908,7 @@ DEF_GCC_BUILTIN(BUILT_IN_ISLESS,
 DEF_GCC_BUILTIN(BUILT_IN_ISLESSEQUAL, "islessequal", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
 DEF_GCC_BUILTIN(BUILT_IN_ISLESSGREATER, "islessgreater", 
BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
 DEF_GCC_BUILTIN(BUILT_IN_ISUNORDERED, "isunordered", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+DEF_GCC_BUILTIN(BUILT_IN_ISSIGNALING, "issignaling", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
 DEF_LIB_BUILTIN(BUILT_IN_LABS, "labs", BT_FN_LONG_LONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN(BUILT_IN_LLABS, "llabs", BT_FN_LONGLONG_LONGLONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN(BUILT_IN_LONGJMP, "longjmp", BT_FN_VOID_PTR_INT, 
ATTR_NORETURN_NOTHROW_LIST)
--- gcc/builtins.cc.jj  2022-08-17 16:58:47.637821012 +0200
+++ gcc/builtins.cc 2022-08-23 20:59:35.287564032 +0200
@@ -123,6 +123,7 @@ static rtx expand_builtin_fegetround (tr
 static rtx expand_builtin_feclear_feraise_except (tree, rtx, machine_mode,
  optab);
 static rtx expand_builtin_cexpi (tree, rtx);
+static rtx expand_builtin_issignaling (tree, rtx)

[PATCH] Fortran: improve error recovery while simplifying size of bad array [PR103694]

2022-08-23 Thread Harald Anlauf via Gcc-patches

Dear all,

the simplification of the size of an array with a bad decl
should not be successful.  Improve the error recovery for
such a situation.

The patch is nearly obvious.  I therefore intend to commit
it in the next few days unless someone comes up with a
better solution.

Regtested on x86_64-pc-linux-gnu.

The PR is marked as a 12/13 regression.  Will therefore
commit to both.

Thanks,
Harald

From d306c0b171e502e3c87b92b6bc63b532f734e754 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Tue, 23 Aug 2022 22:16:14 +0200
Subject: [PATCH] Fortran: improve error recovery while simplifying size of bad
 array [PR103694]

gcc/fortran/ChangeLog:

	PR fortran/103694
	* simplify.cc (simplify_size): The size expression of an array cannot
	be simplified if an error occurs while resolving the array spec.

gcc/testsuite/ChangeLog:

	PR fortran/103694
	* gfortran.dg/pr103694.f90: New test.
---
 gcc/fortran/simplify.cc|  5 +++--
 gcc/testsuite/gfortran.dg/pr103694.f90 | 11 +++
 2 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr103694.f90

diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index f992c31e5d7..bc178d54891 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -7536,8 +7536,9 @@ simplify_size (gfc_expr *array, gfc_expr *dim, int k)
 }

   for (ref = array->ref; ref; ref = ref->next)
-if (ref->type == REF_ARRAY && ref->u.ar.as)
-  gfc_resolve_array_spec (ref->u.ar.as, 0);
+if (ref->type == REF_ARRAY && ref->u.ar.as
+	&& !gfc_resolve_array_spec (ref->u.ar.as, 0))
+  return NULL;

   if (dim == NULL)
 {
diff --git a/gcc/testsuite/gfortran.dg/pr103694.f90 b/gcc/testsuite/gfortran.dg/pr103694.f90
new file mode 100644
index 000..3ed8b2088da
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr103694.f90
@@ -0,0 +1,11 @@
+! { dg-do compile }
+! PR fortran/103694 - ICE in gfc_conv_expr_op
+! Contributed by G.Steinmetz
+
+subroutine s
+  type t
+ integer :: a(2)
+  end type
+  type(t) :: x((0.)/0)
+  integer :: n = size(x(1)%a) ! { dg-error "does not reduce to a constant expression" }
+end
--
2.35.3

[PATCH] x86: Replace vmovdqu with movdqu in BF16 XMM ABI tests

2022-08-23 Thread H.J. Lu via Gcc-patches

I am checking in this as an obvious fix.

H.J.
---
Since XMM BF16 tests only require SSE2, replace vmovdqu with movdqu in
BF16 XMM ABI tests to support SSE2 machines without AVX.

Tested on x86-64 machines with and without AVX.

* gcc.target/x86_64/abi/bf16/asm-support.S: Replace vmovdqu with
movdqu.
---
 .../gcc.target/x86_64/abi/bf16/asm-support.S  | 36 +--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S 
b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
index a8165d86317..7559aa910c4 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
@@ -20,22 +20,22 @@ snapshot:
movq%r13, r13(%rip)
movq%r14, r14(%rip)
movq%r15, r15(%rip)
-   vmovdqu %xmm0, xmm_regs+0(%rip)
-   vmovdqu %xmm1, xmm_regs+16(%rip)
-   vmovdqu %xmm2, xmm_regs+32(%rip)
-   vmovdqu %xmm3, xmm_regs+48(%rip)
-   vmovdqu %xmm4, xmm_regs+64(%rip)
-   vmovdqu %xmm5, xmm_regs+80(%rip)
-   vmovdqu %xmm6, xmm_regs+96(%rip)
-   vmovdqu %xmm7, xmm_regs+112(%rip)
-   vmovdqu %xmm8, xmm_regs+128(%rip)
-   vmovdqu %xmm9, xmm_regs+144(%rip)
-   vmovdqu %xmm10, xmm_regs+160(%rip)
-   vmovdqu %xmm11, xmm_regs+176(%rip)
-   vmovdqu %xmm12, xmm_regs+192(%rip)
-   vmovdqu %xmm13, xmm_regs+208(%rip)
-   vmovdqu %xmm14, xmm_regs+224(%rip)
-   vmovdqu %xmm15, xmm_regs+240(%rip)
+   movdqu  %xmm0, xmm_regs+0(%rip)
+   movdqu  %xmm1, xmm_regs+16(%rip)
+   movdqu  %xmm2, xmm_regs+32(%rip)
+   movdqu  %xmm3, xmm_regs+48(%rip)
+   movdqu  %xmm4, xmm_regs+64(%rip)
+   movdqu  %xmm5, xmm_regs+80(%rip)
+   movdqu  %xmm6, xmm_regs+96(%rip)
+   movdqu  %xmm7, xmm_regs+112(%rip)
+   movdqu  %xmm8, xmm_regs+128(%rip)
+   movdqu  %xmm9, xmm_regs+144(%rip)
+   movdqu  %xmm10, xmm_regs+160(%rip)
+   movdqu  %xmm11, xmm_regs+176(%rip)
+   movdqu  %xmm12, xmm_regs+192(%rip)
+   movdqu  %xmm13, xmm_regs+208(%rip)
+   movdqu  %xmm14, xmm_regs+224(%rip)
+   movdqu  %xmm15, xmm_regs+240(%rip)
jmp *callthis(%rip)
 .LFE3:
.size   snapshot, .-snapshot
@@ -50,8 +50,8 @@ snapshot_ret:
addq$8, %rsp
movq%rax, rax(%rip)
movq%rdx, rdx(%rip)
-   vmovdqu %xmm0, xmm_regs+0(%rip)
-   vmovdqu %xmm1, xmm_regs+16(%rip)
+   movdqu  %xmm0, xmm_regs+0(%rip)
+   movdqu  %xmm1, xmm_regs+16(%rip)
fstpt   x87_regs(%rip)
fstpt   x87_regs+16(%rip)
fldtx87_regs+16(%rip)
-- 
2.37.2

Re: [PATCH] Improve converting between 128-bit modes that use the same format

2022-08-23 Thread Segher Boessenkool

Please do not send new patches as replies to other patches.

On Thu, Aug 18, 2022 at 05:48:29PM -0400, Michael Meissner wrote:
> mprove converting between 128-bit modes that use the same format.

You are missing some characters?  But this is an edited version of the
subject anyway.  Just don't do that (neither the copying or the
editing), it just confuses things.

Please factor this patch into more pieces, pieces that can be reviewed
more easily, pieces that change one thing only.

As is you are just rewriting the lot, and it is not an improvement at
all this way.  No doubt there are many good pieces in it, but mixed with
a non-trivial amount of bad pieces I cannot approve it.  It also isn't
clear at all what you want to do; piece by piece it is easier to
explain.

> -; Iterators for converting to/from TFmode
> -(define_mode_iterator IFKF [IF KF])

Yes, IFmode and KFmode have almost nothing in common.  Good to see this
go.  It would be even better if we would not use
rs6000_expand_float128_convert when not needed, either, and all this
would be just gone after expand.

> +(define_expand "extendkfif2"
> +  [(set (match_operand:IF 0 "gpc_reg_operand")
> + (float_extend:IF (match_operand:KF 1 "gpc_reg_operand")))]
> +  "TARGET_FLOAT128_TYPE"
> +{
> +  rs6000_expand_float128_convert (operands[0], operands[1], false);
> +  DONE;
> +})

This does not belong here.

It really shouldn't *exist* at all: this is *not* a float_extend!  It is
not converting to a wider mode (as required!), but not even to a mode of
higher precision: both IFmode and KFmode can represent (finite, normal)
numbers the other can not.

But it certainly does not belong here in the middle of no-op moves.

> +(define_expand "trunckfif2"
> +  [(set (match_operand:IF 0 "gpc_reg_operand")
> + (float_truncate:IF (match_operand:KF 1 "gpc_reg_operand")))]
> +  "TARGET_FLOAT128_TYPE"
> +{
> +  rs6000_expand_float128_convert (operands[0], operands[1], false);
> +  DONE;
> +})

I also would expect IBM128 instead of just IF.  This would simplify a
lot.  Why do you not use that, is there a reason?

> +;; Convert between KFmode and TFmode when -mabi=ieeelongdouble
> +(define_insn_and_split "*extendkftf2_internal"

Same for IEEE128.  And this isn't a conversion at all (it's a no-op
move), please don't confuse things by saying it is.

> +  [(set_attr "type" "two")
> +   (set_attr "num_insns" "2")])

Btw, that really should never be needed.  insn_type "two" already means
exactly that.

> +(define_insn_and_split "*extendtfif2_internal"
> +  [(set (match_operand:IF 0 "gpc_reg_operand" "=d,&d")
> + (float_extend:IF
> +  (match_operand:TF 1 "input_operand" "0,d")))]
> +   "FLOAT128_IBM_P (TFmode)"
> +  "#"
> +  "&& reload_completed"

Why would this ever need reload_completed?  It is a no-op move!

Segher

Re: [PATCH v4] c++: Implement -Wself-move warning [PR81159]

2022-08-23 Thread Jason Merrill via Gcc-patches


On 8/23/22 09:39, Marek Polacek wrote:

On Sat, Aug 20, 2022 at 05:31:52PM -0400, Jason Merrill wrote:

On 8/19/22 15:34, Marek Polacek wrote:

On Thu, Aug 18, 2022 at 08:33:47PM -0400, Jason Merrill wrote:

On 8/18/22 13:19, Marek Polacek wrote:

On Mon, Aug 15, 2022 at 03:54:05PM -0400, Jason Merrill wrote:

On 8/9/22 09:37, Marek Polacek wrote:

+  /* We're looking for *std::move ((T &) &arg), or
+ *std::move ((T &) (T *) r) if the argument it a reference.  */
+  if (!REFERENCE_REF_P (rhs)
+  || TREE_CODE (TREE_OPERAND (rhs, 0)) != CALL_EXPR)
+return;
+  tree fn = TREE_OPERAND (rhs, 0);
+  if (!is_std_move_p (fn))
+return;
+  tree arg = CALL_EXPR_ARG (fn, 0);
+  if (TREE_CODE (arg) != NOP_EXPR)
+return;
+  /* Strip the (T &).  */
+  arg = TREE_OPERAND (arg, 0);
+  /* Strip the (T *) or &.  */
+  arg = TREE_OPERAND (arg, 0);


Are you sure these are the only two expressions that can make it here? What
if the argument to move is *Tptr?


Not 100% sure but I couldn't find any other form.  For *Tptr we get
*std::move ((int * &) &Tptr)


That likes like what you'd get when the argument is Tptr, not when it's
*Tptr.  And indeed that's what I see in the testcase:


+  Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" }


is missing the *


Duh, sorry.  The previous patch didn't handle the *Tptr case.  Further poking
revealed that we need special care to handle (*(Tptr)) and **Tptr etc.  So in
this patch I'm stripping all *s and V_C_Es.  Sigh.


Ah, I was just thinking that the old patch needed more checking of
TREE_CODEs.  But I suppose it's also good to diagnose these cases as well.


Probably.  I notice that clang++ doesn't detect it, but that doesn't mean
we can't.
  

+  if (cp_tree_equal (lhs, arg))
+{
+  auto_diagnostic_group d;
+  if (warning_at (loc, OPT_Wself_move, "moving a variable to itself"))


Maybe only say "variable" when the original operand is in fact a variable?


Fixed by the print_var_p thingie I introduced.  I saw that clang++ points
out the type so I'm printing it too in this patch.
  

+// Test various other arguments to std::move.
+template
+void
+testargs (T *Tptr, T **Tpptr, T& Tref, T&& Trref, const T *Tcptr)
+{
+  Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" }
+  *Tptr = std::move (*Tptr); // { dg-warning "moving a variable to itself" }
+  *Tptr = std::move (*(Tptr)); // { dg-warning "moving a variable to itself" }
+  *(Tptr) = std::move (*Tptr); // { dg-warning "moving a variable to itself" }
+  *(Tptr + 1) = std::move (*Tptr + 1);
+  *(Tptr + 1) = std::move (*Tptr + 2);


Do you mean to have *(Tptr + 1) on both sides?  The difference in * operand
is surprising.


Definitely not intended, good spotting!  Fixed (we emit a warning for the
+1 case but not the +2 case -- good).

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
About 5 years ago we got a request to implement -Wself-move, which
warns about useless moves like this:

   int x;
   x = std::move (x);

This patch implements that warning.

PR c++/81159

gcc/c-family/ChangeLog:

* c.opt (Wself-move): New option.

gcc/cp/ChangeLog:

* typeck.cc (maybe_warn_self_move): New.
(cp_build_modify_expr): Call maybe_warn_self_move.

gcc/ChangeLog:

* doc/invoke.texi: Document -Wself-move.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wself-move1.C: New test.
---
  gcc/c-family/c.opt  |   4 +
  gcc/cp/typeck.cc|  61 +++-
  gcc/doc/invoke.texi |  23 -
  gcc/testsuite/g++.dg/warn/Wself-move1.C | 125 
  4 files changed, 211 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/warn/Wself-move1.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index dfdebd596ef..f776efd39d8 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1229,6 +1229,10 @@ Wselector
  ObjC ObjC++ Var(warn_selector) Warning
  Warn if a selector has multiple methods.
  
+Wself-move

+C++ ObjC++ Var(warn_self_move) Warning LangEnabledBy(C++ ObjC++, Wall)
+Warn when a value is moved to itself with std::move.
+
  Wsequence-point
  C ObjC C++ ObjC++ Var(warn_sequence_point) Warning LangEnabledBy(C ObjC C++ 
ObjC++,Wall)
  Warn about possible violations of sequence point rules.
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 992ebfd99fb..194c403bd2b 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -8897,7 +8897,64 @@ cp_build_c_cast (location_t loc, tree type, tree expr,
  
return error_mark_node;

  }
-
+
+/* Warn when a value is moved to itself with std::move.  LHS is the target,
+   RHS may be the std::move call, and LOC is the location of the whole
+   assignment.  */
+
+static void
+maybe_warn_self_move (location_t loc, tree lhs, tree rhs)
+{
+  if (!warn_self_move)
+return;
+
+  /* C++98 doesn't know move.  */
+  if (cxx_dialect < cxx11)
+return;
+
+  if (processing_templa

Re: [PATCH] c++: Quash bogus -Wredundant-move warning

2022-08-23 Thread Jason Merrill via Gcc-patches


On 8/22/22 17:10, Marek Polacek wrote:

This patch fixes a pretty stoopid thinko.  When I added code to warn
about pessimizing std::move in initializations like

   T t{std::move(T())};

I also added code to unwrap the expression from { }.  But when we have

   return {std::move(t)};

we cannot warn about a redundant std::move because the implicit move
wouldn't happen for "return {t};" because the expression isn't just
a name.  However, we still want to warn about

   return {std::move(T())};

so let's not disable the -Wpessimizing-move warning.  Tests added for
both cases.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


gcc/cp/ChangeLog:

* typeck.cc (maybe_warn_pessimizing_move): Don't warn about
redundant std::move when the expression was wrapped in { }.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/Wpessimizing-move10.C: New test.
* g++.dg/cpp0x/Wredundant-move12.C: New test.
---
  gcc/cp/typeck.cc  | 13 +--
  .../g++.dg/cpp0x/Wpessimizing-move10.C| 30 
  .../g++.dg/cpp0x/Wredundant-move12.C  | 36 +++
  3 files changed, 77 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 992ebfd99fb..7fde65adaa4 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10397,11 +10397,15 @@ maybe_warn_pessimizing_move (tree expr, tree type, 
bool return_p)
if (!CLASS_TYPE_P (type))
  return;
  
+  bool wrapped_p = false;

/* A a = std::move (A());  */
if (TREE_CODE (expr) == TREE_LIST)
  {
if (list_length (expr) == 1)
-   expr = TREE_VALUE (expr);
+   {
+ expr = TREE_VALUE (expr);
+ wrapped_p = true;
+   }
else
return;
  }
@@ -10410,7 +10414,10 @@ maybe_warn_pessimizing_move (tree expr, tree type, 
bool return_p)
else if (TREE_CODE (expr) == CONSTRUCTOR)
  {
if (CONSTRUCTOR_NELTS (expr) == 1)
-   expr = CONSTRUCTOR_ELT (expr, 0)->value;
+   {
+ expr = CONSTRUCTOR_ELT (expr, 0)->value;
+ wrapped_p = true;
+   }
else
return;
  }
@@ -10458,6 +10465,8 @@ maybe_warn_pessimizing_move (tree expr, tree type, bool 
return_p)
/* Warn if the move is redundant.  It is redundant when we would
 do maybe-rvalue overload resolution even without std::move.  */
else if (warn_redundant_move
+  /* This doesn't apply for return {std::move (t)};.  */
+  && !wrapped_p
   && !warning_suppressed_p (expr, OPT_Wredundant_move)
   && (moved = treat_lvalue_as_rvalue_p (arg, /*return*/true)))
{
diff --git a/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C 
b/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C
new file mode 100644
index 000..77314141da3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C
@@ -0,0 +1,30 @@
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wpessimizing-move" }
+
+// Define std::move.
+namespace std {
+  template
+struct remove_reference
+{ typedef _Tp   type; };
+
+  template
+struct remove_reference<_Tp&>
+{ typedef _Tp   type; };
+
+  template
+struct remove_reference<_Tp&&>
+{ typedef _Tp   type; };
+
+  template
+constexpr typename std::remove_reference<_Tp>::type&&
+move(_Tp&& __t) noexcept
+{ return static_cast::type&&>(__t); }
+}
+
+struct S { };
+
+S
+f ()
+{
+   return {std::move(S())}; // { dg-warning "moving a temporary object prevents 
copy elision" }
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C 
b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C
new file mode 100644
index 000..192d981ba5e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C
@@ -0,0 +1,36 @@
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wredundant-move" }
+
+// Define std::move.
+namespace std {
+  template
+struct remove_reference
+{ typedef _Tp   type; };
+
+  template
+struct remove_reference<_Tp&>
+{ typedef _Tp   type; };
+
+  template
+struct remove_reference<_Tp&&>
+{ typedef _Tp   type; };
+
+  template
+constexpr typename std::remove_reference<_Tp>::type&&
+move(_Tp&& __t) noexcept
+{ return static_cast::type&&>(__t); }
+}
+
+struct S1 {
+ S1();
+ S1(S1 const &) = delete;
+ S1(S1 &&);
+ S1 operator =(S1 const &) = delete;
+ S1 operator =(S1 &&);
+};
+
+struct S2 { S2(S1); };
+S2 f() {
+   S1 s;
+   return { std::move(s) }; // { dg-bogus "redundant move" }
+}

base-commit: cc4fa7a210b638d6a46f14dab17f2361389d18e1

Re: [PATCH] preprocessor: Implement C++23 P2437R1 - Support for #warning [PR106646]

2022-08-23 Thread Jason Merrill via Gcc-patches


On 8/22/22 02:53, Jakub Jelinek wrote:

Hi!

On Thu, Aug 18, 2022 at 11:02:44PM +, Joseph Myers wrote:

ISO C2x standardizes the existing #warning extension.  Arrange
accordingly for it not to be diagnosed with -std=c2x -pedantic, but to
be diagnosed with -Wc11-c2x-compat.


And here is the corresponding C++ version.
Don't pedwarn about this for C++23/GNU++23 and tweak the diagnostics
for C++ otherwise, + testsuite coverage.
The diagnostic wording is similar e.g. to the #elifdef diagnostics.

So far lightly tested, ok for trunk if it passes full bootstrap/regtest?


OK.


2022-08-22  Jakub Jelinek  

PR c++/106646
* init.cc: Implement C++23 P2437R1 - Support for #warning.
(lang_defaults): Set warning_directive for GNUCXX23 and CXX23.
* directives.cc (directive_diagnostics): Use different wording of
#warning pedwarn for C++.

* g++.dg/cpp/warning-1.C: New test.
* g++.dg/cpp/warning-2.C: New test.
* g++.dg/cpp/warning-3.C: New test.

--- libcpp/init.cc.jj   2022-08-20 10:25:17.613071845 +0200
+++ libcpp/init.cc  2022-08-22 11:30:57.642622570 +0200
@@ -123,8 +123,8 @@ static const struct lang_flags lang_defa
/* CXX17*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1,
 0,   1,  0,   1, 0,   0,   0,  0,  0 },
/* GNUCXX20 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,1, 1,
 0,   1,  1,   1, 0,   0,   0,  0,  0 },
/* CXX20*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1,
 0,   1,  1,   1, 0,   0,   0,  0,  0 },
-  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,1, 1, 
0,   1,  1,   1, 0,   1,   1,  0,  1 },
-  /* CXX23*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1, 
0,   1,  1,   1, 0,   1,   1,  0,  1 },
+  /* GNUCXX23 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,1, 1, 
0,   1,  1,   1, 0,   1,   1,  1,  1 },
+  /* CXX23*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1, 
0,   1,  1,   1, 0,   1,   1,  1,  1 },
/* ASM  */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,0, 0,
 0,   0,  0,   0, 0,   0,   0,  0,  0 }
  };
  
--- libcpp/directives.cc.jj	2022-08-19 16:00:05.295386974 +0200

+++ libcpp/directives.cc2022-08-22 11:30:03.239357642 +0200
@@ -388,8 +388,14 @@ directive_diagnostics (cpp_reader *pfile
else if (dir == &dtable[T_WARNING])
{
  if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, warning_directive))
-   cpp_error (pfile, CPP_DL_PEDWARN,
-  "#%s before C2X is a GCC extension", dir->name);
+   {
+ if (CPP_OPTION (pfile, cplusplus))
+   cpp_error (pfile, CPP_DL_PEDWARN,
+  "#%s before C++23 is a GCC extension", dir->name);
+ else
+   cpp_error (pfile, CPP_DL_PEDWARN,
+  "#%s before C2X is a GCC extension", dir->name);
+   }
  else if (CPP_OPTION (pfile, cpp_warn_c11_c2x_compat) > 0)
cpp_warning (pfile, CPP_W_C11_C2X_COMPAT,
 "#%s before C2X is a GCC extension", dir->name);
--- gcc/testsuite/g++.dg/cpp/warning-1.C.jj 2022-08-22 11:39:41.284547323 
+0200
+++ gcc/testsuite/g++.dg/cpp/warning-1.C2022-08-22 11:44:03.925988332 
+0200
@@ -0,0 +1,6 @@
+// P2437R1 - Support for #warning
+// { dg-do preprocess }
+// { dg-options "-pedantic-errors" }
+
+#warning example text /* { dg-warning "example text" } */
+// { dg-error "#warning before C\\\+\\\+23 is a GCC extension" "pedantic" { 
target c++20_down } .-1 }
--- gcc/testsuite/g++.dg/cpp/warning-2.C.jj 2022-08-22 11:39:44.116509055 
+0200
+++ gcc/testsuite/g++.dg/cpp/warning-2.C2022-08-22 11:44:14.933839041 
+0200
@@ -0,0 +1,6 @@
+// P2437R1 - Support for #warning
+// { dg-do preprocess }
+// { dg-options "-pedantic" }
+
+#warning example text /* { dg-warning "example text" } */
+// { dg-warning "#warning before C\\\+\\\+23 is a GCC extension" "pedantic" { 
target c++20_down } .-1 }
--- gcc/testsuite/g++.dg/cpp/warning-3.C.jj 2022-08-22 11:39:47.020469826 
+0200
+++ gcc/testsuite/g++.dg/cpp/warning-3.C2022-08-22 11:42:23.640348405 
+0200
@@ -0,0 +1,6 @@
+// P2437R1 - Support for #warning
+// { dg-do preprocess }
+// { dg-options "" }
+
+#warning example text /* { dg-warning "example text" } */
+// { dg-bogus "#warning before C\\\+\\\+23 is a GCC extension" "" { target 
*-*-* } .-1 }


Jakub

Re: [PATCH V4] rs6000: Optimize cmp on rotated 16bits constant

2022-08-23 Thread Segher Boessenkool

Hi!

On Mon, Jul 25, 2022 at 09:29:22PM +0800, Jiufu Guo wrote:
> When checking eq/neq with a constant which has only 16bits, it can be
> optimized to check the rotated data.  By this, the constant building
> is optimized.

"ne", not "neq".

> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000-protos.h (rotate_from_leading_zeros_const):
>   New decl.

"New declaration." or just "New".  Also, don't break lines early please,
especially if that means ending a line in a colon, which then looks as
if you forgot to write something there.

>   * config/rs6000/rs6000.cc (rotate_from_leading_zeros_const): New
>   define for checking simply rotated constant.

"New." or "New definition." or such.

> +/* Check if C can be rotated from an immediate which contains leading
> +   zeros at least CLZ.

"Which starts (as 64 bit integer) with at least CLZ bits zero" or such.

> +  /* xx0..0xx: rotate enough bits firstly, then check case a.  */
> +  const int rot_bits = HOST_BITS_PER_WIDE_INT - clz + 1;
> +  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
> +  tz = ctz_hwi (rc);
> +  if (clz_hwi (rc) + tz >= clz)
> +return tz + rot_bits;

This could use some more explanation.

> +(define_code_iterator eqne [eq ne])

> +;; "i == C" ==> "rotl(i,N) == rotl(C,N)"
> +(define_insn_and_split "*rotate_on_cmpdi"
> +  [(set (pc)
> + (if_then_else (eqne (match_operand:DI 1 "gpc_reg_operand" "r")

Wrong indentation.  The ( should be in the same column as the preceding (
it matches.

Setting "pc" to either 0 or 1 is never correct.

> +  "TARGET_POWERPC64 && !reload_completed && can_create_pseudo_p ()

reload_completed in splitters is almost always wrong.  It isn't any
better if it is in the insn condition of a define_insn_and_split :-)

> +   && num_insns_constant (operands[2], DImode) > 1
> +   && (rotate_from_leading_zeros_const (~UINTVAL (operands[2]), 49) > 0
> +   || rotate_from_leading_zeros_const (UINTVAL (operands[2]), 48) > 0)"

There must be a better way to describe this.

> +  if (rot < 0)
> +  {
> + sgn = true;
> + rot = rotate_from_leading_zeros_const (~C, 49);
> +  }

Bad indentation.

> +  rtx cmp = ne ? gen_rtx_NE (CCmode, cc, const0_rtx)
> +: gen_rtx_EQ (CCmode, cc, const0_rtx);

  rtx cmp = gen_rtx_ (...);

(and define a code_attr EQNE to just output the uppercase EQ or NE).

Why is this doing a conditional branch at all?  Unpredictable
conditional branches are extremely costly.

> +/* { dg-require-effective-target lp64 } */

arch_ppc64

> +/* { dg-final { scan-assembler-times "cmpldi" 10  } } */
> +/* { dg-final { scan-assembler-times "cmpdi" 4  } } */
> +/* { dg-final { scan-assembler-times "rotldi" 14  } } */

Please use \m and \M .

Thanks,

Segher

Re: Ping^2: 2 libcpp patches

2022-08-23 Thread Lewis Hyatt via Gcc-patches

On Tue, Aug 16, 2022 at 9:50 AM Lewis Hyatt  wrote:
>
> On Wed, Jul 20, 2022 at 8:56 PM Lewis Hyatt  wrote:
> >
> > Hello-
> >
> > May I please ping these two preprocessor patches?
> >
> > For PR103902:
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596704.html
> >
> > For PR55971:
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596820.html
> >
> > Thanks!
>
> Hello-
>
> I would very much appreciate feedback on these two patches please?
>
> For the first patch, I think it is a worthwhile goal to fix all the
> places where libcpp fails to support UTF-8 correctly, and this is one
> of two remaining ones that I'm aware of. I can fix the other case
> (handling of #pragma push_macro) once this one is in place.
>
> The second patch is about libcpp not allowing raw strings containing
> newlines in preprocessor directives, which is a nearly decade-old
> glitch that I think is also worth addressing.
>
> Thanks!
>
> -Lewis

Hello-

I  CCed Joseph on this ping last week, but he suggested asking other
maintainers to take a look, so I hope it's OK I am doing that now?
Jakub, I thought it could make sense to ask you please, since I saw
you working on lex.cc recently as well, and also since you commented
on PR55971 nine years ago :). With an uptick in development of libcpp
to support new C++23 features, I think the chances that my patches
will eventually conflict with those are increasing, so it would be
great to get them reviewed. As of current master branch, they still
apply and regtest fine. Thanks very much.

-Lewis

tree.cc: Fix optimization of DFP default initialization

2022-08-23 Thread Joseph Myers

When an object of decimal floating-point type is default-initialized,
GCC is inconsistent about whether it is given the all-zero-bits
representation (zero with the least quantum exponent) or whether it
acts like a conversion of integer 0 to the DFP type (zero with quantum
exponent 0).  In particular, the representation stored in memory can
have all zero bits, but optimization of access to the same object
based on its known constant value can then produce zero with quantum
exponent 0 instead.

C2x leaves the quantum exponent for default initialization
implementation-defined, but that doesn't allow such inconsistency in
the interpretation of a single object.  All zero bits seems most
appropriate; change build_real to special-case dconst0 the same way
other constants are special-cased and ensure that the correct zero for
the type is generated.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to commit?

gcc/
* tree.cc (build_real): Give DFP dconst0 the minimum quantum
exponent for the type.

gcc/testsuite/
* gcc.dg/torture/dfp-default-init-1.c,
gcc.dg/torture/dfp-default-init-2.c,
gcc.dg/torture/dfp-default-init-3.c: New tests.

diff --git a/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c 
b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c
new file mode 100644
index 000..f893ddb52b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c
@@ -0,0 +1,113 @@
+/* Test that default-initialized DFP values consistently have the least quantum
+   exponent.  */
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+
+extern void exit (int);
+extern void abort (void);
+void *memset (void *, int, __SIZE_TYPE__);
+int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+#ifndef TYPE
+#define TYPE _Decimal32
+#endif
+
+#ifndef ZEROFP
+#define ZEROFP 0e-101DF
+#endif
+
+TYPE zero_int = 0;
+TYPE zero_fp = ZEROFP;
+TYPE default_init;
+TYPE zero_bytes;
+TYPE x;
+
+struct s { TYPE a, b; };
+struct s s_default_init;
+struct s s_empty_init = {};
+struct s s_first_int = { 0 };
+struct s s_both_int = { 0, 0 };
+struct s sx;
+
+const TYPE a_default_init[10];
+const TYPE a_empty_init[10] = {};
+const TYPE a_first_int[10] = { 0 };
+const TYPE a_two_int[10] = { 0, 0 };
+
+#define CHECK_ZERO_BYTES(expr) \
+  do   \
+{  \
+  if (memcmp (expr, &zero_bytes, sizeof zero_bytes) != 0)  \
+   abort ();   \
+  TYPE tmp = *expr;\
+  if (memcmp (&tmp, &zero_bytes, sizeof zero_bytes) != 0)  \
+   abort ();   \
+}  \
+  while (0)
+
+#define CHECK_INT_BYTES(expr)  \
+  do   \
+{  \
+  if (memcmp (expr, &zero_int, sizeof zero_int) != 0)  \
+   abort ();   \
+  TYPE tmp = *expr;\
+  if (memcmp (&tmp, &zero_int, sizeof zero_int) != 0)  \
+   abort ();   \
+}  \
+  while (0)
+
+int
+main (void)
+{
+  memset (&zero_bytes, 0, sizeof zero_bytes);
+  if (memcmp (&zero_bytes, &zero_int, sizeof zero_int) == 0)
+abort ();
+  CHECK_ZERO_BYTES (&zero_fp);
+  CHECK_ZERO_BYTES (&default_init);
+  CHECK_ZERO_BYTES (&s_default_init.a);
+  CHECK_ZERO_BYTES (&s_default_init.b);
+  CHECK_ZERO_BYTES (&s_empty_init.a);
+  CHECK_ZERO_BYTES (&s_empty_init.b);
+  CHECK_INT_BYTES (&s_first_int.a);
+  CHECK_ZERO_BYTES (&s_first_int.b);
+  CHECK_INT_BYTES (&s_both_int.a);
+  CHECK_INT_BYTES (&s_both_int.b);
+  CHECK_ZERO_BYTES (&a_default_init[0]);
+  CHECK_ZERO_BYTES (&a_default_init[1]);
+  CHECK_ZERO_BYTES (&a_default_init[2]);
+  CHECK_ZERO_BYTES (&a_default_init[9]);
+  CHECK_ZERO_BYTES (&a_empty_init[0]);
+  CHECK_ZERO_BYTES (&a_empty_init[1]);
+  CHECK_ZERO_BYTES (&a_empty_init[2]);
+  CHECK_ZERO_BYTES (&a_empty_init[9]);
+  CHECK_INT_BYTES (&a_first_int[0]);
+  CHECK_ZERO_BYTES (&a_first_int[1]);
+  CHECK_ZERO_BYTES (&a_first_int[2]);
+  CHECK_ZERO_BYTES (&a_first_int[9]);
+  CHECK_INT_BYTES (&a_two_int[0]);
+  CHECK_INT_BYTES (&a_two_int[1]);
+  CHECK_ZERO_BYTES (&a_two_int[2]);
+  CHECK_ZERO_BYTES (&a_two_int[9]);
+  struct s s2 = {};
+  CHECK_ZERO_BYTES (&s2.a);
+  CHECK_ZERO_BYTES (&s2.b);
+  struct s s3 = { 0 };
+  CHECK_INT_BYTES (&s3.a);
+  CHECK_ZERO_BYTES (&s3.b);
+  struct s s4 = { 0, 0 };
+  CHECK_INT_BYTES (&s4.a);
+  CHECK_INT_BYTES (&s4.b);
+  struct s s5 = { 0 };
+  sx = s5;
+  CHECK_INT_BYTES (&sx.a);
+  CHECK_ZERO_BYTES (&sx.b);
+  x =

Re: Re: [PATCH] RISC-V: Add runtime invariant support

2022-08-23 Thread juzhe.zh...@rivai.ai

I tried #define LOGICAL_OP_NON_SHORT_CIRCUIT 1 in RISC-V port. The warning is 
still there.
Are you considering this 
patch:https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600120.html 
to solve this issue ? Or you are trying another solution to fix this ?



juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2022-08-23 17:34
To: Richard Biener
CC: Andrew Pinski; GCC Patches; Andrew Waterman; Andreas Schwab; Kito Cheng; 
juzhe.zhong
Subject: Re: [PATCH] RISC-V: Add runtime invariant support
On Mon, Aug 22, 2022 at 8:15 AM Richard Biener via Gcc-patches
 wrote:
>
> On Sat, 20 Aug 2022, Andrew Pinski wrote:
>
> > On Sat, Aug 20, 2022 at 3:34 PM Andreas Schwab  
> > wrote:
> > >
> > > This breaks bootstrap:
> > >
> > > ../../gcc/tree-vect-loop-manip.cc: In function 'void 
> > > vect_gen_vector_loop_niters(loop_vec_info, tree, tree_node**, 
> > > tree_node**, bool)':
> > > ../../gcc/tree-vect-loop-manip.cc:1981:26: error: 'const_vf' may be used 
> > > uninitialized [-Werror=maybe-uninitialized]
> > >  1981 |   unsigned HOST_WIDE_INT const_vf;
> > >   |  ^~~~
> > > cc1plus: all warnings being treated as errors
> > > make[3]: *** [Makefile:1146: tree-vect-loop-manip.o] Error 1
> > > make[2]: *** [Makefile:4977: all-stage2-gcc] Error 2
> > > make[1]: *** [Makefile:30363: stage2-bubble] Error 2
> > > make: *** [Makefile:1065: all] Error 2
> >
> >
> > This looks like a real uninitialized variable issue.
> > I even can't tell if the paths that lead to using const_vf will be
> > always set so how we expect GCC to do the same.
> > The code that uses const_vf was added with r11-5820-cdcbef3c3310,
> > CCing the author there.
>
> The key is
>
>   tree log_vf = NULL_TREE;
> ...
>   unsigned HOST_WIDE_INT const_vf;
>   if (vf.is_constant (&const_vf)
>   && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
> {
> ...
>   log_vf = build_int_cst (type, exact_log2 (const_vf));
> ...
> }
> ...
>   if (stmts != NULL && log_vf)
> {
> ... use const_vf ...
>
> so it's uninit analysis little mind that is confused.  There is code
> that's supposed to handle the situation (setting flag under condition,
> testing that flag instead of condition) but maybe it's too twisted
> here.  One could refector this as
>
>  bool const_vf_p = vf.is_constant (&const_vf);
>  if (const_vf_p
>  && ...)
> ...
>if (stmts != NULL && const_vf_p)
> ...
>
> and hope uninits mind is good enough to see log_vf is not used
> uninitialized.
>
> I can also look into why uninit doesn't get it, but preprocessed
> source would be handy then.
 
Btw, the riscv speciality is
 
gcc/config/riscv/riscv.h:#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
 
with --param logical-op-non-short-circuit=1 the diagnostic does not occur.
 
>
> Richard.

[PATCH] Don't gimple fold ymm-version vblendvpd/vblendvps/vpblendvb w/o TARGET_AVX2

2022-08-23 Thread liuhongt via Gcc-patches

Since 256-bit vector integer comparison is under TARGET_AVX2,
and gimple folding for vblendvpd/vblendvps/vpblendvb relies on that.
Restrict gimple fold condition to TARGET_AVX2.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

PR target/106704
* config/i386/i386-builtin.def (BDESC): Add
CODE_FOR_avx_blendvpd256/CODE_FOR_avx_blendvps256 to
corresponding builtins.
* config/i386/i386.cc (ix86_gimple_fold_builtin):
Don't fold IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_BLENDVPS256,
IX86_BUILTIN_BLENDVPD256 w/o TARGET_AVX2.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr106704.c: New test.
---
 gcc/config/i386/i386-builtin.def |  4 ++--
 gcc/config/i386/i386.cc  | 12 +---
 gcc/testsuite/gcc.target/i386/pr106704.c | 16 
 3 files changed, 27 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106704.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index acb7e8ca64b..f9c7abde2cf 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1036,8 +1036,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpe
 
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendpd256, 
"__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) 
V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendps256, 
"__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) 
V8SF_FTYPE_V8SF_V8SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd256", 
IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps256", 
IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvpd256, 
"__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) 
V4DF_FTYPE_V4DF_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, 
"__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) 
V8SF_FTYPE_V8SF_V8SF_V8SF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", 
IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, 
"__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) 
V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, 
"__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) 
V8SF_FTYPE_V8SF_V8SF_INT)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e27c87f8c83..c4d0e36e9c0 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18452,6 +18452,15 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
}
   break;
 
+case IX86_BUILTIN_PBLENDVB256:
+case IX86_BUILTIN_BLENDVPS256:
+case IX86_BUILTIN_BLENDVPD256:
+  /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
+to scalar operations and not combined back.  */
+  if (!TARGET_AVX2)
+   break;
+
+  /* FALLTHRU.  */
 case IX86_BUILTIN_BLENDVPD:
   /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
 w/o sse4.2, it's veclowered to scalar operations and
@@ -18460,10 +18469,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
break;
   /* FALLTHRU.  */
 case IX86_BUILTIN_PBLENDVB128:
-case IX86_BUILTIN_PBLENDVB256:
 case IX86_BUILTIN_BLENDVPS:
-case IX86_BUILTIN_BLENDVPS256:
-case IX86_BUILTIN_BLENDVPD256:
   gcc_assert (n_args == 3);
   arg0 = gimple_call_arg (stmt, 0);
   arg1 = gimple_call_arg (stmt, 1);
diff --git a/gcc/testsuite/gcc.target/i386/pr106704.c 
b/gcc/testsuite/gcc.target/i386/pr106704.c
new file mode 100644
index 000..44e052a4caa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106704.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx -O2 -mno-avx2" } */
+/* { dg-final { scan-assembler-times {vblendvps[ \t]+%ymm[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vblendvpd[ \t]+%ymm[0-9]+} 1 } } */
+
+#include 
+
+__m256 bend_stuff( __m256 a, __m256 b, __m256 mask)
+{
+  return _mm256_blendv_ps(a, b, mask);
+}
+
+__m256d bend_stuff1( __m256d a, __m256d b, __m256d mask)
+{
+  return _mm256_blendv_pd(a, b, mask);
+}
-- 
2.18.1

Ping: [PATCH v4] rs6000: Fix incorrect RTL for Power LE when removing the UNSPECS [PR106069]

2022-08-23 Thread Xionghu Luo via Gcc-patches

Hi Segher, I'd like to resend and ping for this patch. Thanks.
From 23bffdacdf0eb1140c7a3571e6158797f4818d57 Mon Sep 17 00:00:00 2001
From: Xionghu Luo 
Date: Thu, 4 Aug 2022 03:44:58 +
Subject: [PATCH v4] rs6000: Fix incorrect RTL for Power LE when removing the
 UNSPECS [PR106069]

v4: Update per comments.
v3: rename altivec_vmrghb_direct_le to altivec_vmrglb_direct_le to match
the actual output ASM vmrglb. Likewise for all similar xxx_direct_le
patterns.
v2: Split the direct pattern to be and le with same RTL but different insn.

The native RTL expression for vec_mrghw should be same for BE and LE as
they are register and endian-independent.  So both BE and LE need
generate exactly same RTL with index [0 4 1 5] when expanding vec_mrghw
with vec_select and vec_concat.

(set (reg:V4SI 141) (vec_select:V4SI (vec_concat:V8SI
   (subreg:V4SI (reg:V16QI 139) 0)
   (subreg:V4SI (reg:V16QI 140) 0))
   [const_int 0 4 1 5]))

Then combine pass could do the nested vec_select optimization
in simplify-rtx.c:simplify_binary_operation_1 also on both BE and LE:

21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel [0 4 1 5])
24: {r151:SI=vec_select(r150:V4SI,parallel [const_int 3]);}

=>

21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel)
24: {r151:SI=vec_select(r146:V4SI,parallel [const_int 1]);}

The endianness check need only once at ASM generation finally.
ASM would be better due to nested vec_select simplified to simple scalar
load.

Regression tested pass for Power8{LE,BE}{32,64} and Power{9,10}LE{32,64}
Linux.

gcc/ChangeLog:

PR target/106069
* config/rs6000/altivec.md (altivec_vmrghb_direct): Remove.
(altivec_vmrghb_direct_be): New pattern for BE.
(altivec_vmrghb_direct_le): New pattern for LE.
(altivec_vmrghh_direct): Remove.
(altivec_vmrghh_direct_be): New pattern for BE.
(altivec_vmrghh_direct_le): New pattern for LE.
(altivec_vmrghw_direct_): Remove.
(altivec_vmrghw_direct__be): New pattern for BE.
(altivec_vmrghw_direct__le): New pattern for LE.
(altivec_vmrglb_direct): Remove.
(altivec_vmrglb_direct_be): New pattern for BE.
(altivec_vmrglb_direct_le): New pattern for LE.
(altivec_vmrglh_direct): Remove.
(altivec_vmrglh_direct_be): New pattern for BE.
(altivec_vmrglh_direct_le): New pattern for LE.
(altivec_vmrglw_direct_): Remove.
(altivec_vmrglw_direct__be): New pattern for BE.
(altivec_vmrglw_direct__le): New pattern for LE.
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const):
Adjust.
* config/rs6000/vsx.md: Likewise.

gcc/testsuite/ChangeLog:

PR target/106069
* g++.target/powerpc/pr106069.C: New test.

Signed-off-by: Xionghu Luo 
---
 gcc/config/rs6000/altivec.md| 222 ++--
 gcc/config/rs6000/rs6000.cc |  24 +--
 gcc/config/rs6000/vsx.md|  28 +--
 gcc/testsuite/g++.target/powerpc/pr106069.C | 118 +++
 4 files changed, 307 insertions(+), 85 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/pr106069.C

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2c4940f2e21..c6a381908cb 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1144,15 +1144,16 @@ (define_expand "altivec_vmrghb"
(use (match_operand:V16QI 2 "register_operand"))]
   "TARGET_ALTIVEC"
 {
-  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct
-   : gen_altivec_vmrglb_direct;
-  if (!BYTES_BIG_ENDIAN)
-std::swap (operands[1], operands[2]);
-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+emit_insn (
+  gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2]));
+  else
+emit_insn (
+  gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1]));
   DONE;
 })

-(define_insn "altivec_vmrghb_direct"
+(define_insn "altivec_vmrghb_direct_be"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_select:V16QI
  (vec_concat:V32QI
@@ -1166,7 +1167,25 @@ (define_insn "altivec_vmrghb_direct"
 (const_int 5) (const_int 21)
 (const_int 6) (const_int 22)
 (const_int 7) (const_int 23)])))]
-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "vmrghb %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghb_direct_le"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+   (vec_select:V16QI
+ (vec_concat:V32QI
+   (match_operand:V16QI 2 "register_operand" "v")
+   (match_operand:V16QI 1 "register_operand" "v"))
+ (parallel [(const_int  8) (const_int 24)
+(const_int  9) (const_int 25)
+(const_int 10) (cons

Re: [PATCH] MIPS: improve -march=native arch detection

2022-08-23 Thread YunQiang Su

YunQiang Su  于2022年8月2日周二 19:11写道：
>
> If we cannot get info from options and cpuinfo, we try to get from:
>   1. getauxval(AT_BASE_PLATFORM), introduced since Linux 5.7
>   2. _MIPS_ARCH from host compiler.
>
> This can fix the wrong loader usage on r5/r6 platform with
>  -march=native.
>

Is it treat as minor fixes?

> gcc/ChangeLog:
> * config/mips/driver-native.cc (host_detect_local_cpu):
>   try getauxval(AT_BASE_PLATFORM) and _MIPS_ARCH, too.
> ---
>  gcc/config/mips/driver-native.cc | 22 +++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/mips/driver-native.cc 
> b/gcc/config/mips/driver-native.cc
> index 47627f85ce1..9aa7044c0b8 100644
> --- a/gcc/config/mips/driver-native.cc
> +++ b/gcc/config/mips/driver-native.cc
> @@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
>
>  #define IN_TARGET_CODE 1
>
> +#include 
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> @@ -46,15 +47,15 @@ host_detect_local_cpu (int argc, const char **argv)
>bool arch;
>
>if (argc < 1)
> -return NULL;
> +goto fallback_cpu;
>
>arch = strcmp (argv[0], "arch") == 0;
>if (!arch && strcmp (argv[0], "tune"))
> -return NULL;
> +goto fallback_cpu;
>
>f = fopen ("/proc/cpuinfo", "r");
>if (f == NULL)
> -return NULL;
> +goto fallback_cpu;
>
>while (fgets (buf, sizeof (buf), f) != NULL)
>  if (startswith (buf, "cpu model"))
> @@ -84,8 +85,23 @@ host_detect_local_cpu (int argc, const char **argv)
>
>fclose (f);
>
> +fallback_cpu:
> +/*FIXME: how about other OSes, like FreeBSD? */
> +#ifdef __linux__
> +  /*Note: getauxval may return NULL as:
> +   * AT_BASE_PLATFORM is supported since Linux 5.7
> +   * Or from older version of qemu-user
> +   * */
> +  if (cpu == NULL)
> +cpu = (const char *) getauxval (AT_BASE_PLATFORM);
> +#endif
> +
>if (cpu == NULL)
> +#if defined (_MIPS_ARCH)
> +cpu = _MIPS_ARCH;
> +#else
>  return NULL;
> +#endif
>
>return concat ("-m", argv[0], "=", cpu, NULL);
>  }
> --
> 2.30.2
>

Re: tree.cc: Fix optimization of DFP default initialization

2022-08-23 Thread Richard Biener via Gcc-patches




> Am 24.08.2022 um 01:54 schrieb Joseph Myers :
> 
> When an object of decimal floating-point type is default-initialized,
> GCC is inconsistent about whether it is given the all-zero-bits
> representation (zero with the least quantum exponent) or whether it
> acts like a conversion of integer 0 to the DFP type (zero with quantum
> exponent 0).  In particular, the representation stored in memory can
> have all zero bits, but optimization of access to the same object
> based on its known constant value can then produce zero with quantum
> exponent 0 instead.
> 
> C2x leaves the quantum exponent for default initialization
> implementation-defined, but that doesn't allow such inconsistency in
> the interpretation of a single object.  All zero bits seems most
> appropriate; change build_real to special-case dconst0 the same way
> other constants are special-cased and ensure that the correct zero for
> the type is generated.
> 
> Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to commit?

Ok

Thanks,
Richard 

> gcc/
>* tree.cc (build_real): Give DFP dconst0 the minimum quantum
>exponent for the type.
> 
> gcc/testsuite/
>* gcc.dg/torture/dfp-default-init-1.c,
>gcc.dg/torture/dfp-default-init-2.c,
>gcc.dg/torture/dfp-default-init-3.c: New tests.
> 
> diff --git a/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c 
> b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c
> new file mode 100644
> index 000..f893ddb52b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c
> @@ -0,0 +1,113 @@
> +/* Test that default-initialized DFP values consistently have the least 
> quantum
> +   exponent.  */
> +/* { dg-do run } */
> +/* { dg-require-effective-target dfp } */
> +
> +extern void exit (int);
> +extern void abort (void);
> +void *memset (void *, int, __SIZE_TYPE__);
> +int memcmp (const void *, const void *, __SIZE_TYPE__);
> +
> +#ifndef TYPE
> +#define TYPE _Decimal32
> +#endif
> +
> +#ifndef ZEROFP
> +#define ZEROFP 0e-101DF
> +#endif
> +
> +TYPE zero_int = 0;
> +TYPE zero_fp = ZEROFP;
> +TYPE default_init;
> +TYPE zero_bytes;
> +TYPE x;
> +
> +struct s { TYPE a, b; };
> +struct s s_default_init;
> +struct s s_empty_init = {};
> +struct s s_first_int = { 0 };
> +struct s s_both_int = { 0, 0 };
> +struct s sx;
> +
> +const TYPE a_default_init[10];
> +const TYPE a_empty_init[10] = {};
> +const TYPE a_first_int[10] = { 0 };
> +const TYPE a_two_int[10] = { 0, 0 };
> +
> +#define CHECK_ZERO_BYTES(expr)\
> +  do\
> +{\
> +  if (memcmp (expr, &zero_bytes, sizeof zero_bytes) != 0)\
> +abort ();\
> +  TYPE tmp = *expr;\
> +  if (memcmp (&tmp, &zero_bytes, sizeof zero_bytes) != 0)\
> +abort ();\
> +}\
> +  while (0)
> +
> +#define CHECK_INT_BYTES(expr)\
> +  do\
> +{\
> +  if (memcmp (expr, &zero_int, sizeof zero_int) != 0)\
> +abort ();\
> +  TYPE tmp = *expr;\
> +  if (memcmp (&tmp, &zero_int, sizeof zero_int) != 0)\
> +abort ();\
> +}\
> +  while (0)
> +
> +int
> +main (void)
> +{
> +  memset (&zero_bytes, 0, sizeof zero_bytes);
> +  if (memcmp (&zero_bytes, &zero_int, sizeof zero_int) == 0)
> +abort ();
> +  CHECK_ZERO_BYTES (&zero_fp);
> +  CHECK_ZERO_BYTES (&default_init);
> +  CHECK_ZERO_BYTES (&s_default_init.a);
> +  CHECK_ZERO_BYTES (&s_default_init.b);
> +  CHECK_ZERO_BYTES (&s_empty_init.a);
> +  CHECK_ZERO_BYTES (&s_empty_init.b);
> +  CHECK_INT_BYTES (&s_first_int.a);
> +  CHECK_ZERO_BYTES (&s_first_int.b);
> +  CHECK_INT_BYTES (&s_both_int.a);
> +  CHECK_INT_BYTES (&s_both_int.b);
> +  CHECK_ZERO_BYTES (&a_default_init[0]);
> +  CHECK_ZERO_BYTES (&a_default_init[1]);
> +  CHECK_ZERO_BYTES (&a_default_init[2]);
> +  CHECK_ZERO_BYTES (&a_default_init[9]);
> +  CHECK_ZERO_BYTES (&a_empty_init[0]);
> +  CHECK_ZERO_BYTES (&a_empty_init[1]);
> +  CHECK_ZERO_BYTES (&a_empty_init[2]);
> +  CHECK_ZERO_BYTES (&a_empty_init[9]);
> +  CHECK_INT_BYTES (&a_first_int[0]);
> +  CHECK_ZERO_BYTES (&a_first_int[1]);
> +  CHECK_ZERO_BYTES (&a_first_int[2]);
> +  CHECK_ZERO_BYTES (&a_first_int[9]);
> +  CHECK_INT_BYTES (&a_two_int[0]);
> +  CHECK_INT_BYTES (&a_two_int[1]);
> +  CHECK_ZERO_BYTES (&a_two_int[2]);
> +  CHECK_ZERO_BYTES (&a_two_int[9]);
> +  struct s s2 = {};
> +  CHECK_ZERO_BYTES (&s2.a);
> +  CHECK_ZERO_BYTES (&s2.b);
> +  struct s s3 = { 0 };
> +  CHECK_INT_BYTES (&s3.a);
> +  CHECK_ZERO_BYTES (&s3.b);
> +  struct s s4 = { 0, 0 };
> +  CHECK_INT_BYTES (&s4.a);
> +  CHECK_INT_BYTES (&s4.b);
> +  struct s s5 = { 0 };
> +  sx = s5;
> +  CHECK_INT_BYTES (&sx.a);
> +  CHECK_ZERO_BYTES (&sx.b);
> +  x = default_

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-23 Thread HAO CHEN GUI via Gcc-patches

Hi Segher,

On 23/8/2022 下午 10:26, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote:
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>> @@ -1,7 +1,8 @@
>>  /* { dg-do compile { target { powerpc*-*-* } } } */
>> -/* { dg-require-effective-target lp64 } */
>> -/* { dg-require-effective-target powerpc_p9vector_ok } */
>>  /* { dg-options "-mdejagnu-cpu=power9" } */
>> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && 
>> ilp32 } } } */
> 
> You can add this always.  It is default on 64-bit systems, but it is
> simpler to just always add it:
> /* { dg-additional-options "-mpowerpc64" } */
> 
> Or are there subtargets that will error on this?
Yes, AIX fails if TARGET_POWERPC64 is set and TARGET_64BIT is not set.
So I add "-mpowerpc64" for Linux 32-bit environment.

  if (TARGET_POWERPC64 && ! TARGET_64BIT)   \
{   \
  error ("%<-maix64%> required: 64-bit computation with 32-bit addressing 
not yet supported"); \
}

Thanks a lot
Gui Haochen

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-23 Thread Kewen.Lin via Gcc-patches

on 2022/8/24 13:11, HAO CHEN GUI wrote:
> Hi Segher,
> 
> On 23/8/2022 下午 10:26, Segher Boessenkool wrote:
>> Hi!
>>
>> On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote:
>>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
>>> @@ -1,7 +1,8 @@
>>>  /* { dg-do compile { target { powerpc*-*-* } } } */
>>> -/* { dg-require-effective-target lp64 } */
>>> -/* { dg-require-effective-target powerpc_p9vector_ok } */
>>>  /* { dg-options "-mdejagnu-cpu=power9" } */
>>> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && 
>>> ilp32 } } } */
>>
>> You can add this always.  It is default on 64-bit systems, but it is
>> simpler to just always add it:
>> /* { dg-additional-options "-mpowerpc64" } */
>>
>> Or are there subtargets that will error on this?
> Yes, AIX fails if TARGET_POWERPC64 is set and TARGET_64BIT is not set.
> So I add "-mpowerpc64" for Linux 32-bit environment.
> 
>   if (TARGET_POWERPC64 && ! TARGET_64BIT)   \
> {   \
>   error ("%<-maix64%> required: 64-bit computation with 32-bit addressing 
> not yet supported"); \
> }
> 

Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all 
the time, but still
having that has_arch_ppc64 effective target on aix?

I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a 
problem (turning
into an UNSUPPORTED then)?

BR,
Kewen

Re: [PATCH] MIPS: improve -march=native arch detection

2022-08-23 Thread Xi Ruoyao via Gcc-patches

On Tue, 2022-08-02 at 11:10 +, YunQiang Su wrote:

> If we cannot get info from options and cpuinfo, we try to get from:
>   1. getauxval(AT_BASE_PLATFORM), introduced since Linux 5.7
>   2. _MIPS_ARCH from host compiler.
> 
> This can fix the wrong loader usage on r5/r6 platform with
>  -march=native.

/* snip */

>    if (argc < 1)
> -    return NULL;
> +    goto fallback_cpu;

I don't think this should be changed, if argc < 1 it means the spec
(disambiguation: the thing printed by "gcc -dumpspecs", hard coded in
gnu-user.h) is wrong.  It cannot happen with the built-in spec, but if a
user specifies a bad custom spec with "-spec", we shouldn't be tricked.

>    arch = strcmp (argv[0], "arch") == 0;
>    if (!arch && strcmp (argv[0], "tune"))
> -    return NULL;
> +    goto fallback_cpu;

Likewise.

>    f = fopen ("/proc/cpuinfo", "r");
>    if (f == NULL)
> -    return NULL;
> +    goto fallback_cpu;

OK.

> +fallback_cpu:
> +/*FIXME: how about other OSes, like FreeBSD? */

https://reviews.freebsd.org/D12743 added elf_aux_info as a counterpart
of getauxinfo, but it looks like FreeBSD does not have AT_BASE_PLATFORM.

> +#ifdef __linux__
> +  /*Note: getauxval may return NULL as:
> +   * AT_BASE_PLATFORM is supported since Linux 5.7
> +   * Or from older version of qemu-user
> +   * */
> +  if (cpu == NULL)
> +    cpu = (const char *) getauxval (AT_BASE_PLATFORM);

getauxval is added in Glibc-2.16 so it will fail to build on hosts with
old glibc or other libc implementation.  Check if getauxval and
AT_BASE_PLATFORM are available (in gcc/configure.ac) instead of an
inaccurate "#ifdef __linux__".

> +#endif
> +
>    if (cpu == NULL)
> +#if defined (_MIPS_ARCH)
> +    cpu = _MIPS_ARCH;
> +#else

Ok.

>  return NULL;
> +#endif
>  
>    return concat ("-m", argv[0], "=", cpu, NULL);
>  }

[PATCH v2] libstdc++: Optimize operator+(string/char,string/char)

2022-08-23 Thread whh8b

A revision of the original patch -- based on the feedback from Jonathan -- that
removes the `inline` specifier is attached.

[PATCH] libstdc++: Optimize operator+(string/char, string/char) equally

2022-08-23 Thread whh8b

From: Will Hawkins 

Until now operator+(char*, string) and operator+(string, char*) had
different performance characteristics. The former required a single
memory allocation and the latter required two. This patch makes the
performance equal.

libstdc++-v3/ChangeLog:
* libstdc++-v3/include/bits/basic_string.h (operator+(string, char*)):
Remove naive implementation.
* libstdc++-v3/include/bits/basic_string.tcc (operator+(string, char*)):
Add single-allocation implementation.

Signed-off-by: Will Hawkins 
---
 libstdc++-v3/include/bits/basic_string.h   |  9 ++---
 libstdc++-v3/include/bits/basic_string.tcc | 21 +
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index b04fba95678..fa6738925bb 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -3521,14 +3521,9 @@ _GLIBCXX_END_NAMESPACE_CXX11
*/
   template
 _GLIBCXX20_CONSTEXPR
-inline basic_string<_CharT, _Traits, _Alloc>
+basic_string<_CharT, _Traits, _Alloc>
 operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
- const _CharT* __rhs)
-{
-  basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
-  __str.append(__rhs);
-  return __str;
-}
+ const _CharT* __rhs);
 
   /**
*  @brief  Concatenate string and character.
diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
b/libstdc++-v3/include/bits/basic_string.tcc
index 4563c61429a..95ba8e503e9 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __str;
 }
 
+  template
+_GLIBCXX20_CONSTEXPR
+basic_string<_CharT, _Traits, _Alloc>
+operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
+ const _CharT* __rhs)
+{
+  __glibcxx_requires_string(__rhs);
+  typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
+  typedef typename __string_type::size_type  __size_type;
+  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
+   rebind<_CharT>::other _Char_alloc_type;
+  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
+  const __size_type __len = _Traits::length(__rhs);
+  __string_type __str(_Alloc_traits::_S_select_on_copy(
+  __lhs.get_allocator()));
+  __str.reserve(__len + __lhs.size());
+  __str.append(__lhs);
+  __str.append(__rhs, __len);
+  return __str;
+}
+
   template
 _GLIBCXX_STRING_CONSTEXPR
 typename basic_string<_CharT, _Traits, _Alloc>::size_type
-- 
2.34.1

Re: [PATCH] libstdc++: Optimize operator+(string/char, string/char) equally

2022-08-23 Thread Will Hawkins

On Tue, Aug 23, 2022 at 12:33 PM Jonathan Wakely  wrote:
>
> On Mon, 22 Aug 2022 at 19:15, Will Hawkins wrote:
> >
> > Until now operator+(char*, string) and operator+(string, char*) had
> > different performance characteristics. The former required a single
> > memory allocation and the latter required two. This patch makes the
> > performance equal.
>
> If you don't have a GCC copyright assignment on file with the FSF then
> please follow the procedure described at https://gcc.gnu.org/dco.html

Thank you.

>
>
> > libstdc++-v3/ChangeLog:
> > * libstdc++-v3/include/bits/basic_string.h (operator+(string, 
> > char*)):
> > Remove naive implementation.
> > * libstdc++-v3/include/bits/basic_string.tcc (operator+(string, 
> > char*)):
> > Add single-allocation implementation.
> > ---
> >  libstdc++-v3/include/bits/basic_string.h   |  7 +--
> >  libstdc++-v3/include/bits/basic_string.tcc | 21 +
> >  2 files changed, 22 insertions(+), 6 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/basic_string.h 
> > b/libstdc++-v3/include/bits/basic_string.h
> > index b04fba95678..bc048fc0689 100644
> > --- a/libstdc++-v3/include/bits/basic_string.h
> > +++ b/libstdc++-v3/include/bits/basic_string.h
> > @@ -3523,12 +3523,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
> >  _GLIBCXX20_CONSTEXPR
> >  inline basic_string<_CharT, _Traits, _Alloc>
>
> Please remove the 'inline' specifier here, since you're moving the
> definition into the non-inline .tcc file.
>
> There's a separate discussion to be had about whether these operator+
> overloads *should* be inline. But for the purposes of this change, we
> want these two operator+ overloads to be consistent, and so they
> should both be non-inline.

Thank you for the feedback. I sent out a v2 of the patch. Again, I
hope that I followed the proper procedure by having my mailer put the
patch in reply to my previous message.

Thank you again!
Will

>
> >  operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> > - const _CharT* __rhs)
> > -{
> > -  basic_string<_CharT, _Traits, _Alloc> __str(__lhs);
> > -  __str.append(__rhs);
> > -  return __str;
> > -}
> > + const _CharT* __rhs);
> >
> >/**
> > *  @brief  Concatenate string and character.
> > diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
> > b/libstdc++-v3/include/bits/basic_string.tcc
> > index 4563c61429a..95ba8e503e9 100644
> > --- a/libstdc++-v3/include/bits/basic_string.tcc
> > +++ b/libstdc++-v3/include/bits/basic_string.tcc
> > @@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >return __str;
> >  }
> >
> > +  template
> > +_GLIBCXX20_CONSTEXPR
> > +basic_string<_CharT, _Traits, _Alloc>
> > +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> > + const _CharT* __rhs)
> > +{
> > +  __glibcxx_requires_string(__rhs);
> > +  typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
> > +  typedef typename __string_type::size_type  __size_type;
> > +  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
> > +   rebind<_CharT>::other _Char_alloc_type;
> > +  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
> > +  const __size_type __len = _Traits::length(__rhs);
> > +  __string_type __str(_Alloc_traits::_S_select_on_copy(
> > +  __lhs.get_allocator()));
> > +  __str.reserve(__len + __lhs.size());
> > +  __str.append(__lhs);
> > +  __str.append(__rhs, __len);
> > +  return __str;
> > +}
> > +
> >template
> >  _GLIBCXX_STRING_CONSTEXPR
> >  typename basic_string<_CharT, _Traits, _Alloc>::size_type
> > --
> > 2.34.1
> >
>

Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions

2022-08-23 Thread HAO CHEN GUI via Gcc-patches

Hi Kewen,

On 24/8/2022 下午 1:24, Kewen.Lin wrote:
> Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all 
> the time, but still
> having that has_arch_ppc64 effective target on aix?
> 
> I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a 
> problem (turning
> into an UNSUPPORTED then)?

Good point. I will get an AIX to test it.

Thanks
Gui Haochen

Re: [PATCH] Speedup path discovery in predicate::use_cannot_happen

2022-08-23 Thread Richard Biener via Gcc-patches

On Tue, 23 Aug 2022, Aldy Hernandez wrote:

> On Tue, Aug 23, 2022 at 2:16 PM Richard Biener  wrote:
> >
> > The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that
> > made compute_control_dep_chain start from function entry rather
> > than the immediate dominator of the source block of the edge with
> > the undefined value on the PHI node.  Reverting at that point
> > does not reveal any testsuite FAIL, in particular the added
> > testcase still passes.  The following adjusts this to the other
> > function that computes predicates that hold on the PHI incoming
> > edges with undefined values, predicate::init_from_phi_def, which
> > starts at the immediate dominator of the PHI.  That's much less
> > likely to run into the CFG walking limit.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > Aldy - you did this change, do you remember anything here?  In
> > fact the whole thing that's now called predicate::use_cannot_happen
> > seems to be redundant - the two testcases attributed to its history
> > do not fail when that's disabled, nor did they fail when it was
> > introduced.  In principle whats now called predicate::superset_of
> > should cover this (but different implementation limits might apply).
> 
> OMG, I'm drawing a complete blank here.  I have no recollection of
> this.  I'm tempted to say either my account was hacked or that old
> code was all wrong ;-).

Eh.

Pushed then.  I'm going to baby-step changes so we can figure out
things when later bisecting regressions ...

Thanks,
Richard.

Re: LoongArch: add model attribute

2022-08-23 Thread Lulu Cheng




在 2022/8/23 下午10:51, Xi Ruoyao 写道:

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index cadaad7519c..4e925aa3876 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -30,6 +30,12 @@ along with GCC; see the file COPYING3.  If not see
 SYMBOL_PCREL
 The symbol's value will be loaded directly from data section.
  
+   SYMBOL_PCREL32

+   SYMBOL_PCREL64
+   Like SYMBOL_PCREL, but the PC-relative offset of the symbol is
+   assumed to be in the ± 2GiB or ± 8 EiB range, instead of the
+   code model specification.
+
 SYMBOL_TLS
 A thread-local symbol.
  


I don't think it's necessary to add SYMBOL_PCREL32, SYMBOL_PCREL can 
represent relative pc operation in normal mode.

Re: [commited PATCH v1] LoongArch: Add new code model 'medium'.

2022-08-23 Thread Lulu Cheng


Pushed to r13-2165.

在 2022/8/20 下午5:18, Huacai Chen 写道:

Hi, Lulu,

I think there is a typo in your subject line.

Huacai

On Sat, Aug 20, 2022 at 5:05 PM Lulu Cheng  wrote:

The function jump instruction in normal mode is 'bl',
so the scope of the function jump is +-128MB.

Now we've added support for 'medium' mode, this mode is
to complete the function jump through two instructions:
 pcalau12i + jirl
So in this mode the function jump range is increased to +-2GB.

Compared with 'normal' mode, 'medium' mode only affects the
jump range of functions.

gcc/ChangeLog:

 * config/loongarch/genopts/loongarch-strings: Support code model 
medium.
 * config/loongarch/genopts/loongarch.opt.in: Likewise.
 * config/loongarch/loongarch-def.c: Likewise.
 * config/loongarch/loongarch-def.h (CMODEL_LARGE): Likewise.
 (CMODEL_EXTREME): Likewise.
 (N_CMODEL_TYPES): Likewise.
 (CMODEL_MEDIUM): Likewise.
 * config/loongarch/loongarch-opts.cc: Likewise.
 * config/loongarch/loongarch-opts.h (TARGET_CMODEL_MEDIUM): Likewise.
 * config/loongarch/loongarch-str.h (STR_CMODEL_MEDIUM): Likewise.
 * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
 Tls symbol Loading support medium mode.
 (loongarch_legitimize_call_address): When medium mode, make a symbolic
 jump with two instructions.
 (loongarch_option_override_internal): Support medium.
 * config/loongarch/loongarch.md (@pcalau12i): New template.
 (@sibcall_internal_1): New function call templates added to 
support
 medium mode.
 (@sibcall_value_internal_1): Likewise.
 (@sibcall_value_multiple_internal_1): Likewise.
 (@call_internal_1): Likewise.
 (@call_value_internal_1): Likewise.
 (@call_value_multiple_internal_1): Likewise.
 * config/loongarch/loongarch.opt: Support medium.
 * config/loongarch/predicates.md: Add processing about medium mode.
 * doc/invoke.texi: Document for '-mcmodel=medium'.

gcc/testsuite/ChangeLog:

 * gcc.target/loongarch/func-call-medium-1.c: New test.
 * gcc.target/loongarch/func-call-medium-2.c: New test.
 * gcc.target/loongarch/func-call-medium-3.c: New test.
 * gcc.target/loongarch/func-call-medium-4.c: New test.
 * gcc.target/loongarch/func-call-medium-5.c: New test.
 * gcc.target/loongarch/func-call-medium-6.c: New test.
 * gcc.target/loongarch/func-call-medium-7.c: New test.
 * gcc.target/loongarch/func-call-medium-8.c: New test.
 * gcc.target/loongarch/tls-gd-noplt.c: Add compile parameter 
'-mexplicit-relocs'.
---
  .../loongarch/genopts/loongarch-strings   |   1 +
  gcc/config/loongarch/genopts/loongarch.opt.in |   3 +
  gcc/config/loongarch/loongarch-def.c  |   1 +
  gcc/config/loongarch/loongarch-def.h  |   7 +-
  gcc/config/loongarch/loongarch-opts.cc|  15 ++-
  gcc/config/loongarch/loongarch-opts.h |   1 +
  gcc/config/loongarch/loongarch-str.h  |   1 +
  gcc/config/loongarch/loongarch.cc | 123 +
  gcc/config/loongarch/loongarch.md | 125 +-
  gcc/config/loongarch/loongarch.opt|   3 +
  gcc/config/loongarch/predicates.md|  15 ++-
  gcc/doc/invoke.texi   |   3 +
  .../gcc.target/loongarch/func-call-medium-1.c |  41 ++
  .../gcc.target/loongarch/func-call-medium-2.c |  41 ++
  .../gcc.target/loongarch/func-call-medium-3.c |  41 ++
  .../gcc.target/loongarch/func-call-medium-4.c |  41 ++
  .../gcc.target/loongarch/func-call-medium-5.c |  42 ++
  .../gcc.target/loongarch/func-call-medium-6.c |  42 ++
  .../gcc.target/loongarch/func-call-medium-7.c |  43 ++
  .../gcc.target/loongarch/func-call-medium-8.c |  42 ++
  .../gcc.target/loongarch/tls-gd-noplt.c   |   4 +-
  21 files changed, 595 insertions(+), 40 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c

diff --git a/gcc/config/loongarch/genopts/loongarch-strings 
b/gcc/config/loongarch/genopts/loongarch-strings
index cb88ed56b68..44ebb7ab10b 100644
--- a/gcc/config/loongarch/genopts/loongarch-strings
+++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -54,5 +54,6 @@ OPTSTR_CMODEL   cmodel
  STR_CMODEL_NORMAL normal
  STR_CMODEL_TINY

62 matches

Mail list logo