Re: [PATCH] c, v2: Speed up compilation of large char array initializers when not using #embed

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Jakub Jelinek wrote:

> On Tue, Oct 08, 2024 at 07:42:11PM +0000, Joseph Myers wrote:
> > On Sat, 3 Aug 2024, Jakub Jelinek wrote:
> > 
> > >   * c-c++-common/init-1.c: New test.
> > 
> > I think there should also be tests of initializing signed char (and plain 
> > char) arrays; I don't see any such tests here.
> 
> Here is an updated patch with init-2.c (signed char rather than unsigned
> char) and init-3.c (plain char) tests added.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested
> on the former with explicit -fsigned-char and with explicit -funsigned-char,
> ok for trunk?

OK.

-- 
Joseph S. Myers
josmy...@redhat.com



[committed, v2] libstdc++: Test 17_intro/names.cc with -D_FORTIFY_SOURCE=2 [PR116210]

2024-10-09 Thread Jonathan Wakely
I removed the >= 2.35 check, because it was failing on a RHEL glibc 2.34
with the fortify changes backported. So let's just #undef sz for anything
older than 2.41 instead.

Tested powerpc64le-linux (glibc-2.34-100.el9_4.3) and x86_64-linux
(glibc-2.39-22.fc40).

Pushed to trunk.

-- >8 --

Add a new testcase that repeats 17_intro/names.cc but with
_FORTIFY_SOURCE defined, to find problems in Glibc fortify wrappers like
https://sourceware.org/bugzilla/show_bug.cgi?id=32052 (which is fixed
now).

libstdc++-v3/ChangeLog:

PR libstdc++/116210
* testsuite/17_intro/names.cc (sz): Undef for versions of Glibc
that use it in the fortify wrappers.
* testsuite/17_intro/names_fortify.cc: New test.
---
 libstdc++-v3/testsuite/17_intro/names.cc | 7 +++
 libstdc++-v3/testsuite/17_intro/names_fortify.cc | 6 ++
 2 files changed, 13 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/17_intro/names_fortify.cc

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index bea2d19ecba..5deb310dc31 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -383,4 +383,11 @@
 #undef y
 #endif
 
+#if defined __GLIBC_PREREQ && defined _FORTIFY_SOURCE
+# if ! __GLIBC_PREREQ(2,41)
+// https://sourceware.org/bugzilla/show_bug.cgi?id=32052
+#  undef sz
+# endif
+#endif
+
 #include 
diff --git a/libstdc++-v3/testsuite/17_intro/names_fortify.cc 
b/libstdc++-v3/testsuite/17_intro/names_fortify.cc
new file mode 100644
index 000..c975412074b
--- /dev/null
+++ b/libstdc++-v3/testsuite/17_intro/names_fortify.cc
@@ -0,0 +1,6 @@
+// { dg-do compile { target *-*-linux* } }
+// { dg-add-options no_pch }
+
+#define _FORTIFY_SOURCE 2
+// Now we can define the macros to poison uses of non-reserved names:
+#include "names.cc"
-- 
2.46.2



Re: [PATCH v5 1/2] aarch64: Add SVE2 faminmax intrinsics

2024-10-09 Thread Richard Sandiford
 writes:
> +/*
> +** amax_0_f16_m_tied1:
> +**   ...
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)

Sorry to ask for another round, but: the "(" and ")" aren't needed.
They're used when something later in the regular expression sequence
needs to refer back to something earlier, such as in:

**  mov (z[0-9]+\.h), h4
**  movprfx z0, z1
**  famax   z0\.h, p0/m, z0\.h, \1

Here, the "\1" refers back to the "(...)", i.e. the last operand of the
famax has to be the destination of the mov.

But in amax_0_f16_m_tied1 above, we're not matching the constant move
that sets the register to all zero.  We're just matching a single instruction
and are taking it on faith that the "z[0-9]+\.h" operand does in fact hold
zeros.  So it should just be:

**  famax   z0\.h, p0/m, z0\.h, z[0-9]+\.h

Same for the other tests that have "(...)" in their final line.

Thanks,
Richard


> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_0_f16_m_tied1, svfloat16_t,
> + z0 = svamax_n_f16_m (p0, z0, 0),
> + z0 = svamax_m (p0, z0, 0))
> +
> +/*
> +** amax_0_f16_m_untied:
> +**   ...
> +**   movprfx z0, z1
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_0_f16_m_untied, svfloat16_t,
> + z0 = svamax_n_f16_m (p0, z1, 0),
> + z0 = svamax_m (p0, z1, 0))
> +
> +/*
> +** amax_1_f16_m_tied1:
> +**   ...
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_1_f16_m_tied1, svfloat16_t,
> + z0 = svamax_n_f16_m (p0, z0, 1),
> + z0 = svamax_m (p0, z0, 1))
> +
> +/*
> +** amax_1_f16_m_untied:
> +**   ...
> +**   movprfx z0, z1
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_1_f16_m_untied, svfloat16_t,
> + z0 = svamax_n_f16_m (p0, z1, 1),
> + z0 = svamax_m (p0, z1, 1))
> +
> +/*
> +** amax_2_f16_m:
> +**   ...
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_2_f16_m, svfloat16_t,
> + z0 = svamax_n_f16_m (p0, z0, 2),
> + z0 = svamax_m (p0, z0, 2))
> +
> +/*
> +** amax_f16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   famax   z0\.h, p0/m, z0\.h, z1\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t,
> + z0 = svamax_f16_z (p0, z0, z1),
> + z0 = svamax_z (p0, z0, z1))
> +
> +/*
> +** amax_f16_z_tied2:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   famax   z0\.h, p0/m, z0\.h, z1\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t,
> + z0 = svamax_f16_z (p0, z1, z0),
> + z0 = svamax_z (p0, z1, z0))
> +
> +/*
> +** amax_f16_z_untied:
> +** (
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   famax   z0\.h, p0/m, z0\.h, z2\.h
> +** |
> +**   movprfx z0\.h, p0/z, z2\.h
> +**   famax   z0\.h, p0/m, z0\.h, z1\.h
> +** )
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t,
> + z0 = svamax_f16_z (p0, z1, z2),
> + z0 = svamax_z (p0, z1, z2))
> +
> +/*
> +** amax_h4_f16_z_tied1:
> +**   mov (z[0-9]+\.h), h4
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   famax   z0\.h, p0/m, z0\.h, \1
> +**   ret
> +*/
> +TEST_UNIFORM_ZD (amax_h4_f16_z_tied1, svfloat16_t, __fp16,
> +  z0 = svamax_n_f16_z (p0, z0, d4),
> +  z0 = svamax_z (p0, z0, d4))
> +
> +/*
> +** amax_h4_f16_z_untied:
> +**   mov (z[0-9]+\.h), h4
> +** (
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   famax   z0\.h, p0/m, z0\.h, \1
> +** |
> +**   movprfx z0\.h, p0/z, \1
> +**   famax   z0\.h, p0/m, z0\.h, z1\.h
> +** )
> +**   ret
> +*/
> +TEST_UNIFORM_ZD (amax_h4_f16_z_untied, svfloat16_t, __fp16,
> +  z0 = svamax_n_f16_z (p0, z1, d4),
> +  z0 = svamax_z (p0, z1, d4))
> +
> +/*
> +** amax_0_f16_z_tied1:
> +**   ...
> +**   movprfx z0, z31
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_0_f16_z_tied1, svfloat16_t,
> + z0 = svamax_n_f16_z (p0, z0, 0),
> + z0 = svamax_z (p0, z0, 0))
> +
> +/*
> +** amax_0_f16_z_untied:
> +**   ...
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_0_f16_z_untied, svfloat16_t,
> + z0 = svamax_n_f16_z (p0, z1, 0),
> + z0 = svamax_z (p0, z1, 0))
> +
> +/*
> +** amax_1_f16_z_tied1:
> +**   ...
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_1_f16_z_tied1, svfloat16_t,
> + z0 = svamax_n_f16_z (p0, z0, 1),
> + z0 = svamax_z (p0, z0, 1))
> +
> +/*
> +** amax_1_f16_z_untied:
> +**   ...
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
> +**   ret
> +*/
> +TEST_UNIFORM_Z (amax_1_f16_z_untied, svfloat16_t,
> + z0 = svamax_n_f16_z (p0, z1, 1),
> + z0 = svamax_z (p0, z1, 1))
> +
> +/*
> +** amax_2_f16_z:
> +**   ...
> +**   movprfx z0\.h, p0/z, z0\.h
> +

Re: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

2024-10-09 Thread Jeff Law




On 10/9/24 3:21 PM, Patrick O'Neill wrote:


On 10/9/24 14:07, Jeff Law wrote:



Also note that if you use the tag "[RISC-V]" in your subject line your 
patch will be automatically picked up by a pre-commit tester that can 
be subsequently examined to verify behavior.


This patch's subject line looks good to me. It would've been picked up 
as-is since it mentions riscv/risc-v.


The patch doesn't show up in patchworks so that's what stopped the
risc-v pre-commit from finding it.


Sadly I don't have much insight into what stopped patchworks from seeing 
it. :-/

I'd assumed it wasn't [RISC-V], but you know that aspect better than I :-)

jeff



[Ada] PR ada/117038

2024-10-09 Thread Eric Botcazou
This is the LTO bootstrap failure with -Werror=lto-type-mismatch introduced by 
the patch implementing the new aspect External_Initialization.

In GNAT's implementation model, using convention C (or C_Pass_By_Copy) has no 
effect on the internal representation of types since the representation is 
identical to that of C by default.  It's even counter-productive given the
implementation advice listed in B.3(63-71) so the interface between the
front-end and gigi does not use it and instead uses structurally identical
types on both sides for the sake of LTO.

Tested on x86-64/Linux, applied on the mainline.


2024-10-09  Eric Botcazou  

PR ada/117038
* fe.h (struct c_array): Add 'const' to declaration of pointer.
(C_Source_Buffer): Use consistent formatting.
* par-ch3.adb (P_Component_Items): Properly set Aliased_Present on
access definition.
* sinput.ads: Remove clause for Interfaces.C.
(C_Array): Change type of Length to Integer and make both
components aliased.  Remove Convention aspect.
(C_Source_Buffer): Remove all aspects.
* sinput.adb (C_Source_Buffer): Adjust to above change.

-- 
Eric Botcazou
diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h
index 36f5e9bfe9e..e3e65fe18bd 100644
--- a/gcc/ada/fe.h
+++ b/gcc/ada/fe.h
@@ -348,17 +348,17 @@ extern void Set_Present_Expr		(Node_Id, Uint);
 /* sinput: */
 
 struct c_array {
-  char *pointer;
+  const char *pointer;
   int length;
 };
 
-#define C_Source_Buffer sinput__c_source_buffer
+#define C_Source_Buffer 	sinput__c_source_buffer
 #define Debug_Source_Name	sinput__debug_source_name
 #define Get_Column_Number	sinput__get_column_number
 #define Get_Logical_Line_Number	sinput__get_logical_line_number
 #define Get_Source_File_Index	sinput__get_source_file_index
 
-extern struct c_array C_Source_Buffer (Source_File_Index);
+extern struct c_array C_Source_Buffer 		(Source_File_Index);
 extern File_Name_Type Debug_Source_Name		(Source_File_Index);
 extern Column_Number_Type Get_Column_Number	(Source_Ptr);
 extern Line_Number_Type Get_Logical_Line_Number	(Source_Ptr);
diff --git a/gcc/ada/par-ch3.adb b/gcc/ada/par-ch3.adb
index a5f4319debf..04246dc04eb 100644
--- a/gcc/ada/par-ch3.adb
+++ b/gcc/ada/par-ch3.adb
@@ -3841,7 +3841,7 @@ package body Ch3 is
--  end if;
 
Set_Subtype_Indication (CompDef_Node, Empty);
-   Set_Aliased_Present(CompDef_Node, False);
+   Set_Aliased_Present(CompDef_Node, Aliased_Present);
Set_Access_Definition  (CompDef_Node,
  P_Access_Definition (Not_Null_Present));
 else
diff --git a/gcc/ada/sinput.adb b/gcc/ada/sinput.adb
index f2e6dda1c99..2b7439f1036 100644
--- a/gcc/ada/sinput.adb
+++ b/gcc/ada/sinput.adb
@@ -281,10 +281,8 @@ package body Sinput is
-
 
function C_Source_Buffer (S : SFI) return C_Array is
-  use type Interfaces.C.int;
-
-  Length : constant Interfaces.C.int :=
-Interfaces.C.int (Source_Last (S) - Source_First (S));
+  Length : constant Integer :=
+Integer (Source_Last (S) - Source_First (S));
 
   Text : constant Source_Buffer_Ptr := Source_Text (S);
 
diff --git a/gcc/ada/sinput.ads b/gcc/ada/sinput.ads
index ce47fef76db..d33c4708352 100644
--- a/gcc/ada/sinput.ads
+++ b/gcc/ada/sinput.ads
@@ -56,7 +56,6 @@
 
 with Alloc;
 with Casing; use Casing;
-with Interfaces.C;
 with Namet;  use Namet;
 with System;
 with Table;
@@ -708,12 +707,13 @@ package Sinput is
--  to avoid memory leaks.
 
type C_Array is record
-  Pointer : access constant Character;
-  Length  : Interfaces.C.int range 0 .. Interfaces.C.int'Last;
-   end record with Convention => C_Pass_By_Copy;
+  Pointer : aliased access constant Character;
+  Length  : aliased Integer;
+   end record;
+   --  WARNING: There is a matching C declaration of this type in fe.h
 
-   function C_Source_Buffer (S : SFI) return C_Array with
- Export, Convention => C, External_Name => "sinput__c_source_buffer";
+   function C_Source_Buffer (S : SFI) return C_Array;
+   --  WARNING: There is a matching C declaration of this subprogram in fe.h
 
 private
pragma Inline (File_Name);


[Ada] Remove support for HP-UX 10

2024-10-09 Thread Eric Botcazou
The support was removed from the rest of the compiler two years ago.

Applied on the mainline.


2024-10-09  Eric Botcazou  

* Makefile.rtl: Remove HP-UX 10 section.
* libgnarl/s-osinte__hpux-dce.ads: Delete.
* libgnarl/s-osinte__hpux-dce.adb: Likewise.
* libgnarl/s-taprop__hpux-dce.adb: Likewise.
* libgnarl/s-taspri__hpux-dce.ads: Likewise.
* libgnat/s-oslock__hpux-dce.ads: Likewise.

-- 
Eric Botcazou
diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
index 246c0059fb7..a36f60170b5 100644
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -1972,32 +1972,6 @@ ifeq ($(strip $(filter-out s390% linux%,$(target_cpu) $(target_os))),)
 endif
 endif
 
-# HP/PA HP-UX 10
-ifeq ($(SELECTED_PAIRS),PAIRS_NONE)
-ifeq ($(strip $(filter-out hppa% hp hpux10%,$(target_cpu) $(target_vendor) $(target_os))),)
-
-  SELECTED_PAIRS=hppa-hpux10
-
-  LIBGNAT_TARGET_PAIRS = \
-  a-intnam.ads

Re: [Ada] Remove support for HP-UX 10

2024-10-09 Thread Jeff Law




On 10/9/24 3:35 PM, Eric Botcazou wrote:

The support was removed from the rest of the compiler two years ago.

Yea, HPUX 10 is a dead OS on a dead chip :-0

Jeff


Re: [PATCH] RISC-V: Add implication for M extension.

2024-10-09 Thread Jeff Law




On 10/9/24 10:52 AM, Palmer Dabbelt wrote:

On Tue, 08 Oct 2024 16:43:13 PDT (-0700), jeffreya...@gmail.com wrote:



On 10/7/24 11:33 PM, Tsung Chun Lin wrote:

That M implies Zmmul.

gcc/ChangeLog:

 * common/config/riscv/riscv-common.cc: M implies Zmmul.

Thanks.  I've pushed this to the trunk.

jeff

ps.  Quite a discussion on this topic in the zmmul public discussion
from a couple years ago.  But it looks like the consensus was that M
should imply Zmmul.


Wacky timing: I just sat down to try and reply to the "how do C and Zca 
relate" thread in binutils, only to find this one instead.  I think 
we've now got the same issue that Jan is pointing out in binutils: 
basically +M and -M aren't inverses any more (it would leave on Zmmul). 
Not 100% sure there.


FWIW that M/Zmmul thread basically made me give up so I don't really 
care that much either way, IMO anyone depending on these fine-grained 
extension things is in for a pile of brokenness.  So as long as we 
document what we do it's fine with me.
Yea, when I read it my thought was this was a waste of time, but that I 
didn't care enough to fight it.


Unfortunately it looks like the patch didn't actually get tested and 
is causing a few hundred regressions.  Once I confirm I'll officially 
revert and ask Tsung Chun to post an update after fixing the testing 
failures.


Jeff



Re: [PATCH] RISC-V: Add implication for M extension.

2024-10-09 Thread Palmer Dabbelt

On Tue, 08 Oct 2024 16:43:13 PDT (-0700), jeffreya...@gmail.com wrote:



On 10/7/24 11:33 PM, Tsung Chun Lin wrote:

That M implies Zmmul.

gcc/ChangeLog:

 * common/config/riscv/riscv-common.cc: M implies Zmmul.

Thanks.  I've pushed this to the trunk.

jeff

ps.  Quite a discussion on this topic in the zmmul public discussion
from a couple years ago.  But it looks like the consensus was that M
should imply Zmmul.


Wacky timing: I just sat down to try and reply to the "how do C and Zca 
relate" thread in binutils, only to find this one instead.  I think 
we've now got the same issue that Jan is pointing out in binutils: 
basically +M and -M aren't inverses any more (it would leave on Zmmul).  
Not 100% sure there.


FWIW that M/Zmmul thread basically made me give up so I don't really 
care that much either way, IMO anyone depending on these fine-grained 
extension things is in for a pile of brokenness.  So as long as we 
document what we do it's fine with me.


Here's the binutils thread: 
https://inbox.sourceware.org/binutils/ff5b9acf-6bd6-4cac-a7e5-9ec96ef9c...@suse.com/


Re: [PATCH] RISC-V: Add implication for M extension.

2024-10-09 Thread Palmer Dabbelt

On Wed, 09 Oct 2024 09:55:02 PDT (-0700), jeffreya...@gmail.com wrote:



On 10/9/24 10:52 AM, Palmer Dabbelt wrote:

On Tue, 08 Oct 2024 16:43:13 PDT (-0700), jeffreya...@gmail.com wrote:



On 10/7/24 11:33 PM, Tsung Chun Lin wrote:

That M implies Zmmul.

gcc/ChangeLog:

 * common/config/riscv/riscv-common.cc: M implies Zmmul.

Thanks.  I've pushed this to the trunk.

jeff

ps.  Quite a discussion on this topic in the zmmul public discussion
from a couple years ago.  But it looks like the consensus was that M
should imply Zmmul.


Wacky timing: I just sat down to try and reply to the "how do C and Zca
relate" thread in binutils, only to find this one instead.  I think
we've now got the same issue that Jan is pointing out in binutils:
basically +M and -M aren't inverses any more (it would leave on Zmmul).
Not 100% sure there.

FWIW that M/Zmmul thread basically made me give up so I don't really
care that much either way, IMO anyone depending on these fine-grained
extension things is in for a pile of brokenness.  So as long as we
document what we do it's fine with me.

Yea, when I read it my thought was this was a waste of time, but that I
didn't care enough to fight it.

Unfortunately it looks like the patch didn't actually get tested and
is causing a few hundred regressions.  Once I confirm I'll officially
revert and ask Tsung Chun to post an update after fixing the testing
failures.


OK, works for me ;)



Jeff


PING: [PATCH v7] Provide new GCC builtin __builtin_counted_by_ref [PR116016]

2024-10-09 Thread Qing Zhao
Hi,

This is the Ping to the 7th version of the patch.

Okay for the trunk?

thanks.

Qing

Begin forwarded message:

From: Qing Zhao 
Subject: [PATCH v7] Provide new GCC builtin __builtin_counted_by_ref [PR116016]
Date: September 27, 2024 at 14:05:50 EDT
To: josmy...@redhat.com, ja...@redhat.com, uec...@tugraz.at
Cc: keesc...@chromium.org, isanb...@gmail.com, siddh...@gotplt.org, 
sja...@gcc.gnu.org, gcc-patches@gcc.gnu.org, Qing Zhao 

Hi, this is the 7th version of the patch.

Compared to the 6th version, the major changes address several style issues
raised by Jakub for the 6th version of the patch.

The 6th version is at:
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/663992.html

Bootstrapped and regression-tested on both x86 and aarch64 with no issues.

Okay for the trunk?

thanks.

Qing.


With the addition of the 'counted_by' attribute and its wide roll-out
within the Linux kernel, a use case has been found that would be very
nice to have for object allocators: being able to set the counted_by
counter variable without knowing its name.

For example, given:

 struct foo {
   ...
   int counter;
   ...
   struct bar array[] __attribute__((counted_by (counter)));
 } *p;

The existing Linux object allocators are roughly:

 #define MAX(A, B) (A > B) ? (A) : (B)
 #define alloc(P, FAM, COUNT) ({ \
   __auto_type __p = &(P); \
   size_t __size = MAX (sizeof(*P),
__builtin_offsetof (__typeof(*P), FAM)
+ sizeof (*(P->FAM)) * COUNT); \
   *__p = kmalloc(__size); \
 })

Right now, any addition of a counted_by annotation must also
include an open-coded assignment of the counter variable after
the allocation:

 p = alloc(p, array, how_many);
 p->counter = how_many;

In order to avoid the tedious and error-prone work of manually adding
the open-coded counted-by initializations everywhere in the Linux
kernel, a new GCC builtin __builtin_counted_by_ref will be very useful
to be added to help the adoption of the counted-by attribute.

-- Built-in Function: TYPE __builtin_counted_by_ref (PTR)
The built-in function '__builtin_counted_by_ref' checks whether the
array object pointed to by the pointer PTR has another object
associated with it that represents the number of elements in the
array object through the 'counted_by' attribute (i.e.  the
counted-by object).  If so, returns a pointer to the corresponding
counted-by object.  If such counted-by object does not exist,
returns a NULL pointer.

This built-in function is only available in C for now.

The argument PTR must be a pointer to an array.  The TYPE of the
returned value must be a pointer type pointing to the corresponding
type of the counted-by object or VOID pointer type in case of a
NULL pointer being returned.

With this new builtin, the central allocator could be updated to:

 #define MAX(A, B) (A > B) ? (A) : (B)
 #define alloc(P, FAM, COUNT) ({ \
   __auto_type __p = &(P); \
   __auto_type __c = (COUNT); \
   size_t __size = MAX (sizeof (*(*__p)),\
__builtin_offsetof (__typeof(*(*__p)),FAM) \
+ sizeof (*((*__p)->FAM)) * __c); \
   if ((*__p = kmalloc(__size))) { \
 __auto_type ret = __builtin_counted_by_ref((*__p)->FAM); \
 *_Generic(ret, void *: &(size_t){0}, default: ret) = __c; \
   } \
 })

And then structs can gain the counted_by attribute without needing
additional open-coded counter assignments for each struct, and
unannotated structs could still use the same allocator.

PR c/116016

gcc/c-family/ChangeLog:

* c-common.cc: Add new __builtin_counted_by_ref.
* c-common.h (enum rid): Add RID_BUILTIN_COUNTED_BY_REF.

gcc/c/ChangeLog:

* c-decl.cc (names_builtin_p): Add RID_BUILTIN_COUNTED_BY_REF.
* c-parser.cc (has_counted_by_object): New routine.
(get_counted_by_ref): New routine.
(c_parser_postfix_expression): Handle New RID_BUILTIN_COUNTED_BY_REF.
* c-tree.h: New routine handle_counted_by_for_component_ref.
* c-typeck.cc (handle_counted_by_for_component_ref): New routine.
(build_component_ref): Call the new routine.

gcc/ChangeLog:

* doc/extend.texi: Add documentation for __builtin_counted_by_ref.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-counted-by-ref-1.c: New test.
* gcc.dg/builtin-counted-by-ref.c: New test.
---
gcc/c-family/c-common.cc  |   1 +
gcc/c-family/c-common.h   |   1 +
gcc/c/c-decl.cc   |   1 +
gcc/c/c-parser.cc |  79 ++
gcc/c/c-tree.h|   1 +
gcc/c/c-typeck.cc |  33 +++--
gcc/doc/extend.texi   |  55 +++
.../gcc.dg/builtin-counted-by-ref-1.c | 135 ++
gcc/testsuite/gcc.dg/builtin-counted-by-ref.c |  61 
9 files changed, 358 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/builtin-counted-by-ref-1.c
create mode 100644 gcc/testsuite/gcc.dg/builtin-counted-by-ref.c

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index ec6a5da892d

Re: [PATCH v5 1/2] aarch64: Add SVE2 faminmax intrinsics

2024-10-09 Thread Saurabh Jha




On 10/9/2024 5:26 PM, Richard Sandiford wrote:

 writes:

+/*
+** amax_0_f16_m_tied1:
+** ...
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)


Sorry to ask for another round, but: the "(" and ")" aren't needed.
They're used when something later in the regular expression sequence
needs to refer back to something earlier, such as in:

**  mov (z[0-9]+\.h), h4
**  movprfx z0, z1
**  famax   z0\.h, p0/m, z0\.h, \1

Here, the "\1" refers back to the "(...)", i.e. the last operand of the
famax has to be the destination of the mov.

But in amax_0_f16_m_tied1 above, we're not matching the constant move
that sets the register to all zero.  We're just matching a single instruction
and are taking it on faith that the "z[0-9]+\.h" operand does in fact hold
zeros.  So it should just be:

**  famax   z0\.h, p0/m, z0\.h, z[0-9]+\.h

Same for the other tests that have "(...)" in their final line.


No worries, thank you for the review again! I'll send in a new version.


Thanks,
Richard



+** ret
+*/
+TEST_UNIFORM_Z (amax_0_f16_m_tied1, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z0, 0),
+   z0 = svamax_m (p0, z0, 0))
+
+/*
+** amax_0_f16_m_untied:
+** ...
+** movprfx z0, z1
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_0_f16_m_untied, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z1, 0),
+   z0 = svamax_m (p0, z1, 0))
+
+/*
+** amax_1_f16_m_tied1:
+** ...
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_1_f16_m_tied1, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z0, 1),
+   z0 = svamax_m (p0, z0, 1))
+
+/*
+** amax_1_f16_m_untied:
+** ...
+** movprfx z0, z1
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_1_f16_m_untied, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z1, 1),
+   z0 = svamax_m (p0, z1, 1))
+
+/*
+** amax_2_f16_m:
+** ...
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_2_f16_m, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z0, 2),
+   z0 = svamax_m (p0, z0, 2))
+
+/*
+** amax_f16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t,
+   z0 = svamax_f16_z (p0, z0, z1),
+   z0 = svamax_z (p0, z0, z1))
+
+/*
+** amax_f16_z_tied2:
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t,
+   z0 = svamax_f16_z (p0, z1, z0),
+   z0 = svamax_z (p0, z1, z0))
+
+/*
+** amax_f16_z_untied:
+** (
+** movprfx z0\.h, p0/z, z1\.h
+** famax   z0\.h, p0/m, z0\.h, z2\.h
+** |
+** movprfx z0\.h, p0/z, z2\.h
+** famax   z0\.h, p0/m, z0\.h, z1\.h
+** )
+** ret
+*/
+TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t,
+   z0 = svamax_f16_z (p0, z1, z2),
+   z0 = svamax_z (p0, z1, z2))
+
+/*
+** amax_h4_f16_z_tied1:
+** mov (z[0-9]+\.h), h4
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_ZD (amax_h4_f16_z_tied1, svfloat16_t, __fp16,
+z0 = svamax_n_f16_z (p0, z0, d4),
+z0 = svamax_z (p0, z0, d4))
+
+/*
+** amax_h4_f16_z_untied:
+** mov (z[0-9]+\.h), h4
+** (
+** movprfx z0\.h, p0/z, z1\.h
+** famax   z0\.h, p0/m, z0\.h, \1
+** |
+** movprfx z0\.h, p0/z, \1
+** famax   z0\.h, p0/m, z0\.h, z1\.h
+** )
+** ret
+*/
+TEST_UNIFORM_ZD (amax_h4_f16_z_untied, svfloat16_t, __fp16,
+z0 = svamax_n_f16_z (p0, z1, d4),
+z0 = svamax_z (p0, z1, d4))
+
+/*
+** amax_0_f16_z_tied1:
+** ...
+** movprfx z0, z31
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_0_f16_z_tied1, svfloat16_t,
+   z0 = svamax_n_f16_z (p0, z0, 0),
+   z0 = svamax_z (p0, z0, 0))
+
+/*
+** amax_0_f16_z_untied:
+** ...
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_0_f16_z_untied, svfloat16_t,
+   z0 = svamax_n_f16_z (p0, z1, 0),
+   z0 = svamax_z (p0, z1, 0))
+
+/*
+** amax_1_f16_z_tied1:
+** ...
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_1_f16_z_tied1, svfloat16_t,
+   z0 = svamax_n_f16_z (p0, z0, 1),
+   z0 = svamax_z (p0, z0, 1))
+
+/*
+** amax_1_f16_z_untied:
+** ...
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, z0\.h, (z[0-9]+\.h)
+** ret
+*/
+TEST_UNIFORM_Z (amax_1_f16_z_untied, svfloat16_t,
+   z0 = svamax_n_f16_z (p0, z1, 1),
+   z0 = svamax_z (p0, z1, 1))
+
+/*
+** amax_2_f16_z:
+** ...
+** movprfx z0\.h, p0/z, z0\.h
+** famax   z0\.h, p0/m, 

Re: [PATCH] [PR116831] match.pd: Check trunc_mod vector optab before folding.

2024-10-09 Thread Jennifer Schmitz

> On 8 Oct 2024, at 10:31, Richard Biener  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On Fri, 4 Oct 2024, Jennifer Schmitz wrote:
> 
>> As in https://gcc.gnu.org/pipermail/gcc-patches/2024-September/663185.html,
>> this patch guards the simplification x / y * y == x -> x % y == 0 in
>> match.pd for vector types by a check for:
>> 1) Support of the mod optab for vectors OR
>> 2) Application before vector lowering for non-VL vectors.
>> 
>> The patch was bootstrapped and tested with no regression on
>> aarch64-linux-gnu and x86_64-linux-gnu.
>> OK for mainline?
> 
> -  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE)
> +  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE
> +   || (VECTOR_INTEGER_TYPE_P (type)
> +  && ((optimize_vectors_before_lowering_p ()
> +   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
> +  || target_supports_op_p (type, TRUNC_MOD_EXPR,
> +   optab_vector
> 
> this looks a bit odd, VECTOR_INTEGER_TYPE_P (type) checks the
> result type of the comparison.  I think the whole condition is
> better written as
> 
> (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE
>  && (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
>  || !target_supports_op_p (TREE_TYPE (@0), TRUNC_DIV_EXPR,
>optab_vector)
>  || target_supports_op_p (TREE_TYPE (@0), TRUNC_MOD_EXPR,
>optab_vector)))
> 
> when we have non-vector mode we're before lowering, likewise when
> the target doesn't support the division.  Even before lowering
> we shouldn't replace a supported division (and multiplication)
> with an unsupported modulo.
Dear Richard,
thanks for the review. I updated the patch with your suggestion and 
re-validated on aarch64 and x86_64.
Best,
Jennifer

This patch guards the simplification x / y * y == x -> x % y == 0 in
match.pd by a check for:
1) Non-vector mode of x OR
2) Lack of support for vector division OR
3) Support of vector modulo

The patch was bootstrapped and tested with no regression on
aarch64-linux-gnu and x86_64-linux-gnu.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
PR tree-optimization/116831
* match.pd: Guard simplification to trunc_mod with check for
mod optab support.

gcc/testsuite/
PR tree-optimization/116831
* gcc.dg/torture/pr116831.c: New test.
---
 gcc/match.pd|  9 +++--
 gcc/testsuite/gcc.dg/torture/pr116831.c | 10 ++
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116831.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ba83f0f29e6..9b59b5c12f1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5380,8 +5380,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* x / y * y == x -> x % y == 0.  */
 (simplify
   (eq:c (mult:c (trunc_div:s @0 @1) @1) @0)
-  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE)
-(eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
+  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE
+   && (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
+  || !target_supports_op_p (TREE_TYPE (@0), TRUNC_DIV_EXPR,
+optab_vector)
+  || target_supports_op_p (TREE_TYPE (@0), TRUNC_MOD_EXPR,
+   optab_vector)))
+   (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
 
 /* ((X /[ex] A) +- B) * A  -->  X +- A * B.  */
 (for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/torture/pr116831.c 
b/gcc/testsuite/gcc.dg/torture/pr116831.c
new file mode 100644
index 000..92b2a130e69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116831.c
@@ -0,0 +1,10 @@
+/* { dg-additional-options "-mcpu=neoverse-v2" { target aarch64*-*-* } } */
+
+long a;
+int b, c;
+void d (int e[][5], short f[][5][5][5]) 
+{
+  for (short g; g; g += 4)
+a = c ?: e[6][0] % b ? 0 : f[0][0][0][g];
+}
+
-- 
2.44.0


> 
> Richard.
> 
> 
>> Signed-off-by: Jennifer Schmitz 
>> 
>> gcc/
>>  PR tree-optimization/116831
>>  * match.pd: Guard simplification to trunc_mod with check for
>>  mod optab support.
>> 
>> gcc/testsuite/
>>  PR tree-optimization/116831
>>  * gcc.dg/torture/pr116831.c: New test.
>> 
> 
> --
> Richard Biener 
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)




smime.p7s
Description: S/MIME cryptographic signature


[PATCH v1 2/2] RISC-V: Add testcases for form 3 of scalar signed SAT_TRUNC

2024-10-09 Thread pan2 . li
From: Pan Li 

Form 3:
  #define DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN < x && x <= (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 15 ++
 .../riscv/sat_s_trunc-3-i16-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-3-i32-to-i16.c  | 28 +++
 .../riscv/sat_s_trunc-3-i32-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-3-i64-to-i16.c  | 28 +++
 .../riscv/sat_s_trunc-3-i64-to-i32.c  | 26 +
 .../riscv/sat_s_trunc-3-i64-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-run-3-i16-to-i8.c   | 16 +++
 .../riscv/sat_s_trunc-run-3-i32-to-i16.c  | 16 +++
 .../riscv/sat_s_trunc-run-3-i32-to-i8.c   | 16 +++
 .../riscv/sat_s_trunc-run-3-i64-to-i16.c  | 16 +++
 .../riscv/sat_s_trunc-run-3-i64-to-i32.c  | 16 +++
 .../riscv/sat_s_trunc-run-3-i64-to-i8.c   | 16 +++
 13 files changed, 271 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i32-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i32-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i64-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i64-to-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i64-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i16-to-i8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i32-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i32-to-i8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i64-to-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i64-to-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-3-i64-to-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index f88432a3817..607bc4fc82e 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -489,10 +489,25 @@ sat_s_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN < x && x <= (WT)NT_MAX\
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
 #define RUN_SAT_S_TRUNC_FMT_2(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_2 (x)
 #define RUN_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_2(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_3(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_3 (x)
+#define RUN_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_3(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c
new file mode 100644
index 000..7b8a663d53b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/

[PATCH v3] Remove sys/user time in -ftime-report

2024-10-09 Thread Andi Kleen
From: Andi Kleen 

Retrieving sys/user time in timevars is quite expensive because it
always needs a system call. Only getting the wall time is much
cheaper because operating systems have optimized paths for this.

The sys time isn't that interesting for a compiler and wall time
is usually close to user time except when the system is overloaded.
On the other hand, when the system is not overloaded, wall time is more
accurate because it has less overhead.

For building tramp3d with -O0 the -ftime-report overhead drops from
18% to 3%. For -O2 it drops from 8% to not measurable.

I changed the code to use gettimeofday as a fallback for clock_gettime
CLOCK_MONOTONIC.  If a host has neither of those the time will not
be measured. Previously clock was the fallback.

This removes a lot of code in timevar.cc:

 gcc/timevar.cc | 167 ++---
 gcc/timevar.h  |  10 +---

 2 files changed, 17 insertions(+), 160 deletions(-)

Bootstrapped on x86_64-linux with full test suite run.

gcc/ChangeLog:

* timevar.cc (struct tms): Remove.
(RUSAGE_SELF): Remove.
(TICKS_PER_SECOND): Remove.
(USE_TIMES): Remove.
(HAVE_USER_TIME): Remove.
(HAVE_SYS_TIME): Remove.
(HAVE_WALL_TIME): Remove.
(USE_GETRUSAGE): Remove.
(USE_CLOCK): Remove.
(NANOSEC_PER_SEC): Remove.
(TICKS_TO_NANOSEC): Remove.
(CLOCKS_TO_NANOSEC): Remove.
(timer::named_items::push): Remove sys/user.
(get_time): Remove clock and times and getrusage code.
(timevar_accumulate): Remove sys/user.
(timevar_diff): Ditto.
(timer::validate_phases): Ditto.
(timer::print_row): Ditto.
(timer::all_zero): Ditto.
(timer::print): Ditto.
(make_json_for_timevar_time_def): Ditto.
* timevar.h (struct timevar_time_def): Ditto.

---

v2: Adjust JSON/Sarif output too.
v3: Make unconditional.
---
 gcc/timevar.cc | 189 ++---
 gcc/timevar.h  |  10 +--
 2 files changed, 22 insertions(+), 177 deletions(-)

diff --git a/gcc/timevar.cc b/gcc/timevar.cc
index 68bcf44864f9..4a57e74230d3 100644
--- a/gcc/timevar.cc
+++ b/gcc/timevar.cc
@@ -26,84 +26,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "options.h"
 #include "json.h"
 
-#ifndef HAVE_CLOCK_T
-typedef int clock_t;
-#endif
-
-#ifndef HAVE_STRUCT_TMS
-struct tms
-{
-  clock_t tms_utime;
-  clock_t tms_stime;
-  clock_t tms_cutime;
-  clock_t tms_cstime;
-};
-#endif
-
-#ifndef RUSAGE_SELF
-# define RUSAGE_SELF 0
-#endif
-
-/* Calculation of scale factor to convert ticks to seconds.
-   We mustn't use CLOCKS_PER_SEC except with clock().  */
-#if HAVE_SYSCONF && defined _SC_CLK_TCK
-# define TICKS_PER_SECOND sysconf (_SC_CLK_TCK) /* POSIX 1003.1-1996 */
-#else
-# ifdef CLK_TCK
-#  define TICKS_PER_SECOND CLK_TCK /* POSIX 1003.1-1988; obsolescent */
-# else
-#  ifdef HZ
-#   define TICKS_PER_SECOND HZ  /* traditional UNIX */
-#  else
-#   define TICKS_PER_SECOND 100 /* often the correct value */
-#  endif
-# endif
-#endif
-
-/* Prefer times to getrusage to clock (each gives successively less
-   information).  */
-#ifdef HAVE_TIMES
-# if defined HAVE_DECL_TIMES && !HAVE_DECL_TIMES
-  extern clock_t times (struct tms *);
-# endif
-# define USE_TIMES
-# define HAVE_USER_TIME
-# define HAVE_SYS_TIME
-# define HAVE_WALL_TIME
-#else
-#ifdef HAVE_GETRUSAGE
-# if defined HAVE_DECL_GETRUSAGE && !HAVE_DECL_GETRUSAGE
-  extern int getrusage (int, struct rusage *);
-# endif
-# define USE_GETRUSAGE
-# define HAVE_USER_TIME
-# define HAVE_SYS_TIME
-#else
-#ifdef HAVE_CLOCK
-# if defined HAVE_DECL_CLOCK && !HAVE_DECL_CLOCK
-  extern clock_t clock (void);
-# endif
-# define USE_CLOCK
-# define HAVE_USER_TIME
-#endif
-#endif
-#endif
-
-/* libc is very likely to have snuck a call to sysconf() into one of
-   the underlying constants, and that can be very slow, so we have to
-   precompute them.  Whose wonderful idea was it to make all those
-   _constants_ variable at run time, anyway?  */
-#define NANOSEC_PER_SEC 10
-#ifdef USE_TIMES
-static uint64_t ticks_to_nanosec;
-#define TICKS_TO_NANOSEC (NANOSEC_PER_SEC / TICKS_PER_SECOND)
-#endif
-
-#ifdef USE_CLOCK
-static uint64_t clocks_to_nanosec;
-#define CLOCKS_TO_NANOSEC (NANOSEC_PER_SEC / CLOCKS_PER_SEC)
-#endif
-
 /* Non-NULL if timevars should be used.  In GCC, this happens with
the -ftime-report flag.  */
 
@@ -181,8 +103,6 @@ timer::named_items::push (const char *item_name)
   timer::timevar_def *def = &m_hash_map.get_or_insert (item_name, &existed);
   if (!existed)
 {
-  def->elapsed.user = 0;
-  def->elapsed.sys = 0;
   def->elapsed.wall = 0;
   def->name = item_name;
   def->standalone = 0;
@@ -230,37 +150,27 @@ timer::named_items::make_json () const
   return arr;
 }
 
-/* Fill the current times into TIME.  The definition of this function
-   also defines any or all of the HAVE_USER_TIME, HAVE_SYS_TIME, and
-   HAVE_WAL

Re: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

2024-10-09 Thread Jeff Law




On 10/8/24 12:25 AM, Tsung Chun Lin wrote:


0001-RISC-V-Enable-builtin-__riscv_mul-with-Zmmul-extensi.patch

 From d5b254e19d1f37fe27c7e98a0160e5c22446cfea Mon Sep 17 00:00:00 2001
From: Jim Lin
Date: Tue, 8 Oct 2024 13:14:32 +0800
Subject: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

gcc/ChangeLog:

 * config/riscv/riscv-c.cc:
 Enable builtin __riscv_mul with Zmmul extension.

Tsung Chun,

Both of your patches caused various regressions in the GCC testsuite. 
As an example on riscv32-elf:



Tests that now fail, but worked before (49 tests):

unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O0  (test for excess 
errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O1  (test for excess 
errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O2  (test for excess 
errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -O3 -g  (test for 
excess errors)
unix/-march=rv32gcv: gcc: gcc.target/riscv/predef-14.c   -Os  (test for excess 
errors)

[ More similar failures follow. ]


I should have asked if you had run the regression testsuite to be sure 
your patches weren't going to regress something unexpectedly.  Given all 
the regressions were in the risc-v specific subdirectory, you can use


make check-gcc RUNTESTFLAGS="riscv.exp"

To run just the risc-v specific tests, which will be much faster than 
running the full testsuite.  This shortcut isn't usually appropriate, 
but I think it is for the two patches you've submitted.  There is a web 
page with further information on the testsuite, but the basic idea would 
be to build the compiler without your patch, run the testsuite, save the 
resulting .sum files, then apply your patch and run the testsuite again 
and compare the resulting .sum files.  Ideally there should be no 
differences.


Anyway, I'm going to be reverting both patches.  Can you please 
re-submit them after fixing the testsuite failures?


Also note that if you use the tag "[RISC-V]" in your subject line your 
patch will be automatically picked up by a pre-commit tester that can be 
subsequently examined to verify behavior.


Thanks,
Jeff


Re: [PATCH v2] libstdc++: Workaround glibc headers on ia64-linux

2024-10-09 Thread Frank Scheiner

On 08.10.24 20:51, Jonathan Wakely wrote:

On Thu, 3 Oct 2024 at 20:59, Frank Scheiner wrote:

[...]
The following patch adds a workaround for this on the libstdc++
testsuite side.

Signed-off-by: Frank Scheiner 


Thanks, I'll push this.

N.B. there's no ChangeLog entry in the patch submission, so I'll use:

libstdc++-v3/ChangeLog:

* testsuite/17_intro/names.cc [__linux__ && __ia64__]: Undefine
'u' as used in glibc headers.


Thanks a lot for taking this patch as is, also for adding a ChangeLog
entry. I didn't anticipate that for this one :-/. And I really need to
find out why my MUA breaks the patches.

Cheers,
Frank


Re: [PATCH v3 0/2] ia64: enable LRA and un-obsolete ia64*-*-linux

2024-10-09 Thread Frank Scheiner



On 09.10.24 11:29, Richard Biener wrote:

On Wed, 9 Oct 2024, Frank Scheiner wrote:

On 09.10.24 10:26, Richard Biener wrote:

On Wed, 9 Oct 2024, Richard Biener wrote:
[...]

I'll push this for you.


I spoke too fast - something between you and me corrupts the patch
so it doesn't apply (even after manually resolving line-wrapping,
I suspect whitespace is also broken).  Can you re-send them as
attachments please?


Is it OK to attach them to my reply here or better with a v4?

If the latter, should the patches be attached to the cover letter and the
numbering in the subject be removed then, as everything is included in one
email?


The attachments worked fine, I pushed them


Great, thanks a lot. This actually made my day! :-)

Cheers,
Frank


[PATCH] testsuite: arm: use effective-target for mod* tests

2024-10-09 Thread Torbjörn SVENSSON
Committed below patch as obvious to master.

--

This fixes a typo introduced in r15-4200-gcf08dd297ca that was reported
at https://linaro.atlassian.net/browse/GNU-1369.

gcc/testsuite/ChangeLog

* gcc.target/arm/mod_2.c: Corrected effective-target to
arm_cpu_cortex_a57_ok.
* gcc.target/arm/mod_256.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/mod_2.c   | 2 +-
 gcc/testsuite/gcc.target/arm/mod_256.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c 
b/gcc/testsuite/gcc.target/arm/mod_2.c
index 3a203b67d73..5b8dec44ed5 100644
--- a/gcc/testsuite/gcc.target/arm/mod_2.c
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
 /* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_cpu_cortex_a57 } */
 
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c 
b/gcc/testsuite/gcc.target/arm/mod_256.c
index 3521d7a05f3..8589b948f41 100644
--- a/gcc/testsuite/gcc.target/arm/mod_256.c
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
 /* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_cpu_cortex_a57 } */
 
-- 
2.25.1



Re: [PATCH v4 3/7] OpenMP: C front-end support for dispatch + adjust_args

2024-10-09 Thread Tobias Burnus

First comments; I need to have a deeper look, but now I need to fetch some victuals.

Paul-Antoine Arras wrote:

This patch adds support to the C front-end to parse the `dispatch` construct and
the `adjust_args` clause. It also includes some common C/C++ bits for pragmas
and attributes.

Additional common C/C++ testcases are in a later patch in the series.


. . .


--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -571,6 +571,8 @@ const struct attribute_spec c_common_gnu_attributes[] =
  handle_omp_declare_variant_attribute, NULL },
{ "omp declare variant variant", 0, -1, true,  false, false, false,
  handle_omp_declare_variant_attribute, NULL },
+  { "omp declare variant adjust_args need_device_ptr", 0, -1, true,  false, 
false, false,
+ handle_omp_declare_variant_attribute, NULL },


the first line is 9 characters too long ...


--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1747,6 +1747,8 @@ static void c_parser_omp_assumption_clauses (c_parser *, 
bool);
  static void c_parser_omp_allocate (c_parser *);
  static void c_parser_omp_assumes (c_parser *);
  static bool c_parser_omp_ordered (c_parser *, enum pragma_context, bool *);
+static tree
+c_parser_omp_dispatch (location_t, c_parser *);


Spurious line break after 'tree' in the declaration.


+// Adapted from c_parser_expr_no_commas


While parts of GCC have started to use the // comments of C++ and C99,
this file seemingly hasn't and I am not sure that you want to be the
first one to add them ...

I think this needs some words on the purpose of this function,
i.e. why it exists - in other words, what syntax it supports and does
not support.


+static tree
+c_parser_omp_dispatch_body (c_parser *parser)
+{


...


+  lhs = c_parser_conditional_expression (parser, NULL, NULL);
+  if (TREE_CODE (lhs.value) == CALL_EXPR)
+return lhs.value;
+  else
+{


You can save on indentation and curly braces by removing the 'else {'
as after the 'return' you never need to handle the CALL_EXPR case.


+  location_t op_location = c_parser_peek_token (parser)->location;
+  if (!c_parser_require (parser, CPP_EQ, "expected %<=%>"))
+   return error_mark_node;
+
+  /* Parse function name*/


(Possibly a '.' and then) two spaces before '*/'.


+   for (int i = 0; i < 3; i++)
+ {
+   sizeof_arg[i] = NULL_TREE;
+   sizeof_arg_loc[i] = UNKNOWN_LOCATION;


Wrong size: c_parser_expr_list expects that 6 not 3 values exist.

Looks as if your code predates Jakub's change of Dec 2023:
r14-6741-ge7dd72aefed851  Split -Wcalloc-transposed-args warning from 
-Walloc-size, -Walloc-size fixes


Tobias


Re: [wwwdocs] Document that gcc-8 changed the default to -std=gnu17

2024-10-09 Thread Jonathan Wakely
On Wed, 9 Oct 2024 at 18:07, Joseph Myers wrote:
>
> On Wed, 9 Oct 2024, Jonathan Wakely wrote:
>
> > +C
> > +  
> > +The default mode has been changed to -std=gnu17.
> > +  
>
> The option didn't exist before GCC 8, I think you need to say something
> about the option being added rather than just referring to a new default
> of an option that wasn't in previous versions.

Well in GCC 7 the default was -std=gnu11 and in GCC 8 it changed to
-std=gnu17, which also happened to be new.

How about this?

+C
+  
+New options -std=c17, to select support for the 2018
+edition of the ISO C standard, and -std=gnu17, for C17
+with GNU extensions.
+The default mode has been changed to -std=gnu17.
+  

Should it say 2017 to avoid confusion, even though it was published as
9899:2018?



Re: [PATCH] libcpp, c, middle-end, v2: Optimize initializers using #embed in C

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Jakub Jelinek wrote:

> Here is an updated patch with that implemented plus additional testcase
> which tests it (and was failing with the old version of the patch for C).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

The libcpp/, gcc/c-family/, gcc/c/ and gcc/testsuite/ changes are OK.  The 
rest is OK in the absence of objections within a week.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [wwwdocs] Document that gcc-8 changed the default to -std=gnu17

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Jonathan Wakely wrote:

> +C
> +  
> +New options -std=c17, to select support for the 2018
> +edition of the ISO C standard, and -std=gnu17, for C17
> +with GNU extensions.
> +The default mode has been changed to -std=gnu17.
> +  

That seems reasonable.

> Should it say 2017 to avoid confusion, even though it was published as
> 9899:2018?

Maybe something like "2018 edition (__STDC_VERSION__ == 
201710L)".

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Alejandro Colomar wrote:

> Every little bit adds up.  Documentation is simpler if there is naming
> consistency.  We have SYNOPSISes in the man pages, and they're up front,
> because they constitute an important part of the documentation.

We also have a convention for future standard C interfaces to put the 
length before the pointer so that a VLA parameter declaration can be used 
that makes very clear the intent for how many elements the array has, 
which seems much better for that purpose than relying on the name of a 
parameter.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Alejandro Colomar
On Wed, Oct 09, 2024 at 09:11:52PM GMT, Joseph Myers wrote:
> On Wed, 9 Oct 2024, Alejandro Colomar wrote:
> 
> > Every little bit adds up.  Documentation is simpler if there is naming
> > consistency.  We have SYNOPSISes in the man pages, and they're up front,
> > because they constitute an important part of the documentation.
> 
> We also have a convention for future standard C interfaces to put the 
> length before the pointer so that a VLA parameter declaration can be used 
> that makes very clear the intent for how many elements the array has, 
> which seems much better for that purpose than relying on the name of a 
> parameter.

I doubt that this will be doable for string functions.  Even newer
additions to <string.h> will most likely have the size as the last
element, if just for consistency with the existing APIs.  And this issue
is primarily a string issue, so it won't be solved.

[.identifier] is more likely to help with this.

Cheers,
Alex

> 
> -- 
> Joseph S. Myers
> josmy...@redhat.com
> 

-- 



signature.asc
Description: PGP signature


Re: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

2024-10-09 Thread Patrick O'Neill



On 10/9/24 14:07, Jeff Law wrote:



Also note that if you use the tag "[RISC-V]" in your subject line your 
patch will be automatically picked up by a pre-commit tester that can 
be subsequently examined to verify behavior.


This patch's subject line looks good to me. It would've been picked up 
as-is since it mentions riscv/risc-v.


The patch doesn't show up in patchworks so that's what stopped the 
risc-v pre-commit from finding it.


Sadly I don't have much insight into what stopped patchworks from seeing 
it. :-/


Thanks,
Patrick



[pushed] c++: more modules and -M

2024-10-09 Thread Jason Merrill
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

In r15-4119-gc877a27f04f648 I told preprocess_file to use the
directives-only scan with modules, but it seems that I also need to set the
cpp_option so that communication between _cpp_handle_directive and
scan_translation_unit_directives_only works properly in
c-c++-common/cpp/embed-6.c.

gcc/c-family/ChangeLog:

* c-ppoutput.cc (preprocess_file): Set directives_only flag.
---
 gcc/c-family/c-ppoutput.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/c-family/c-ppoutput.cc b/gcc/c-family/c-ppoutput.cc
index 374252bb4f3..e2c38cbd9eb 100644
--- a/gcc/c-family/c-ppoutput.cc
+++ b/gcc/c-family/c-ppoutput.cc
@@ -93,8 +93,11 @@ preprocess_file (cpp_reader *pfile)
   if (flag_no_output && pfile->buffer)
 {
   if (flag_modules)
-   /* For macros from imported headers we need directives_only_cb.  */
-   scan_translation_unit_directives_only (pfile);
+   {
+ /* For macros from imported headers we need directives_only_cb.  */
+ cpp_get_options (pfile)->directives_only = true;
+ scan_translation_unit_directives_only (pfile);
+   }
   else
{
  /* Scan -included buffers, then the main file.  */

base-commit: 08e91d71e5cc155f1fe7b9ee1c44829aa24ff921
-- 
2.46.2



Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Alejandro Colomar wrote:

> I'm not fabricating, BTW.  Here's a list of off-by-one bugs in login
> code, precisely due to this size-length naming issue:
> 
> 

Those don't look to me like they're much to do with size/length *naming* 
confusion.  It's a conceptual confusion about whether the value needed by 
a particular API includes a null terminator or not, not about what you 
call size and what you call length.  As such, a name without "length" 
wouldn't help, because if you say countof, there would still be the same 
confusion about whether the bytes you are counting are meant to include a 
null terminator or not.

You could maybe avoid some cases of such off-by-one errors by language 
features that tie an array length more closely to a pointer (such as 
.IDENTIFIER proposals where IDENTIFIER is required to be const size_t, in 
cases where a pointer-to-VLA is passed, if there were appropriate 
constraints to require a matching pair of const size_t object and pointer 
to [.IDENTIFIER] VLA to be passed from caller to callee - more general 
versions with such strict requirements about passing matching pairs would 
be less likely to ensure correct sizes everywhere, and this idea about 
ensuring matching pairs isn't in N3188, it's an idea combining things from 
multiple papers).  But I think naming is essentially orthogonal to any 
kind of language feature that might enable reliable bounded pointers.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [wwwdocs] Document that gcc-8 changed the default to -std=gnu17

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Jonathan Wakely wrote:

> +C
> +  
> +The default mode has been changed to -std=gnu17.
> +  

The option didn't exist before GCC 8, I think you need to say something 
about the option being added rather than just referring to a new default 
of an option that wasn't in previous versions.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH] testsuite: arm: use effective-target for mod* tests

2024-10-09 Thread Christophe Lyon
Thanks for the prompt fix!

Christophe

Le mer. 9 oct. 2024, 22:14, Torbjörn SVENSSON 
a écrit :

> Committed below patch as obvious to master.
>
> --
>
> This fixes a typo introduced in r15-4200-gcf08dd297ca that was reported
> at https://linaro.atlassian.net/browse/GNU-1369.
>
> gcc/testsuite/ChangeLog
>
> * gcc.target/arm/mod_2.c: Corrected effective-target to
> arm_cpu_cortex_a57_ok.
> * gcc.target/arm/mod_256.c: Likewise.
>
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/mod_2.c   | 2 +-
>  gcc/testsuite/gcc.target/arm/mod_256.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c
> b/gcc/testsuite/gcc.target/arm/mod_2.c
> index 3a203b67d73..5b8dec44ed5 100644
> --- a/gcc/testsuite/gcc.target/arm/mod_2.c
> +++ b/gcc/testsuite/gcc.target/arm/mod_2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } {
> "-mpure-code" } } */
>  /* { dg-require-effective-target arm32 } */
> -/* { dg-require-effective-target arm_cpu_cortex_a57 } */
> +/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
>  /* { dg-options "-O2 -save-temps" } */
>  /* { dg-add-options arm_cpu_cortex_a57 } */
>
> diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c
> b/gcc/testsuite/gcc.target/arm/mod_256.c
> index 3521d7a05f3..8589b948f41 100644
> --- a/gcc/testsuite/gcc.target/arm/mod_256.c
> +++ b/gcc/testsuite/gcc.target/arm/mod_256.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } {
> "-mpure-code" } } */
>  /* { dg-require-effective-target arm32 } */
> -/* { dg-require-effective-target arm_cpu_cortex_a57 } */
> +/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
>  /* { dg-options "-O2 -save-temps" } */
>  /* { dg-add-options arm_cpu_cortex_a57 } */
>
> --
> 2.25.1
>
>


Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Alejandro Colomar
Hi Joseph,

On Wed, Oct 09, 2024 at 07:31:50PM GMT, Joseph Myers wrote:
> On Wed, 9 Oct 2024, Alejandro Colomar wrote:
> 
> > The documentation of an API starts by its prototype.
> > 
> > void login_prompt(char *name, int len);
> > void login_prompt(char *name, int size);
> > 
> > The former should _not_ include a NUL terminator in the argument.
> > The latter should.  If those names are meaningless, there are more
> > chances of being confused.
> 
> You need actual API *documentation*, not just expecting people to guess 
> based on a name.

Every little bit adds up.  Documentation is simpler if there is naming
consistency.  We have SYNOPSISes in the man pages, and they're up front,
because they constitute an important part of the documentation.

If each manual page used a different naming convention, you'd have to
carefully read each page to understand an API.  And you'd have to be
extra careful about every little detail.

If instead there's a careful consistency across the entire Linux
man-pages project (for example), including naming consistency, you can
read a new page, and have a rough idea of how it works after just a few
looks at the page; hopefully even just by having a look at the SYNOPSIS
plus the first few lines of the DESCRIPTION.  Documentation should not
be surprising, but rather confirm what you already guessed by looking at
the API itself.

> One of those commit messages refers to non-null-terminated utmp 
> structures.  I'd say the actual error-prone antipattern seen here is the 
> use of such arrays (fixed width, not necessarily null-terminated) to store 
> things that might otherwise be thought of as strings, rather than anything 
> to do with naming.

Yeah, utmp(5) didn't help either.

It's hard to quantify how much each problem contributed to the actual
bugs, but I tend to think both factors contributed.


Have a lovely night!
Alex

-- 



signature.asc
Description: PGP signature


Re: Meaning of "length", "size", and "count"

2024-10-09 Thread Alejandro Colomar
Hi Jakub,

On Wed, Oct 09, 2024 at 09:40:11PM GMT, Jakub Łukasiewicz wrote:
> On 2024-10-09 20:48 CEST, Alejandro Colomar  wrote:
> > countof() is a new term, so it doesn't yet have a meaning (except as
> > given by the attribute), but it naturally fits more with number of
> > elements.
> 
> How would you call, for example, a function that returns how many times a
> value is contained in a data structure (be it array, linked list, or any
> other)?

list_count() or similar would be a good name.

It's length that's dangerous to overload because (1) it's already used
by strings, and (2) strings have the danger of the NUL terminator which
is not counted by its length.

But for example, it's not dangerous to misuse size for the number of
elements of an array, because they're so obviously different that you'll
not introduce a bug easily.  I think it's okay to say wcslcpy() gets a
size as the third parameter, even if pedantically it's a number of
elements.  So, using "count" for both arrays, and user-defined types
such as linked lists, is just fine.  The only _dangerous_ term is
length.


Have a lovely night!
Alex

> 
> ~ J.Ł.

-- 



signature.asc
Description: PGP signature


Re: [PATCH v3] libstdc++: implement concatenation of strings and string_views

2024-10-09 Thread Patrick Palka
On Sat, 21 Sep 2024, Giuseppe D'Angelo wrote:

> On 31/07/2024 00:19, Jonathan Wakely wrote:
> > One more thing that I missed last time, sorry:
> > 
> > +#if __glibcxx_string_view >= 202403L
> > +  // const string & + string_view
> > +  template
> > +_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> > +inline basic_string<_CharT, _Traits, _Alloc>
> > +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> > +   __type_identity_t> __rhs)
> > 
> > Since this is a C++26 feature, we can use [[nodiscard]] and constexpr
> > unconditionally, so it can be simply:
> > 
> > +  template
> > +[[nodiscard]]
> > +constexpr basic_string<_CharT, _Traits, _Alloc>
> > 
> > i.e. use [[nodiscard]] not the NODISCARD macro, and constexpr instead
> > of the CONSTEXPR macro and the inline keyword.
> 
> Here's a rebased patch that also includes these changes.
> 
> Thank you,
> 
> -- 
> Giuseppe D'Angelo
> 

> Subject: [PATCH] libstdc++: implement concatenation of strings and
>  string_views
> 
> This adds support for P2591R5, merged for C++26.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/bits/basic_string.h: Implement the four operator+
>   overloads between basic_string and (types convertible to)
>   basic_string_view.
>   * include/bits/version.def: Bump the feature-testing macro.
>   * include/bits/version.h: Regenerate.
>   * 
> testsuite/21_strings/basic_string/operators/char/op_plus_fspath_neg.cc: New 
> test.
>   * 
> testsuite/21_strings/basic_string/operators/char/op_plus_string_view.cc: New 
> test.
>   * 
> testsuite/21_strings/basic_string/operators/char/op_plus_string_view_compat.cc:
>   New test.
> 
> Signed-off-by: Giuseppe D'Angelo 
> ---
>  libstdc++-v3/include/bits/basic_string.h  |  48 +
>  libstdc++-v3/include/bits/version.def |   5 +
>  libstdc++-v3/include/bits/version.h   |   7 +-
>  .../operators/char/op_plus_fspath_neg.cc  |  13 ++
>  .../operators/char/op_plus_string_view.cc | 169 ++
>  .../char/op_plus_string_view_compat.cc|  63 +++
>  6 files changed, 304 insertions(+), 1 deletion(-)
>  create mode 100644 
> libstdc++-v3/testsuite/21_strings/basic_string/operators/char/op_plus_fspath_neg.cc
>  create mode 100644 
> libstdc++-v3/testsuite/21_strings/basic_string/operators/char/op_plus_string_view.cc
>  create mode 100644 
> libstdc++-v3/testsuite/21_strings/basic_string/operators/char/op_plus_string_view_compat.cc
> 
> diff --git a/libstdc++-v3/include/bits/basic_string.h 
> b/libstdc++-v3/include/bits/basic_string.h
> index 120c0bc9a17..7cb2193230c 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -3745,6 +3745,54 @@ _GLIBCXX_END_NAMESPACE_CXX11
>  { return std::move(__lhs.append(1, __rhs)); }
>  #endif
>  
> +#if __glibcxx_string_view >= 202403L
> +  // const string & + string_view
> +  template
> +[[nodiscard]]
> +constexpr inline basic_string<_CharT, _Traits, _Alloc>

Redundant 'inline's

> +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
> +type_identity_t> __rhs)
> +{
> +  typedef basic_string<_CharT, _Traits, _Alloc> _Str;

These typedefs might as well be usings instead

Besides that LGTM!

> +  return std::__str_concat<_Str>(__lhs.data(), __lhs.size(),
> +   __rhs.data(), __rhs.size(),
> +   __lhs.get_allocator());
> +}
> +
> +  // string && + string_view
> +  template
> +[[nodiscard]]
> +constexpr inline basic_string<_CharT, _Traits, _Alloc>
> +operator+(basic_string<_CharT, _Traits, _Alloc>&& __lhs,
> +type_identity_t> __rhs)
> +{
> +  return std::move(__lhs.append(__rhs));
> +}
> +
> +  // string_view + const string &
> +  template
> +[[nodiscard]]
> +constexpr inline basic_string<_CharT, _Traits, _Alloc>
> +operator+(type_identity_t> __lhs,
> +const basic_string<_CharT, _Traits, _Alloc>& __rhs)
> +{
> +  typedef basic_string<_CharT, _Traits, _Alloc> _Str;
> +  return std::__str_concat<_Str>(__lhs.data(), __lhs.size(),
> +   __rhs.data(), __rhs.size(),
> +   __rhs.get_allocator());
> +}
> +
> +  // string_view + string &&
> +  template
> +[[nodiscard]]
> +constexpr inline basic_string<_CharT, _Traits, _Alloc>
> +operator+(type_identity_t> __lhs,
> +basic_string<_CharT, _Traits, _Alloc>&& __rhs)
> +
> +  return std::move(__rhs.insert(0, __lhs));
> +}
> +#endif
> +
>// operator ==
>/**
> *  @brief  Test equivalence of two strings.
> diff --git a/libstdc++-v3/include/bits/version.def 
> b/libstdc++-v3/include/bits/version.def
> index 23478523e0a..d1505c1a0ac 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -698,6 +698,11 @@ ftms = {
>  
>  ftms = {
>na

[PATCH v1 1/2] Match: Support form 3 for scalar signed integer SAT_TRUNC

2024-10-09 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 3 of the scalar signed
integer SAT_TRUNC.  Aka below example:

Form 3:
  #define DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN < x && x <= (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

DEF_SAT_S_TRUNC_FMT_3(int8_t, int16_t, INT8_MIN, INT8_MAX)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_sub_int8_t_fmt_3 (int8_t x, int8_t y)
   6   │ {
   7   │   signed char _1;
   8   │   signed char _2;
   9   │   int8_t _3;
  10   │   __complex__ signed char _6;
  11   │   _Bool _8;
  12   │   signed char _9;
  13   │   signed char _10;
  14   │   signed char _11;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
  19   │   _2 = IMAGPART_EXPR <_6>;
  20   │   if (_2 != 0)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   _1 = REALPART_EXPR <_6>;
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _8 = x_4(D) < 0;
  36   │   _9 = (signed char) _8;
  37   │   _10 = -_9;
  38   │   _11 = _10 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <_1(3), _11(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_trunc_int16_t_to_int8_t_fmt_3 (int16_t x)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* match.pd: Add case 3 matching pattern for signed SAT_TRUNC.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 70fdd10926f..5e20651c8ce 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3478,6 +3478,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
wide_int limit_0 = wi::mask (otype_prec, false, itype_prec); // Aka 255
wide_int limit_1 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 3,
itype_prec); // Aka 253
+   wide_int limit_2 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 2,
+   itype_prec); // Aka 254
wide_int otype_max = wi::mask (otype_prec - 1, false, otype_prec);
wide_int itype_max = wi::mask (otype_prec - 1, false, itype_prec);
wide_int int_cst_1 = wi::to_wide (@1);
@@ -3485,6 +3487,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
wide_int int_cst_3 = wi::to_wide (@3);
   }
   (if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0))
+|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2))
 || (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1)))
&& wi::eq_p (int_cst_3, otype_max))
 
-- 
2.43.0



Re: [PATCH] aarch64: Fix folding of degenerate svwhilele case [PR117045]

2024-10-09 Thread Richard Sandiford
Tamar Christina  writes:
> Hi Richard,
>
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Wednesday, October 9, 2024 12:58 PM
>> To: gcc-patches@gcc.gnu.org
>> Cc: ktkac...@nvidia.com; Richard Earnshaw ;
>> Tamar Christina 
>> Subject: [PATCH] aarch64: Fix folding of degenerate svwhilele case [PR117045]
>> 
>> The svwhilele folder mishandled the degenerate case in which
>> the second argument is the maximum integer.  In that case,
>> the result is all-true regardless of the first parameter:
>> 
>>   If the second scalar operand is equal to the maximum signed integer
>>   value then a condition which includes an equality test can never fail
>>   and the result will be an all-true predicate.
>> 
>> This is because the conceptual "increment the first operand
>> by 1 after each element" is done modulo the range of the operand.
>> The GCC code was instead treating it as infinite precision.
>> whilele_5.c even had a test for the incorrect behaviour.
>> 
>> The easiest fix seemed to be to handle that case specially before
>> doing constant folding.  This also copes with variable first operands.
>> 
>> Tested on aarch64-linux-gnu.  I'll push on Friday if there are no
>> comments before then.  Since it's a wrong-code bug, I'd also like
>> to backport to release branches.
>> 
>> Thanks,
>> Richard
>> 
>> 
>> gcc/
>>  PR target/116999
>>  PR target/117045
>>  * config/aarch64/aarch64-sve-builtins-base.cc
>>  (svwhilelx_impl::fold): Check for WHILELTs of the minimum value
>>  and WHILELEs of the maximum value.  Fold them to all-false and
>>  all-true respectively.
>> 
>> gcc/testsuite/
>>  PR target/116999
>>  PR target/117045
>>  * gcc.target/aarch64/sve/acle/general/whilele_5.c: Fix bogus
>>  expected result.
>>  * gcc.target/aarch64/sve/acle/general/whilele_11.c: New test.
>>  * gcc.target/aarch64/sve/acle/general/whilele_12.c: Likewise.
>> ---
>>  .../aarch64/aarch64-sve-builtins-base.cc  | 11 +-
>>  .../aarch64/sve/acle/general/whilele_11.c | 31 +
>>  .../aarch64/sve/acle/general/whilele_12.c | 34 +++
>>  .../aarch64/sve/acle/general/whilele_5.c  |  2 +-
>>  4 files changed, 76 insertions(+), 2 deletions(-)
>>  create mode 100644
>> gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
>>  create mode 100644
>> gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
>> 
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> index 4b33585d981..3d0975e4294 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> @@ -2945,7 +2945,9 @@ public:
>>  : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
>>{}
>> 
>> -  /* Try to fold a call by treating its arguments as constants of type T.  
>> */
>> +  /* Try to fold a call by treating its arguments as constants of type T.
>> + We have already filtered out the degenerate cases of X .LT. MIN
>> + and X .LE. MAX.  */
>>template
>>gimple *
>>fold_type (gimple_folder &f) const
>> @@ -3001,6 +3003,13 @@ public:
>>  if (f.vectors_per_tuple () > 1)
>>return nullptr;
>> 
>> +/* Filter out cases where the condition is always true or always false. 
>>  */
>> +tree arg1 = gimple_call_arg (f.call, 1);
>> +if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE
>> (arg1
>> +  return f.fold_to_pfalse ();
>
> Just a quick question for my own understanding, I assume the reason MIN
> is handled here is because fold_type will decrement the value at some point?
>
> Otherwise wouldn't MIN + 1 still fit inside the type's precision?
>
> FWIW patch looks good to me, just wondering why the MIN case is needed :)

I admit it probably isn't needed to fix the bug.  I just thought it would
look strange if we handled the arg1 extremity for m_eq_p without also
handling it for !m_eq_p.

Thanks,
Richard

>
> Cheers,
> Tamar
>
>> +if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE
>> (arg1
>> +  return f.fold_to_ptrue ();
>> +
>>  if (f.type_suffix (1).unsigned_p)
>>return fold_type (f);
>>  else
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
>> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
>> new file mode 100644
>> index 000..2be9dc5c534
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
>> @@ -0,0 +1,31 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include 
>> +#include 
>> +
>> +svbool_t
>> +f1 (volatile int32_t *ptr)
>> +{
>> +  return svwhilelt_b8_s32 (*ptr, INT32_MIN);
>> +}
>> +
>> +svbool_t
>> +f2 (volatile uint32_t *ptr)
>> +{
>> +  return svwhilelt_b16_u32 (*ptr, 0);
>> +}
>> +
>> +svbool_t
>> +f3 (volatile int64_t *ptr)
>> +{
>> +  return svwhilelt_b32_s64 (*ptr, INT64_MIN);
>> +}
>> +
>> +

[committed] libstdc++: Drop format attribute from snprintf wrapper [PR116969]

2024-10-09 Thread Jonathan Wakely
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

When __LONG_DOUBLE_IEEE128__ is defined we need to declare a wrapper for
Glibc's 'snprintf' symbol, so we can call the original definition that
works with the IBM128 format of long double. Because we were declaring
the wrapper using __typeof__(__builtin_snprintf) it inherited the
__attribute__((format(printf, 3, 4))) decoration, and then we got a
warning for calling that wrapper with an __ibm128 argument for a %Lf
conversion specifier. The warning is bogus, because the function we're
calling really does want __ibm128 for %Lf, but there's no "printf but
with a different long double format" archetype for the attribute.

In r15-4039-g28911f626864e7 I added a diagnostic pragma to suppress the
warning, but it would be better to just declare the wrapper without the
attribute, and not have to suppress a warning for code that we know is
actually correct.

libstdc++-v3/ChangeLog:

PR libstdc++/116969
* include/bits/locale_facets_nonio.tcc (money_put::__do_put):
Remove diagnostic pragmas.
(__glibcxx_snprintfibm128): Declare type manually, instead of
using __typeof__(__builtin_snprintf).
---
 libstdc++-v3/include/bits/locale_facets_nonio.tcc | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.tcc 
b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
index 53553d113b2..863350a85f1 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.tcc
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
@@ -637,10 +637,14 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL_OR_CXX11
 
 #if defined _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT \
   && defined __LONG_DOUBLE_IEEE128__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wformat" // '%Lf' expects 'long double'
-extern "C"
-__typeof__(__builtin_snprintf) __glibcxx_snprintfibm128 __asm__("snprintf");
+// The snprintf symbol in glibc that works with __ibm128 format is not visible
+// when compiling with -mabi=ieeelongdouble so we use this name for it instead.
+// N.B. we don't use __typeof__(__builtin_snprintf) for the type because that
+// would inherit __attribute__((format(printf, 3, 4))) and give a warning for
+// passing __ibm128 to %Lf instead of long double. The warning would be wrong
+// because long double in this TU is __ieee128 and snprintf expects __ibm128.
+extern "C" int
+__glibcxx_snprintfibm128(char*, size_t, const char*, ...) __asm__("snprintf");
 
   template
 _OutIter
@@ -673,7 +677,6 @@ __typeof__(__builtin_snprintf) __glibcxx_snprintfibm128 
__asm__("snprintf");
   return __intl ? _M_insert(__s, __io, __fill, __digits)
: _M_insert(__s, __io, __fill, __digits);
 }
-#pragma GCC diagnostic pop
 #endif
 
 _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
-- 
2.46.2



Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Joseph Myers
On Wed, 9 Oct 2024, Alejandro Colomar wrote:

> The documentation of an API starts by its prototype.
> 
>   void login_prompt(char *name, int len);
>   void login_prompt(char *name, int size);
> 
> The former should _not_ include a NUL terminator in the argument.
> The latter should.  If those names are meaningless, there are more
> chances of being confused.

You need actual API *documentation*, not just expecting people to guess 
based on a name.

One of those commit messages refers to non-null-terminated utmp 
structures.  I'd say the actual error-prone antipattern seen here is the 
use of such arrays (fixed width, not necessarily null-terminated) to store 
things that might otherwise be thought of as strings, rather than anything 
to do with naming.

-- 
Joseph S. Myers
josmy...@redhat.com



Meaning of "length", "size", and "count"

2024-10-09 Thread Jakub Łukasiewicz

On 2024-10-09 20:48 CEST, Alejandro Colomar  wrote:
countof() is a new term, so it doesn't yet have a meaning (except 
as given by the attribute), but it naturally fits more with number 
of elements.


How would you call, for example, a function that returns how many 
times a value is contained in a data structure (be it array, linked 
list, or any other)?


~ J.Ł.


Re: [PATCH ver2 0/4] rs6000, remove redundant built-ins and add more test cases

2024-10-09 Thread Carl Love



Ping


On 10/1/24 8:12 AM, Carl Love wrote:


GCC maintainers:

The following version 2 of a series of patches for PowerPC removes 
some built-ins that are covered by existing overloaded built-ins. 
Additionally, there are patches to add missing testcases and 
documentation.  The original version of the patch series was posted on 
8/7/2024.  It was originally reviewed by Kewen.


The patches have been updated per the review.  Note patches 2 and 3 in 
the series were approved with minor changes.  I will post the entire 
series for review for completeness.


The patch series has been re-tested on Power 10 LE and BE with no 
regressions.


Please let me know if the patches are acceptable for mainline. Thanks.

    Carl




Re: [PATCH] rs6000, fix test builtins-1-p10-runnable.c

2024-10-09 Thread Carl Love

Ping, FYI this is a fairly simple fix to a testcase.


On 10/3/24 8:11 AM, Carl Love wrote:

GCC maintainers:

The test builtins-1-p10-runnable.c has debugging inadvertently enabled.  
The test uses #ifdef to enable/disable the debugging. Unfortunately, 
the #define DEBUG was set to 0 with the intent of disabling debugging and 
enabling the call to abort in case of error.  The #define should have been 
removed to disable debugging.
Additionally, a change in the expected output which was made for 
testing purposes was not removed.  Hence, the test prints that 
there was an error instead of calling abort.  The result is the test does 
not get reported as failing.


This patch removes the #define DEBUG to enable the call to abort and 
restores the expected output to the correct value.  The patch was 
tested on a Power 10 without the #define DEBUG to verify that the test 
does fail with the incorrect expected value.  The correct expected 
value was then restored.  The test reports 19 expected passes and no 
errors.


Please let me know if this patch is acceptable for mainline. Thanks.

Carl


--- 



rs6000, fix test builtins-1-p10-runnable.c

The test has two issues:

1) The test should execute abort() if an error is found.
However, the test contains a #define DEBUG 0 which actually enables the
error prints instead of executing abort(), because the debug code is
protected by an #ifdef, not an #if.  The #define DEBUG needs to be removed
so the test will abort on an error.

2) The vec_i_expected output was tweaked to test that it would fail.
The test value was not removed.

By removing the #define DEBUG, the test fails and reports 1 failure.
Removing the intentionally wrong expected value results in the test
passing with no errors as expected.

gcc/testsuite/ChangeLog:
    * gcc.target/powerpc/builtins-1-p10-runnable.c: Remove #define
    DEBUG.    Replace vec_i_expected value with correct value.
---
 gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c

index 222c8b3a409..3e8a1c736e3 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
@@ -25,8 +25,6 @@
 #include 
 #include 

-#define DEBUG 0
-
 #ifdef DEBUG
 #include 
 #endif
@@ -281,8 +279,7 @@ int main()
 /* Signed word multiply high */
 i_arg1 = (vector int){ 2147483648, 2147483648, 2147483648, 
2147483648 };

 i_arg2 = (vector int){ 2, 3, 4, 5};
-    //    vec_i_expected = (vector int){-1, -2, -2, -3};
-    vec_i_expected = (vector int){1, -2, -2, -3};
+    vec_i_expected = (vector int){-1, -2, -2, -3};

 vec_i_result = vec_mulh (i_arg1, i_arg2);





Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Alejandro Colomar

Hi Joseph,

On Wed, Oct 09, 2024 at 05:05:16PM GMT, Joseph Myers wrote:
> On Wed, 9 Oct 2024, Alejandro Colomar wrote:
> 
> > I'm not fabricating, BTW.  Here's a list of off-by-one bugs in login
> > code, precisely due to this size-length naming issue:
> > 
> > 
> 
> Those don't look to me like they're much to do with size/length *naming* 
> confusion.  It's a conceptual confusion about whether the value needed by 
> a particular API includes a null terminator or not, not about what you 
> call size and what you call length.

The documentation of an API starts by its prototype.

void login_prompt(char *name, int len);
void login_prompt(char *name, int size);

The former should _not_ include a NUL terminator in the argument.
The latter should.  If those names are meaningless, there are more
chances of being confused.

The bugs were introduced in

which changed old code that was misusing the term length for referring
to the size (or number of elements, pedantically), for code that used
the actual size.  The author of the change didn't give much meaning to
the difference between size and length, and thought they were
interchangeable, and so the bugs were introduced.

As long as one has a clear distinction, that wouldn't have happened.

>  As such, a name without "length" 
> wouldn't help, because if you say countof, there would still be the same 
> confusion about whether the bytes you are counting are meant to include a 
> null terminator or not.

There are 3 terms:

-  size:Size in bytes of an object (possibly an array).
-  length:  Number of non-null characters in a string.
-  n: Number of elements of an array.

When the array is of char, since sizeof(char)==1, size and n are
interchangeable, and both obviously include the NUL terminator.

If you prefer nelementsof() over countof(), I'm all-in for it.  Just ask
for it, and I'll send a patch using nelementsof().  countof() is a new
term, so it doesn't yet have a meaning (except as given by the
attribute), but it naturally fits more with number of elements.  But
from all of the terms that there are, length is the only one that
doesn't include the NUL, so count is fine.  As long as you don't use
length, it should include the NUL.

> 
> You could maybe avoid some cases of such off-by-one errors by language 
> features that tie an array length more closely to a pointer (such as 
> .IDENTIFIER proposals where IDENTIFIER is required to be const size_t, in 
> cases where a pointer-to-VLA is passed, if there were appropriate 
> constraints to require a matching pair of const size_t object and pointer 
> to [.IDENTIFIER] VLA to be passed from caller to callee - more general 
> versions with such strict requirements about passing matching pairs would 
> be less likely to ensure correct sizes everywhere, and this idea about 
> ensuring matching pairs isn't in N3188, it's an idea combining things from 
> multiple papers).

Yeah, Martin and I have the intention of moving in that direction.

countof() [or whatever the name is] will hopefully soon work on array
parameters.

>  But I think naming is essentially orthogonal to any 
> kind of language feature that might enable reliable bounded pointers.

I still think conceptual confusions like this one (two) start with API
design and documentation, which itself starts in API naming.


Have a lovely night!
Alex

-- 



signature.asc
Description: PGP signature


[PATCH 3/4] libcpp: avoid extra spaces in module preprocessing

2024-10-09 Thread Jason Merrill
Tested x86_64-pc-linux-gnu, will apply to trunk with the rest of this patch
series.

-- 8< --

Within the compiler, module keywords "import", "module", and "export" that
are recognized as part of module directives gain an extra trailing space to
distinguish them from other non-keyword uses of those words in the code.
But when dumping preprocessed output, printing those spaces creates a
gratuitous inconsistency with non-modules preprocessing, as revealed by
several of the g++.dg/modules/cpp* tests if modules are enabled by default
in C++20 mode.

libcpp/ChangeLog:

* lex.cc (cpp_output_token): Omit terminal space from name.

gcc/testsuite/ChangeLog:

* g++.dg/modules/cpp-2_c.C: Expect only one space after import.
* g++.dg/modules/cpp-5_c.C
* g++.dg/modules/dep-2.C
* g++.dg/modules/dir-only-2_b.C
* g++.dg/modules/pr99050_b.C
* g++.dg/modules/inc-xlate-1_b.H
* g++.dg/modules/legacy-3_b.H
* g++.dg/modules/legacy-3_c.H: Likewise.
---
 gcc/testsuite/g++.dg/modules/cpp-2_c.C   |  2 +-
 gcc/testsuite/g++.dg/modules/cpp-5_c.C   |  2 +-
 gcc/testsuite/g++.dg/modules/dep-2.C |  2 +-
 gcc/testsuite/g++.dg/modules/dir-only-2_b.C  | 10 +-
 gcc/testsuite/g++.dg/modules/pr99050_b.C |  2 +-
 libcpp/lex.cc|  7 +--
 gcc/testsuite/g++.dg/modules/inc-xlate-1_b.H |  2 +-
 gcc/testsuite/g++.dg/modules/legacy-3_b.H|  2 +-
 gcc/testsuite/g++.dg/modules/legacy-3_c.H|  2 +-
 9 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/cpp-2_c.C 
b/gcc/testsuite/g++.dg/modules/cpp-2_c.C
index c6e02b7800a..2a79031eb34 100644
--- a/gcc/testsuite/g++.dg/modules/cpp-2_c.C
+++ b/gcc/testsuite/g++.dg/modules/cpp-2_c.C
@@ -11,7 +11,7 @@ import nope;
 #endif
 think
 
-// { dg-final { scan-file cpp-2_c.i {cpp-2_c.C"\n\n\n\nmodule  bob;\n#pragma 
GCC unused\nimport  "[^\n]*\./cpp-2_b.H" \[\[ CLOSE ]];\nimport  
"[^\n]*cpp-2_a.H" \[\[ ]];\n} } }
+// { dg-final { scan-file cpp-2_c.i {cpp-2_c.C"\n\n\n\nmodule bob;\n#pragma 
GCC unused\nimport "[^\n]*\./cpp-2_b.H" \[\[ CLOSE ]];\nimport 
"[^\n]*cpp-2_a.H" \[\[ ]];\n} } }
 // { dg-final { scan-file cpp-2_c.i "int i;" } }
 // { dg-final { scan-file-not cpp-2_c.i "import *nope;" } }
 // { dg-final { scan-file cpp-2_c.i "THIS IS STDIO\n" } }
diff --git a/gcc/testsuite/g++.dg/modules/cpp-5_c.C 
b/gcc/testsuite/g++.dg/modules/cpp-5_c.C
index e0a78a516ae..7f75a228cda 100644
--- a/gcc/testsuite/g++.dg/modules/cpp-5_c.C
+++ b/gcc/testsuite/g++.dg/modules/cpp-5_c.C
@@ -7,4 +7,4 @@ import "cpp-5_a.H";
 
 Q
 
-// { dg-final { scan-file cpp-5_c.i {\nimport  "[^\n]*cpp-5_a.H";\n\n0\n} } }
+// { dg-final { scan-file cpp-5_c.i {\nimport "[^\n]*cpp-5_a.H";\n\n0\n} } }
diff --git a/gcc/testsuite/g++.dg/modules/dep-2.C 
b/gcc/testsuite/g++.dg/modules/dep-2.C
index 2dccab3554d..3c869755785 100644
--- a/gcc/testsuite/g++.dg/modules/dep-2.C
+++ b/gcc/testsuite/g++.dg/modules/dep-2.C
@@ -9,4 +9,4 @@ module m:part;
 // { dg-final { scan-file dep-2.d {\ngcm.cache/m:part\.gcm:| dep-2\.o} } }
 // { dg-final { scan-file dep-2.d {\n\.PHONY: m:part\.c\+\+-module} } }
 
-// { dg-final { scan-file dep-2.i {\nmodule  m:part;\n} } }
+// { dg-final { scan-file dep-2.i {\nmodule m:part;\n} } }
diff --git a/gcc/testsuite/g++.dg/modules/dir-only-2_b.C 
b/gcc/testsuite/g++.dg/modules/dir-only-2_b.C
index 1009ae5b3a2..b1ef9b95e58 100644
--- a/gcc/testsuite/g++.dg/modules/dir-only-2_b.C
+++ b/gcc/testsuite/g++.dg/modules/dir-only-2_b.C
@@ -21,8 +21,8 @@ export module bob;
 
 export import q;
 
-// { dg-final { scan-file dir-only-2_b.i {// a comment\nmodule ;\nfrob} } }
-// { dg-final { scan-file dir-only-2_b.i {frob\nexport\nimport  foo;\nimport 
7;} } }
-// { dg-final { scan-file dir-only-2_b.i {import  
"[^\n]*/dir-only-2_a.H";\nimport  "[^\n]*/dir-only-2_a.H";\nX} } }
-// { dg-final { scan-file dir-only-2_b.i {export  module  bob;\n\nexport  
import  q;} } }
-// { dg-final { scan-file dir-only-2_b.i {import  sing;\n\n\n// comment} } }
+// { dg-final { scan-file dir-only-2_b.i {// a comment\nmodule;\nfrob} } }
+// { dg-final { scan-file dir-only-2_b.i {frob\nexport\nimport foo;\nimport 
7;} } }
+// { dg-final { scan-file dir-only-2_b.i {import 
"[^\n]*/dir-only-2_a.H";\nimport "[^\n]*/dir-only-2_a.H";\nX} } }
+// { dg-final { scan-file dir-only-2_b.i {export module bob;\n\nexport import 
q;} } }
+// { dg-final { scan-file dir-only-2_b.i {import sing;\n\n\n// comment} } }
diff --git a/gcc/testsuite/g++.dg/modules/pr99050_b.C 
b/gcc/testsuite/g++.dg/modules/pr99050_b.C
index 439e216eb16..f2c95631321 100644
--- a/gcc/testsuite/g++.dg/modules/pr99050_b.C
+++ b/gcc/testsuite/g++.dg/modules/pr99050_b.C
@@ -4,4 +4,4 @@
 
 int main () {}
 
-// { dg-final { scan-file pr99050_b.i {import  "[^\n]*99050_a.H" 
\[\[__translated\]\];\n} }  }
+// { dg-final { scan-file pr99050_b.i {import "[^\n]*99050_a.H" 
\[\[__translated\]\];\n} }  }
diff --git a/libcpp/lex.cc b/libcpp/lex.c

Re: [PATCH v3] RISC-V: Optimize branches with shifted immediate operands

2024-10-09 Thread Jeff Law




On 10/9/24 2:59 AM, Jovan Vukic wrote:

After the valuable feedback I received, it’s clear to me that the
oversight was in the tests showing the benefits of the patch. In the
test file, I added functions f5 and f6, which now generate more
efficient code with fewer instructions.

Before the patch:

f5:
 li      a4,2097152
 addi    a4,a4,-2048
 li      a5,1167360
 and     a0,a0,a4
 addi    a5,a5,-2048
 beq     a0,a5,.L4

f6:
 li      a5,3407872
 addi    a5,a5,-2048
 and     a0,a0,a5
 li      a5,1114112
 beq     a0,a5,.L7

After the patch:

f5:
 srli    a5,a0,11
 andi    a5,a5,1023
 li      a4,569
 beq     a5,a4,.L5

f6:
 srli    a5,a0,11
 andi    a5,a5,1663
 li      a4,544
 beq     a5,a4,.L9

2024-10-09  Jovan Vukic  

 PR target/115921

gcc/ChangeLog:

 * config/riscv/iterators.md (any_eq): New code iterator.
 * config/riscv/riscv.h (COMMON_TRAILING_ZEROS): New macro.
 (SMALL_AFTER_COMMON_TRAILING_SHIFT): Ditto.
 * config/riscv/riscv.md 
(*branch_shiftedarith__shifted):
 New pattern.

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/branch-1.c: Additional tests.

Thanks!  I've pushed this to the trunk.

Jeff



[PATCH RFC 1/4] c-family: add -fsearch-include-path

2024-10-09 Thread Jason Merrill
Tested x86_64-pc-linux-gnu.  Any thoughts?

-- 8< --

The C++ modules code has a -fmodule-header option to specify looking up
headers to compile to header units on the usual include paths.  I'd like to
have the same functionality for full C++20 modules such as module std, which
could also live on the include path.  But this behavior doesn't seem
necessarily connected to modules, so I'm proposing a general C/C++ option to
specify the behavior of looking in the include path for the input files
specified on the command line.

Other ideas for the name of the option are very welcome.

gcc/ChangeLog:

* doc/cppopts.texi: Document -fsearch-include-path.

gcc/c-family/ChangeLog:

* c.opt: Add -fsearch-include-path.
* c-opts.cc (c_common_post_options): Handle it.

gcc/cp/ChangeLog:

* module.cc (module_preprocess_options): Don't override it.
---
 gcc/doc/cppopts.texi   | 11 +++
 gcc/doc/invoke.texi|  5 +
 gcc/c-family/c.opt |  7 +++
 gcc/c-family/c-opts.cc | 13 +
 gcc/cp/module.cc   |  3 ++-
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
index 5b5b0848ae8..e3686b63337 100644
--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -270,6 +270,17 @@ When preprocessing, do not shorten system header paths 
with canonicalization.
 @item -fmax-include-depth=@var{depth}
 Set the maximum depth of the nested #include. The default is 200. 
 
+@opindex fsearch-include-path
+@item -fsearch-include-path
+Look for input files on the #include path, not just the current
+directory.  This is particularly useful with C++20 modules, for which
+both header units and module interface units need to be compiled
+directly:
+
+@smallexample
+g++ -c -std=c++20 -fmodules-ts -fsearch-include-path bits/stdc++.h std.cppm
+@end smallexample
+
 @opindex ftabstop
 @item -ftabstop=@var{width}
 Set the distance between tab stops.  This helps the preprocessor report
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c0c8bf1c29a..c69d032323e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -38042,6 +38042,11 @@ installed.  Specifying the language as one of these 
variants also
 inhibits output of the object file, as header files have no associated
 object file.
 
+Alternately, or for a module interface unit in an installed location,
+you can use @option{-fsearch-include-path} to specify that the main
+source file should be found on the include path rather than the
+current directory.
+
 Header units can be used in much the same way as precompiled headers
 (@pxref{Precompiled Headers}), but with fewer restrictions: an
 #include that is translated to a header unit import can appear at any
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 1f2e72a0bb7..0aada1c3080 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2237,6 +2237,13 @@ frtti
 C++ ObjC++ Optimization Var(flag_rtti) Init(1)
 Generate run time type descriptor information.
 
+fsearch-include-path
+C ObjC C++ ObjC++
+Look for the main source file on the include path.
+
+fsearch-include-path=
+C++ ObjC++ Joined RejectNegative Undocumented
+
 fshort-enums
 C ObjC C++ ObjC++ LTO Optimization Var(flag_short_enums)
 Use the narrowest integer type possible for enumeration types.
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 510e0870140..2798b4d295f 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -769,6 +769,19 @@ c_common_handle_option (size_t scode, const char *arg, 
HOST_WIDE_INT value,
   cpp_opts->traditional = 1;
   break;
 
+case OPT_fsearch_include_path:
+  cpp_opts->main_search = CMS_user;
+  break;
+
+case OPT_fsearch_include_path_:
+  if (!strcmp (arg, "user"))
+   cpp_opts->main_search = CMS_user;
+  else if (!strcmp (arg, "system"))
+   cpp_opts->main_search = CMS_system;
+  else
+   error ("invalid argument %qs to %<-fsearch-include-path%>", arg);
+  break;
+
 case OPT_v:
   verbose = true;
   break;
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 2dc59ce8a12..e58c057a96e 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -21069,7 +21069,8 @@ module_preprocess_options (cpp_reader *reader)
}
   auto *opt = cpp_get_options (reader);
   opt->module_directives = true;
-  opt->main_search = cpp_main_search (flag_header_unit);
+  if (opt->main_search == CMS_none)
+   opt->main_search = cpp_main_search (flag_header_unit);
 }
 }
 

base-commit: dcee0b6547211a428b75adb03a461285fed0f20d
-- 
2.46.2



[PATCH RFC 2/4] c++: add internal --header flag

2024-10-09 Thread Jason Merrill
Tested x86_64-pc-linux-gnu.  Any other ideas for how to communicate headerness
to the front-end?

-- 8< --

C++20 modules support has depended on the driver seeing the -fmodules-ts
flag in order to implicitly add other flags.  This won't work when we start
enabling modules support by default; it's better to leave it up to the
front-end to decide what behaviors are implied by modules.

But for header units to be generated properly, we still need some way for
the driver to communicate to the front-end that we're dealing with a header.
We could almost exploit the --output-pch flag for this, but that doesn't
help with preprocessed output, including during -save-temps.

So this patch adds an internal --header flag for communication between the
driver and the front-end.  Users are not expected to pass this flag
themselves.

The patch then removes all -fmodules-ts checks from C++ lang-spec.h
in favor of a few lines in c_common_post_options.  It also replaces
-fmodule-header= with -fsearch-include-path= and adds --header as
appropriate.

Instead of implying -fdirectives-only when preprocessing a header unit, this
patch implies only -dD, which seems like enough to transmit the macro
definitions to the eventual header unit CMI.

Before this change, legacy-3_b.H got neither -fdirectives-only nor -dD
because lang-specs was only checking for -fmodules-ts, not -fmodule-header.
After this change it has -dD active, so I needed to add some blank lines as
suggested by the comment, and adjust the expected line numbers accordingly.

gcc/ChangeLog:

* doc/invoke.texi: Update module header preprocessing guidance.

gcc/c-family/ChangeLog:

* c.opt: Add --header.
* c-opts.cc (c_common_post_options): Use it to enable
modules header unit mode.

gcc/cp/ChangeLog:

* lang-specs.h: Use --header and -fsearch-include-path
instead of -fmodule-header and checking -fmodules-ts.
* module.cc (init_modules): Update PCH comment.
(handle_module_option): -fmodule-header implies --header.

gcc/testsuite/ChangeLog:

* g++.dg/modules/legacy-3_b.H: Adjust for -dD.
---
 gcc/doc/invoke.texi   | 15 
 gcc/c-family/c.opt|  3 ++
 gcc/cp/lang-specs.h   | 42 ---
 gcc/c-family/c-opts.cc| 12 +++
 gcc/cp/module.cc  |  4 +--
 gcc/testsuite/g++.dg/modules/legacy-3_b.H |  6 +++-
 6 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c69d032323e..b27e85bc38a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -38235,14 +38235,17 @@ does not require macro expansion, so it is not 
necessary to use
 @option{-MD}.  See also @option{-fdeps-*} for an alternate format for
 module dependency information.
 
-The @option{-save-temps} option uses @option{-fdirectives-only} for
-preprocessing, and preserve the macro definitions in the preprocessed
-output.  Usually you also want to use this option when explicitly
-preprocessing a header-unit, or consuming such preprocessed output:
+Preprocessing a header with modules enabled implicitly enables
+@option{-dD} so that if the preprocessed output is then compiled to
+produce a header unit, the macro definitions are included.  In that
+second compilation you also need to tell the compiler that it's a
+preprocessed header; the simplest way to do that is compiling the
+@samp{.ii} file (which implies @option{-x c++-cpp-output}) with
+@option{-fmodule-header}.
 
 @smallexample
-g++ -fmodules-ts -E -fdirectives-only my-header.hh -o my-header.ii
-g++ -x c++-header -fmodules-ts -fpreprocessed -fdirectives-only my-header.ii
+g++ -fmodules-ts -E my-header.hh -o my-header.ii
+g++ -c -fmodule-header my-header.ii
 @end smallexample
 
 @node C++ Compiled Module Interface
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 0aada1c3080..6056dbde4b4 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -76,6 +76,9 @@ C ObjC C++ ObjC++ Separate Alias(-embed-dir=) 
MissingArgError(missing path after
 -embed-directory=
 C ObjC C++ ObjC++ Joined Alias(-embed-dir=) MissingArgError(missing path after 
%qs)
 
+-header
+C ObjC C++ ObjC++ Undocumented Var(flag_header)
+
 -imacros
 C ObjC C++ ObjC++ Separate Alias(imacros) MissingArgError(missing filename 
after %qs)
 
diff --git a/gcc/cp/lang-specs.h b/gcc/cp/lang-specs.h
index e5651567a2d..b6ed85c399c 100644
--- a/gcc/cp/lang-specs.h
+++ b/gcc/cp/lang-specs.h
@@ -46,63 +46,51 @@ along with GCC; see the file COPYING3.  If not see
   {".c++m", "@c++", 0, 0, 0},
   {".ccm", "@c++", 0, 0, 0},
   {"@c++-header",
-  "%{E|M|MM:cc1plus -E %{fmodules-ts:-fdirectives-only -fmodule-header}"
+  "%{E|M|MM:cc1plus -E --header"
   "  %(cpp_options) %2 %(cpp_debug_options)}"
   "%{!E:%{!M:%{!MM:"
-  "  %{save-temps*|no-integrated-cpp:cc1plus -E"
-  "%{fmodules-ts:-fdirectives-only -fmodule-header}"
+ 

[PATCH 4/4] c++: enable modules by default in c++20

2024-10-09 Thread Jason Merrill
Tested x86_64-pc-linux-gnu, will apply to trunk with the rest of the patch
series.

-- 8< --

At this point there doesn't seem to be much reason not to have modules
support enabled by default in C++20, and it's good to get more test coverage to
find corner case bugs like some I fixed recently.

It also seems to make more sense to use the spelling -fmodules rather than
-fmodules-ts, much like we moved from -fconcepts-ts to -fconcepts.  The old
spelling is still accepted for backward compatibility.

gcc/ChangeLog:

* doc/invoke.texi: Modules are now enabled by default in C++20.
Change advertised flag from -fmodules-ts to -fmodules.

gcc/c-family/ChangeLog:

* c.opt: Change advertised flag from -fmodules-ts to -fmodules.
* c-opts.cc (c_common_post_options): Enable modules by default
in C++20.

gcc/testsuite/ChangeLog:

* g++.dg/template/error25.C: Adjust 'export' diagnostic.
* g++.old-deja/g++.benjamin/tem05.C: Likewise.
* g++.old-deja/g++.pt/export1.C: Likewise.
* g++.dg/pch/pch.exp: Specify -fno-modules.
---
 gcc/doc/invoke.texi   | 32 +--
 gcc/c-family/c.opt| 10 +++---
 gcc/c-family/c-opts.cc|  6 +++-
 gcc/testsuite/g++.dg/template/error25.C   |  2 +-
 .../g++.old-deja/g++.benjamin/tem05.C |  8 ++---
 gcc/testsuite/g++.old-deja/g++.pt/export1.C   |  2 +-
 gcc/testsuite/g++.dg/pch/pch.exp  |  2 +-
 7 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b27e85bc38a..29fd99fff92 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -223,7 +223,7 @@ in the following sections.
 -fno-implicit-templates
 -fno-implicit-inline-templates
 -fno-implement-inlines
--fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules-ts
+-fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules
 -fmodule-implicit-inline
 -fno-module-lazy
 -fmodule-mapper=@var{specification}
@@ -3501,14 +3501,12 @@ To save space, do not emit out-of-line copies of inline 
functions
 controlled by @code{#pragma implementation}.  This causes linker
 errors if these functions are not inlined everywhere they are called.
 
-@opindex fmodules-ts
-@opindex fno-modules-ts
-@item -fmodules-ts
-@itemx -fno-modules-ts
-Enable support for C++20 modules (@pxref{C++ Modules}).  The
-@option{-fno-modules-ts} is usually not needed, as that is the
-default.  Even though this is a C++20 feature, it is not currently
-implicitly enabled by selecting that standard version.
+@opindex fmodules
+@opindex fno-modules
+@item -fmodules
+@itemx -fno-modules
+Enable support for C++20 modules (@pxref{C++ Modules}).  This flag is
+enabled by default for C++20 and above.
 
 @opindex fmodule-header
 @item -fmodule-header
@@ -38004,9 +38002,9 @@ affected by how you partition header files into header 
units.
 
 @end table
 
-Modular compilation is @emph{not} enabled with just the
-@option{-std=c++20} option.  You must explicitly enable it with the
-@option{-fmodules-ts} option.  It is independent of the language
+Modular compilation is enabled with the @option{-std=c++20} option.
+You can also enable or disable it explicitly with the
+@option{-fmodules} option.  It is independent of the language
 version selected, although in pre-C++20 versions, it is of course an
 extension.
 
@@ -38024,7 +38022,7 @@ Acyclic Graph (DAG).  You must build imports before the 
importer.
 Header files may themselves be compiled to header units, which are a
 transitional ability aiming at faster compilation.  The
 @option{-fmodule-header} option is used to enable this, and implies
-the @option{-fmodules-ts} option.  These CMIs are named by the fully
+the @option{-fmodules} option.  These CMIs are named by the fully
 resolved underlying header file, and thus may be a complete pathname
 containing subdirectories.  If the header file is found at an absolute
 pathname, the CMI location is still relative to a CMI root directory.
@@ -38033,7 +38031,7 @@ As header files often have no suffix, you commonly have 
to specify a
 @option{-x} option to tell the compiler the source is a header file.
 You may use @option{-x c++-header}, @option{-x c++-user-header} or
 @option{-x c++-system-header}.  When used in conjunction with
-@option{-fmodules-ts}, these all imply an appropriate
+@option{-fmodules}, these all imply an appropriate
 @option{-fmodule-header} option.  The latter two variants use the
 user or system include path to search for the file specified.  This
 allows you to, for instance, compile standard library header files as
@@ -38056,8 +38054,8 @@ the bits/stdc++.h header used for libstdc++ precompiled 
headers you
 can
 
 @smallexample
-g++ -fmodules-ts -x c++-system-header -c bits/stdc++.h
-g++ -fmodules-ts -include bits/stdc++.h mycode.C
+g++ -fmodules -x c++-system-header -c bits/stdc++.h
+g++ -fmodules -include bits/stdc++.h mycode.C
 @end smallexampl

Re: [PATCH] libgcc: Use inline variable instead of function-local static

2024-10-09 Thread Jason Merrill

On 10/8/24 8:58 AM, Michael Matz wrote:

Hello,

On Tue, 8 Oct 2024, Jonathan Wakely wrote:


We originally had global static variables, which means a different
variable per TU. That causes ODR violations which were silently
ignored until we try to use them in modules, where they're diagnosed.
So we need to replace them.


Aren't these variables an implementation detail?  If so, violating language
rules like ODR should be okay.  (Imagine libgcc were written in some
language where such rules don't exist.)

So isn't the actual problem that needs solving rather that c++ modules
trip over these here?


That's an interesting point.  We need to handle internal variables in
header units; I'm not sure how much trouble it would be to provide a way
to suppress the exposure diagnostic for particular statics in a named
module.


Jason



Re: [PATCH 4/4] c++: enable modules by default in c++20

2024-10-09 Thread Patrick Palka
On Wed, 9 Oct 2024, Jason Merrill wrote:

> Tested x86_64-pc-linux-gnu, will apply to trunk with the rest of the patch
> series.
> 
> -- 8< --
> 
> At this point there doesn't seem to be much reason not to have modules
> support enabled by default in C++20, and it's good get more test coverage to
> find corner case bugs like some I fixed recently.

Not sure how much we care about PCH anymore, but won't this effectively
disable PCH in C++20 and later due to

  /* C++ modules and PCH don't play together.  */
  if (flag_modules)
return 2;

in c_common_valid_pch?

> 
> It also seems to make more sense to use the spelling -fmodules rather than
> -fmodules-ts, much like we moved from -fconcepts-ts to -fconcepts.  The old
> spelling is still accepted for backward compatibility.
> 
> gcc/ChangeLog:
> 
>   * doc/invoke.texi: Modules are now enabled by default in C++20.
>   Change advertised flag from -fmodules-ts to -fmodules.
> 
> gcc/c-family/ChangeLog:
> 
>   * c.opt: Change advertised flag from -fmodules-ts to -fmodules.
>   * c-opts.cc (c_common_post_options): Enable modules by default
>   in C++20.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/template/error25.C: Adjust 'export' diagnostic.
>   * g++.old-deja/g++.benjamin/tem05.C: Likewise.
>   * g++.old-deja/g++.pt/export1.C: Likewise.
>   * g++.dg/pch/pch.exp: Specify -fno-modules.
> ---
>  gcc/doc/invoke.texi   | 32 +--
>  gcc/c-family/c.opt| 10 +++---
>  gcc/c-family/c-opts.cc|  6 +++-
>  gcc/testsuite/g++.dg/template/error25.C   |  2 +-
>  .../g++.old-deja/g++.benjamin/tem05.C |  8 ++---
>  gcc/testsuite/g++.old-deja/g++.pt/export1.C   |  2 +-
>  gcc/testsuite/g++.dg/pch/pch.exp  |  2 +-
>  7 files changed, 32 insertions(+), 30 deletions(-)
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index b27e85bc38a..29fd99fff92 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -223,7 +223,7 @@ in the following sections.
>  -fno-implicit-templates
>  -fno-implicit-inline-templates
>  -fno-implement-inlines
> --fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules-ts
> +-fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules
>  -fmodule-implicit-inline
>  -fno-module-lazy
>  -fmodule-mapper=@var{specification}
> @@ -3501,14 +3501,12 @@ To save space, do not emit out-of-line copies of 
> inline functions
>  controlled by @code{#pragma implementation}.  This causes linker
>  errors if these functions are not inlined everywhere they are called.
>  
> -@opindex fmodules-ts
> -@opindex fno-modules-ts
> -@item -fmodules-ts
> -@itemx -fno-modules-ts
> -Enable support for C++20 modules (@pxref{C++ Modules}).  The
> -@option{-fno-modules-ts} is usually not needed, as that is the
> -default.  Even though this is a C++20 feature, it is not currently
> -implicitly enabled by selecting that standard version.
> +@opindex fmodules
> +@opindex fno-modules
> +@item -fmodules
> +@itemx -fno-modules
> +Enable support for C++20 modules (@pxref{C++ Modules}).  This flag is
> +enabled by default for C++20 and above.
>  
>  @opindex fmodule-header
>  @item -fmodule-header
> @@ -38004,9 +38002,9 @@ affected by how you partition header files into 
> header units.
>  
>  @end table
>  
> -Modular compilation is @emph{not} enabled with just the
> -@option{-std=c++20} option.  You must explicitly enable it with the
> -@option{-fmodules-ts} option.  It is independent of the language
> +Modular compilation is enabled with the @option{-std=c++20} option.
> +You can also enable or disable it explicitly with the
> +@option{-fmodules} option.  It is independent of the language
>  version selected, although in pre-C++20 versions, it is of course an
>  extension.
>  
> @@ -38024,7 +38022,7 @@ Acyclic Graph (DAG).  You must build imports before 
> the importer.
>  Header files may themselves be compiled to header units, which are a
>  transitional ability aiming at faster compilation.  The
>  @option{-fmodule-header} option is used to enable this, and implies
> -the @option{-fmodules-ts} option.  These CMIs are named by the fully
> +the @option{-fmodules} option.  These CMIs are named by the fully
>  resolved underlying header file, and thus may be a complete pathname
>  containing subdirectories.  If the header file is found at an absolute
>  pathname, the CMI location is still relative to a CMI root directory.
> @@ -38033,7 +38031,7 @@ As header files often have no suffix, you commonly 
> have to specify a
>  @option{-x} option to tell the compiler the source is a header file.
>  You may use @option{-x c++-header}, @option{-x c++-user-header} or
>  @option{-x c++-system-header}.  When used in conjunction with
> -@option{-fmodules-ts}, these all imply an appropriate
> +@option{-fmodules}, these all imply an appropriate
>  @option{-fmodule-header} option.  The latter two variants use the
>  user or system i

RE: [PATCH v5] gcc, libcpp: Add warning switch for "#pragma once in main file" [PR89808]

2024-10-09 Thread Jiang, Haochen
> From: Andreas Schwab 
> Sent: Wednesday, October 9, 2024 2:04 PM
> 
> ../../libcpp/directives.cc: In function 'void do_pragma_once(cpp_reader*)':
> ../../libcpp/directives.cc:2078:20: error: unknown conversion type character
> '<' in format [-Werror=format=]
>  2078 |  "%<#pragma once%> in main file");
>   |^
> ../../libcpp/directives.cc:2078:34: error: unknown conversion type character
> '>' in format [-Werror=format=]
>  2078 |  "%<#pragma once%> in main file");
>   |  ^
> cc1plus: all warnings being treated as errors
> make[3]: *** [Makefile:227: directives.o] Error 1
> 

Same bootstrap failure for me and my script on x86_64:

https://gcc.gnu.org/pipermail/gcc-regression/2024-October/080957.html

Thx,
Haochen

> --
> Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196
> BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7 "And now for something
> completely different."


[PATCH v5 1/2] aarch64: Add SVE2 faminmax intrinsics

2024-10-09 Thread saurabh.jha

The AArch64 FEAT_FAMINMAX extension introduces instructions for
computing the floating point absolute maximum and minimum of the
two vectors element-wise.

This patch introduces SVE2 faminmax intrinsics. The intrinsics of this
extension are implemented as the following builtin functions:
* sva[max|min]_[m|x|z]
* sva[max|min]_[f16|f32|f64]_[m|x|z]
* sva[max|min]_n_[f16|f32|f64]_[m|x|z]

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins-base.cc
(svamax): Absolute maximum declaration.
(svamin): Absolute minimum declaration.
* config/aarch64/aarch64-sve-builtins-base.def
(REQUIRED_EXTENSIONS): Add faminmax intrinsics behind a flag.
(svamax): Absolute maximum declaration.
(svamin): Absolute minimum declaration.
* config/aarch64/aarch64-sve-builtins-base.h: Declaring function
bases for the new intrinsics.
* config/aarch64/aarch64.h
(TARGET_SVE_FAMINMAX): New flag for SVE2 faminmax.
* config/aarch64/iterators.md: New unspecs, iterators, and attrs
for the new intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test.
---
 .../aarch64/aarch64-sve-builtins-base.cc  |   4 +
 .../aarch64/aarch64-sve-builtins-base.def |   5 +
 .../aarch64/aarch64-sve-builtins-base.h   |   2 +
 gcc/config/aarch64/aarch64.h  |   1 +
 gcc/config/aarch64/iterators.md   |  18 +-
 .../aarch64/sve2/acle/asm/amax_f16.c  | 437 ++
 .../aarch64/sve2/acle/asm/amax_f32.c  | 437 ++
 .../aarch64/sve2/acle/asm/amax_f64.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f16.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f32.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f64.c  | 437 ++
 11 files changed, 2651 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 4b33585d981..b189818d643 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3071,6 +3071,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0))
 FUNCTION (svadrd, svadr_bhwd_impl, (3))
 FUNCTION (svadrh, svadr_bhwd_impl, (1))
 FUNCTION (svadrw, svadr_bhwd_impl, (2))
+FUNCTION (svamax, cond_or_uncond_unspec_function,
+	  (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
+FUNCTION (svamin, cond_or_uncond_unspec_function,
+	  (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
 FUNCTION (svand, rtx_code_function, (AND, AND))
 FUNCTION (svandv, reduction, (UNSPEC_ANDV))
 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index 65fcba91586..95e04e4393d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none)
 DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
 DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX
+DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
+DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h
index 5bbf3569c4b..978cf7013f9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h
@@ -37,6 +37,8 @@ namespace aarch64_sve
 extern const function_base *const svadrd;
 extern const function_base *const svadrh;
 extern const function_base *const svadrw;
+extern const function_base *const svamax;
+extern const function_base *const svamin;
 extern const function_base *const svand;
 extern const function_base *const svandv;
 extern const function_base *const svasr;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 030cffb1760..593319fd472 100644
--- a/gcc/c

[PATCH v5 0/2] Add support for SVE2 faminmax

2024-10-09 Thread saurabh.jha
From: Saurabh Jha 

This patch series is a revised version of:
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/664391.html

Previous review comments are in this thread:
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/664329.html

The second patch of this series is okay to merge. The changes are in the
first patch and are as follows:
1. Fixing sve_pred_fp_rhs2_operand for the new unspecs as the new
   operators don't have an immediate form.
2. Adding new intrinsic test cases to make sure we handle immediate
   arguments correctly. Also removed the use of fmov instructions.

Regression tested on aarch64-unknown-linux-gnu and found no regressions.

Ok for master?

Thanks,
Saurabh 

Saurabh Jha (2):
  aarch64: Add SVE2 faminmax intrinsics
  aarch64: Add codegen support for SVE2 faminmax

 .../aarch64/aarch64-sve-builtins-base.cc  |   4 +
 .../aarch64/aarch64-sve-builtins-base.def |   5 +
 .../aarch64/aarch64-sve-builtins-base.h   |   2 +
 gcc/config/aarch64/aarch64-sve2.md|  37 ++
 gcc/config/aarch64/aarch64.h  |   1 +
 gcc/config/aarch64/iterators.md   |  24 +-
 .../gcc.target/aarch64/sve/faminmax_1.c   |  44 ++
 .../gcc.target/aarch64/sve/faminmax_2.c   |  60 +++
 .../aarch64/sve2/acle/asm/amax_f16.c  | 437 ++
 .../aarch64/sve2/acle/asm/amax_f32.c  | 437 ++
 .../aarch64/sve2/acle/asm/amax_f64.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f16.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f32.c  | 437 ++
 .../aarch64/sve2/acle/asm/amin_f64.c  | 437 ++
 14 files changed, 2798 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c

-- 
2.34.1



[PATCH v5 2/2] aarch64: Add codegen support for SVE2 faminmax

2024-10-09 Thread saurabh.jha

The AArch64 FEAT_FAMINMAX extension introduces instructions for
computing the floating point absolute maximum and minimum of the
two vectors element-wise.

This patch adds code generation for famax and famin in terms of existing
unspecs. With this patch:
1. famax can be expressed as taking the absolute value (UNSPEC_COND_FABS)
   of each of the two operands and then taking UNSPEC_COND_SMAX of the
   results.
2. famin can be expressed as taking the absolute value (UNSPEC_COND_FABS)
   of each of the two operands and then taking UNSPEC_COND_SMIN of the
   results.

This fusion of operators is only possible when
-march=armv9-a+faminmax+sve is passed. We also need to pass the
-ffast-math flag; this is what enables the compiler to use
UNSPEC_COND_SMAX and UNSPEC_COND_SMIN.

This code generation is only available on -O2 or -O3 as that is when
auto-vectorization is enabled.

gcc/ChangeLog:

* config/aarch64/aarch64-sve2.md
(*aarch64_pred_faminmax_fused): Instruction pattern for faminmax
codegen.
* config/aarch64/iterators.md: Iterator and attribute for
faminmax codegen.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/faminmax_1.c: New test.
* gcc.target/aarch64/sve/faminmax_2.c: New test.
---
 gcc/config/aarch64/aarch64-sve2.md| 37 
 gcc/config/aarch64/iterators.md   |  6 ++
 .../gcc.target/aarch64/sve/faminmax_1.c   | 44 ++
 .../gcc.target/aarch64/sve/faminmax_2.c   | 60 +++
 4 files changed, 147 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 725092cc95f..5f2697c3179 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2467,6 +2467,43 @@
   [(set_attr "movprfx" "yes")]
 )
 
+;; -
+;; -- [FP] Absolute maximum and minimum
+;; -
+;; Includes:
+;; - FAMAX
+;; - FAMIN
+;; -
+;; Predicated floating-point absolute maximum and minimum.
+(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(match_operand: 1 "register_operand")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
+	   (unspec:SVE_FULL_F
+	 [(match_operand 5)
+	  (const_int SVE_RELAXED_GP)
+	  (match_operand:SVE_FULL_F 2 "register_operand")]
+	 UNSPEC_COND_FABS)
+	   (unspec:SVE_FULL_F
+	 [(match_operand 6)
+	  (const_int SVE_RELAXED_GP)
+	  (match_operand:SVE_FULL_F 3 "register_operand")]
+	 UNSPEC_COND_FABS)]
+	  SVE_COND_SMAXMIN))]
+  "TARGET_SVE_FAMINMAX"
+  {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
+ [ w, Upl , %0 , w ; *  ] \t%0., %1/m, %0., %3.
+ [ ?&w  , Upl , w  , w ; yes] movprfx\t%0, %2\;\t%0., %1/m, %0., %3.
+  }
+  "&& (!rtx_equal_p (operands[1], operands[5])
+   || !rtx_equal_p (operands[1], operands[6]))"
+  {
+operands[5] = copy_rtx (operands[1]);
+operands[6] = copy_rtx (operands[1]);
+  }
+)
+
 ;; =
 ;; == Complex arithmetic
 ;; =
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index cbacf59c451..244a9c1b75d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3143,6 +3143,9 @@
 	 UNSPEC_COND_SMAX
 	 UNSPEC_COND_SMIN])
 
+(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX
+   UNSPEC_COND_SMIN])
+
 (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
 	  UNSPEC_COND_FMLS
 	  UNSPEC_COND_FNMLA
@@ -4503,6 +4506,9 @@
 
 (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
 
+(define_int_attr faminmax_cond_uns_op
+  [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")])
+
 (define_int_attr faminmax_uns_op
   [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
new file mode 100644
index 000..3b65ccea065
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -ffast-math" } */
+
+#include "arm_sve.h"
+
+#pragma GCC target "+sve+faminmax"
+
+#define TEST_FAMAX(TYPE)		\
+  void fn_famax_##TYPE (TYPE * restrict a,\
+			TYPE * restrict b,\
+			TYPE * restrict c,\
+			int n) {	\
+for (int i = 0; i < n; i++) {	\
+  TYPE temp1 = __builtin_fabs (a[i]);\
+  TYPE temp2 = __builtin_fabs (b[i]);\
+  c[i] = __builtin_fmax (temp1, temp2);\
+}	\
+  }	

RE: [PATCH]middle-end: support SLP early break

2024-10-09 Thread Richard Biener
On Tue, 8 Oct 2024, Tamar Christina wrote:

> > -Original Message-
> > From: Richard Biener 
> > Sent: Wednesday, October 2, 2024 1:50 PM
> > To: Tamar Christina 
> > Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> > Subject: Re: [PATCH]middle-end: support SLP early break
> > 
> > On Tue, 1 Oct 2024, Tamar Christina wrote:
> > 
> > > Hi all,
> > >
> > > This patch introduces feature parity for early break int the SLP only
> > > vectorizer.
> > >
> > > The approach taken here is to treat the early exits as root statements 
> > > for an
> > > SLP tree.  This means that we don't need any changes to build_slp to 
> > > support
> > > gconds.
> > >
> > > Codegen for the gcond itself now has to be done out of line but the body 
> > > of the
> > > SLP blocks itself is simply driven by SLP scheduling.  There is a slight
> > > awkwardness in having re-used vectorizable_early_exit for both SLP and 
> > > non-SLP
> > > but I've documented the differences and when I did try to refactor it it 
> > > wasn't
> > > really worth it given that this is a temporary state anyway.
> > >
> > > This version is restricted to lane = 1, as such we can re-use the existing
> > > move_early_break function instead of having to do safety update through
> > > scheduling.  I have a branch where I'm working on that but lane > 1 is 
> > > out of
> > > scope for GCC 15 anyway.   The only reason I will try to get moving 
> > > through
> > > scheduling done as a stretch goal is so we get epilogue vectorization 
> > > back for
> > > early break.
> > >
> > > The example:
> > >
> > > unsigned test4(unsigned x)
> > > {
> > >  unsigned ret = 0;
> > >  for (int i = 0; i < N; i++)
> > >  {
> > >vect_b[i] = x + i;
> > >if (vect_a[i]*2 != x)
> > >  break;
> > >vect_a[i] = x;
> > >
> > >  }
> > >  return ret;
> > > }
> > >
> > > builds the following SLP instance for early break:
> > >
> > > note:   Analyzing vectorizable control flow: if (patt_6 != 0)
> > > note:   Starting SLP discovery for
> > > note: patt_6 = _4 != x_9(D);
> > > note:   starting SLP discovery for node 0x63abc80
> > > note:   Build SLP for patt_6 = _4 != x_9(D);
> > > note:   precomputed vectype: vector(4) 
> > > note:   nunits = 4
> > > note:   vect_is_simple_use: operand x_9(D), type of def: external
> > > note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 
> > > 0][2, +INF]
> > MASK 0x
> > > _3 * 2, type of def: internal
> > > note:   starting SLP discovery for node 0x63abdc0
> > > note:   Build SLP for _4 = _3 * 2;
> > > note:   precomputed vectype: vector(4) unsigned int
> > > note:   nunits = 4
> > > note:   vect_is_simple_use: operand #
> > > vect_aD.4416[i_15], type of def: internal
> > > note:   vect_is_simple_use: operand 2, type of def: constant
> > > note:   starting SLP discovery for node 0x63abe60
> > > note:   Build SLP for _3 = vect_a[i_15];
> > > note:   precomputed vectype: vector(4) unsigned int
> > > note:   nunits = 4
> > > note:   SLP discovery for node 0x63abe60 succeeded
> > > note:   SLP discovery for node 0x63abdc0 succeeded
> > > note:   SLP discovery for node 0x63abc80 succeeded
> > > note:   SLP size 3 vs. limit 10.
> > > note:   Final SLP tree for instance 0x6474190:
> > > note:   node 0x63abc80 (max_nunits=4, refcnt=2) vector(4) 
> > > 
> > > note:   op template: patt_6 = _4 != x_9(D);
> > > note: stmt 0 patt_6 = _4 != x_9(D);
> > > note: children 0x63abd20 0x63abdc0
> > > note:   node (external) 0x63abd20 (max_nunits=1, refcnt=1)
> > > note: { x_9(D) }
> > > note:   node 0x63abdc0 (max_nunits=4, refcnt=2) vector(4) unsigned int
> > > note:   op template: _4 = _3 * 2;
> > > note: stmt 0 _4 = _3 * 2;
> > > note: children 0x63abe60 0x63abf00
> > > note:   node 0x63abe60 (max_nunits=4, refcnt=2) vector(4) unsigned int
> > > note:   op template: _3 = vect_a[i_15];
> > > note: stmt 0 _3 = vect_a[i_15];
> > > note: load permutation { 0 }
> > > note:   node (constant) 0x63abf00 (max_nunits=1, refcnt=1)
> > > note: { 2 }
> > >
> > > and during codegen:
> > >
> > > note:   -->vectorizing SLP node starting from: patt_6 = _4 != x_9(D);
> > > note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 
> > > 0][2, +INF]
> > MASK 0x
> > > _3 * 2, type of def: internal
> > > note:   add new stmt: mask_patt_6.18_58 = _53 != vect__4.17_57;
> > > note:=== vectorizable_early_exit ===
> > > note:transform early-exit.
> > > note:   vectorizing stmts using SLP.
> > > note:   Vectorizing SLP tree:
> > > note:   node 0x63abfa0 (max_nunits=4, refcnt=1) vector(4) int
> > > note:   op template: i_12 = i_15 + 1;
> > > note: stmt 0 i_12 = i_15 + 1;
> > > note: children 0x63aba00 0x63ac040
> > > note:   node 0x63aba00 (max_nunits=4, refcnt=2) vector(4) int
> > > note:   op template: i_15 = PHI 
> > > note: [l] stmt 0 i_15 = PHI 
> > > note: children (nil) (nil)
> > > note:   node (constant) 0x63ac040 (max_nunits=1, refcnt=1) vec

RE: [PATCH]middle-end: support SLP early break

2024-10-09 Thread Tamar Christina
> -Original Message-
> From: Richard Biener 
> Sent: Wednesday, October 9, 2024 9:20 AM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> Subject: RE: [PATCH]middle-end: support SLP early break
> 
> On Tue, 8 Oct 2024, Tamar Christina wrote:
> 
> > > -Original Message-
> > > From: Richard Biener 
> > > Sent: Wednesday, October 2, 2024 1:50 PM
> > > To: Tamar Christina 
> > > Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> > > Subject: Re: [PATCH]middle-end: support SLP early break
> > >
> > > On Tue, 1 Oct 2024, Tamar Christina wrote:
> > >
> > > > Hi all,
> > > >
> > > > This patch introduces feature parity for early break int the SLP only
> > > > vectorizer.
> > > >
> > > > The approach taken here is to treat the early exits as root statements 
> > > > for an
> > > > SLP tree.  This means that we don't need any changes to build_slp to 
> > > > support
> > > > gconds.
> > > >
> > > > Codegen for the gcond itself now has to be done out of line but the 
> > > > body of
> the
> > > > SLP blocks itself is simply driven by SLP scheduling.  There is a slight
> > > > awkwardness in having re-used vectorizable_early_exit for both SLP and 
> > > > non-
> SLP
> > > > but I've documented the differences and when I did try to refactor it 
> > > > it wasn't
> > > > really worth it given that this is a temporary state anyway.
> > > >
> > > > This version is restricted to lane = 1, as such we can re-use the 
> > > > existing
> > > > move_early_break function instead of having to do safety update through
> > > > scheduling.  I have a branch where I'm working on that but lane > 1 is 
> > > > out of
> > > > scope for GCC 15 anyway.   The only reason I will try to get moving 
> > > > through
> > > > scheduling done as a stretch goal is so we get epilogue vectorization 
> > > > back for
> > > > early break.
> > > >
> > > > The example:
> > > >
> > > > unsigned test4(unsigned x)
> > > > {
> > > >  unsigned ret = 0;
> > > >  for (int i = 0; i < N; i++)
> > > >  {
> > > >vect_b[i] = x + i;
> > > >if (vect_a[i]*2 != x)
> > > >  break;
> > > >vect_a[i] = x;
> > > >
> > > >  }
> > > >  return ret;
> > > > }
> > > >
> > > > builds the following SLP instance for early break:
> > > >
> > > > note:   Analyzing vectorizable control flow: if (patt_6 != 0)
> > > > note:   Starting SLP discovery for
> > > > note: patt_6 = _4 != x_9(D);
> > > > note:   starting SLP discovery for node 0x63abc80
> > > > note:   Build SLP for patt_6 = _4 != x_9(D);
> > > > note:   precomputed vectype: vector(4) 
> > > > note:   nunits = 4
> > > > note:   vect_is_simple_use: operand x_9(D), type of def: external
> > > > note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 
> > > > 0][2,
> +INF]
> > > MASK 0x
> > > > _3 * 2, type of def: internal
> > > > note:   starting SLP discovery for node 0x63abdc0
> > > > note:   Build SLP for _4 = _3 * 2;
> > > > note:   precomputed vectype: vector(4) unsigned int
> > > > note:   nunits = 4
> > > > note:   vect_is_simple_use: operand #
> > > > vect_aD.4416[i_15], type of def: internal
> > > > note:   vect_is_simple_use: operand 2, type of def: constant
> > > > note:   starting SLP discovery for node 0x63abe60
> > > > note:   Build SLP for _3 = vect_a[i_15];
> > > > note:   precomputed vectype: vector(4) unsigned int
> > > > note:   nunits = 4
> > > > note:   SLP discovery for node 0x63abe60 succeeded
> > > > note:   SLP discovery for node 0x63abdc0 succeeded
> > > > note:   SLP discovery for node 0x63abc80 succeeded
> > > > note:   SLP size 3 vs. limit 10.
> > > > note:   Final SLP tree for instance 0x6474190:
> > > > note:   node 0x63abc80 (max_nunits=4, refcnt=2) vector(4)  boolean:32>
> > > > note:   op template: patt_6 = _4 != x_9(D);
> > > > note:   stmt 0 patt_6 = _4 != x_9(D);
> > > > note:   children 0x63abd20 0x63abdc0
> > > > note:   node (external) 0x63abd20 (max_nunits=1, refcnt=1)
> > > > note:   { x_9(D) }
> > > > note:   node 0x63abdc0 (max_nunits=4, refcnt=2) vector(4) unsigned int
> > > > note:   op template: _4 = _3 * 2;
> > > > note:   stmt 0 _4 = _3 * 2;
> > > > note:   children 0x63abe60 0x63abf00
> > > > note:   node 0x63abe60 (max_nunits=4, refcnt=2) vector(4) unsigned int
> > > > note:   op template: _3 = vect_a[i_15];
> > > > note:   stmt 0 _3 = vect_a[i_15];
> > > > note:   load permutation { 0 }
> > > > note:   node (constant) 0x63abf00 (max_nunits=1, refcnt=1)
> > > > note:   { 2 }
> > > >
> > > > and during codegen:
> > > >
> > > > note:   -->vectorizing SLP node starting from: patt_6 = _4 != 
> > > > x_9(D);
> > > > note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 
> > > > 0][2,
> +INF]
> > > MASK 0x
> > > > _3 * 2, type of def: internal
> > > > note:   add new stmt: mask_patt_6.18_58 = _53 != vect__4.17_57;
> > > > note:=== vectorizable_early_exit ===
> > > > note:trans

Re: [PATCH 3/3] aarch64: libgcc: Add -Werror support

2024-10-09 Thread Christophe Lyon
On Wed, 9 Oct 2024 at 03:05, Eric Gallager  wrote:
>
> On Tue, Oct 8, 2024 at 6:25 AM Richard Sandiford
>  wrote:
> >
> > Christophe Lyon  writes:
> > > When --enable-werror is enabled when running the top-level configure,
> > > it passes --enable-werror-always to subdirs.  Some of them, like
> > > libgcc, ignore it.
> > >
> > > This patch adds support for it, enabled only for aarch64, to avoid
> > > breaking bootstrap for other targets.
> > >
> > > The patch also adds -Wno-prio-ctor-dtor to avoid a warning when compiling 
> > > lse_init.c
> > >
> > >   libgcc/
> > >   * Makefile.in (WERROR): New.
> > >   * config/aarch64/t-aarch64: Handle WERROR. Always use
> > >   -Wno-prio-ctor-dtor.
> > >   * configure.ac: Add support for --enable-werror-always.
> > >   * configure: Regenerate.
> > > ---
> > >  libgcc/Makefile.in  |  1 +
> > >  libgcc/config/aarch64/t-aarch64 |  1 +
> > >  libgcc/configure| 31 +++
> > >  libgcc/configure.ac |  5 +
> > >  4 files changed, 38 insertions(+)
> > >
> > > [...]
> > > diff --git a/libgcc/configure.ac b/libgcc/configure.ac
> > > index 4e8c036990f..6b3ea2aea5c 100644
> > > --- a/libgcc/configure.ac
> > > +++ b/libgcc/configure.ac
> > > @@ -13,6 +13,7 @@ sinclude(../config/unwind_ipinfo.m4)
> > >  sinclude(../config/gthr.m4)
> > >  sinclude(../config/sjlj.m4)
> > >  sinclude(../config/cet.m4)
> > > +sinclude(../config/warnings.m4)
> > >
> > >  AC_INIT([GNU C Runtime Library], 1.0,,[libgcc])
> > >  AC_CONFIG_SRCDIR([static-object.mk])
> > > @@ -746,6 +747,10 @@ AC_SUBST(HAVE_STRUB_SUPPORT)
> > >  # Determine what GCC version number to use in filesystem paths.
> > >  GCC_BASE_VER
> > >
> > > +# Only enable with --enable-werror-always until existing warnings are
> > > +# corrected.
> > > +ACX_PROG_CC_WARNINGS_ARE_ERRORS([manual])
> >
> > It looks like this is borrowed from libcpp and/or libdecnumber.
> > Those are a bit different from libgcc in that they're host libraries
> > that can be built with any supported compiler (including non-GCC ones).
> > > In contrast, libgcc can only be built with the corresponding version
> > of GCC.  The usual restrictions on -Werror -- only use it during stages
> > 2 and 3, or if the user explicitly passes --enable-werror -- don't apply
> > in libgcc's case.  We should always be building with the "right" version
> > of GCC (even for Canadian crosses) and so should always be able to use
> > -Werror.
> >
> > So personally, I think we should just go with:
> >
> > diff --git a/libgcc/config/aarch64/t-aarch64 
> > b/libgcc/config/aarch64/t-aarch64
> > index b70e7b94edd..ae1588ce307 100644
> > --- a/libgcc/config/aarch64/t-aarch64
> > +++ b/libgcc/config/aarch64/t-aarch64
> > @@ -30,3 +30,4 @@ LIB2ADDEH += \
> > $(srcdir)/config/aarch64/__arm_za_disable.S
> >
> >  SHLIB_MAPFILES += $(srcdir)/config/aarch64/libgcc-sme.ver
> > +LIBGCC2_CFLAGS += $(WERROR) -Wno-prio-ctor-dtor
> >
> > ...this, but with $(WERROR) replaced by -Werror.
> >
> > At least, it would be a good way of finding out if there's a case
> > I've forgotten :)
> >
> > Let's see what others think though.
>
> I think it would be worthwhile to test this assumption first; I have a
> vague memory of having seen warnings in libgcc previously that would
> presumably get turned into errors if -Werror were applied
> unconditionally...
>
Sorry, it's not clear to me what you mean by "test this assumption" ?
Do you mean I should push the patch with unconditional -Werror and
monitor what happens for a while?
Or investigate more / other targets?
Or wait for others to commit?

Thanks,

Christophe

> >
> > Thanks,
> > Richard


Re: [PATCH] testsuite: Define missing and use ET for arm_arch_* and arm_cpu_*

2024-10-09 Thread Richard Earnshaw (lists)

On 07/10/2024 20:04, Torbjorn SVENSSON wrote:

Hi Richard,

On 2024-10-07 12:45, Richard Earnshaw (lists) wrote:

On 07/10/2024 09:03, Torbjörn SVENSSON wrote:

Ok for trunk?

--

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.


The acronym ET isn't one I recognize - I'm guessing you intend it to 
be Effective Target, rather than Extra Terrestrial, or Elf Target or 
some other expansion?  I think perhaps it would be better to avoid 
this in the commit log.  Your summary line is also a little imprecise 
as I suspect we will have more patches of a similar nature for some 
other patches soon.  Something like:


testsuite: arm: use effective-target for vsel* and mod* tests

would be closer


I'm fairly certain that I've seen the abbr ET for effective-target 
somewhere, but I could be wrong. Anyway, I've used your suggestion and 
will push it as soon as I get a comment on my questions below.






gcc/testsuite/ChangeLog

* gcc.target/arm/pr65647.c: Use ET arm_arch_v6m.
* gcc.target/arm/mod_2.c: Use ET arm_cpu_cortex_a57.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/arm/vseleqdf.c: Likewise.
* gcc.target/arm/vseleqsf.c: Likewise.
* gcc.target/arm/vselgedf.c: Likewise.
* gcc.target/arm/vselgesf.c: Likewise.
* gcc.target/arm/vselgtdf.c: Likewise.
* gcc.target/arm/vselgtsf.c: Likewise.
* gcc.target/arm/vselledf.c: Likewise.
* gcc.target/arm/vsellesf.c: Likewise.
* gcc.target/arm/vselltdf.c: Likewise.
* gcc.target/arm/vselltsf.c: Likewise.
* gcc.target/arm/vselnedf.c: Likewise.
* gcc.target/arm/vselnesf.c: Likewise.
* gcc.target/arm/vselvcdf.c: Likewise.
* gcc.target/arm/vselvcsf.c: Likewise.
* gcc.target/arm/vselvsdf.c: Likewise.
* gcc.target/arm/vselvssf.c: Likewise.
* lib/target-supports.exp: Define EF arm_cpu_cortex_a57.  Update ET

   ^^
Typo for ET?


Yes :S



The body of the patch is OK with an updated commit message.

Thanks.
R.



arm_v8_1_lob_ok to use -mcpu=unset.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/mod_2.c    | 4 +++-
  gcc/testsuite/gcc.target/arm/mod_256.c  | 4 +++-
  gcc/testsuite/gcc.target/arm/pr65647.c  | 3 ++-
  gcc/testsuite/gcc.target/arm/vseleqdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vseleqsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgedf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgtdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgtsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselledf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vsellesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselltdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselltsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselnedf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselnesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvcdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvcsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvsdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvssf.c | 5 +++--
  gcc/testsuite/lib/target-supports.exp   | 3 ++-
  20 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c 
b/gcc/testsuite/gcc.target/arm/mod_2.c

index 1143725d59a..3a203b67d73 100644
--- a/gcc/testsuite/gcc.target/arm/mod_2.c
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -1,7 +1,9 @@
  /* { dg-do compile } */
  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */

  /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
  #include "../aarch64/mod_2.x"
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c 
b/gcc/testsuite/gcc.target/arm/mod_256.c

index d8dca0fe7d5..3521d7a05f3 100644
--- a/gcc/testsuite/gcc.target/arm/mod_256.c
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -1,7 +1,9 @@
  /* { dg-do compile } */
  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */

  /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
  #include "../aarch64/mod_256.x"
diff --git a/gcc/testsuite/gcc.target/arm/pr65647.c 
b/gcc/testsuite/gcc.target/arm/pr65647.c

index 26b4e399f6b..dc3a3ca1184 100644
--- a/gcc/testsuite/gcc.target/arm/pr65647.c
+++ b/gcc/testsuite/gcc.target/arm/pr65647.c
@@ -1,7 +1,8 @@
  /* { dg-do compile } */
  /* { dg-require-effective-target arm_arch_v6m_ok } */
  /* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { 
"-mfloat-abi=*" } {"-mfloat-abi=soft" } } */

-/* { dg-options "-march=armv6-m -mthumb -O3 -w -mfloat-abi=soft" } */
+/* { dg-options "-mthumb -O3 -

[PATCH] config: add -Werror=lto-type-mismatch,odr to bootstrap-lto*

2024-10-09 Thread Sam James
Add -Werror=lto-type-mismatch,odr to bootstrap-lto* configurations to
help stop LTO breakage/correctness issues sneaking in.

We discussed -Werror=strict-aliasing but it runs early and doesn't
give better diagnostics with LTO so left it out.

config/ChangeLog:
PR rust/108087
PR ada/115917
PR modula2/114529
PR modula2/116181
PR other/116182

* bootstrap-lto-lean.mk: Pass -Werror=lto-type-mismatch,odr.
* bootstrap-lto-noplugin.mk: Ditto.
* bootstrap-lto.mk: Ditto.
---
OK once PR117038 is fixed? (It snuck in yesterday).

Bootstrapped all languages on x86_64-pc-linux-gnu.

 config/bootstrap-lto-lean.mk |  8 +---
 config/bootstrap-lto-noplugin.mk | 10 +-
 config/bootstrap-lto.mk  | 10 +-
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/config/bootstrap-lto-lean.mk b/config/bootstrap-lto-lean.mk
index 42cb3394c70b..f176390ba21a 100644
--- a/config/bootstrap-lto-lean.mk
+++ b/config/bootstrap-lto-lean.mk
@@ -1,10 +1,12 @@
 # This option enables LTO for stage4 and LTO for generators in stage3 with 
profiledbootstrap.
 # Otherwise, LTO is used in only stage3.
 
-STAGE3_CFLAGS += -flto=jobserver
+
+STAGE2_CFLAGS += -flto=jobserver -Werror=lto-type-mismatch -Werror=odr
+STAGE3_CFLAGS += -flto=jobserver -Werror=lto-type-mismatch -Werror=odr
 override STAGEtrain_CFLAGS := $(filter-out 
-flto=jobserver,$(STAGEtrain_CFLAGS))
-STAGEtrain_GENERATOR_CFLAGS += -flto=jobserver
-STAGEfeedback_CFLAGS += -flto=jobserver
+STAGEtrain_GENERATOR_CFLAGS += -flto=jobserver -Werror=lto-type-mismatch 
-Werror=odr
+STAGEfeedback_CFLAGS += -flto=jobserver -Werror=lto-type-mismatch -Werror=odr
 
 # assumes the host supports the linker plugin
 LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
diff --git a/config/bootstrap-lto-noplugin.mk b/config/bootstrap-lto-noplugin.mk
index 0f50708e49d1..660ca60dbd3d 100644
--- a/config/bootstrap-lto-noplugin.mk
+++ b/config/bootstrap-lto-noplugin.mk
@@ -1,9 +1,9 @@
 # This option enables LTO for stage2 and stage3 on
 # hosts without linker plugin support.
 
-STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 -ffat-lto-objects
-STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 -ffat-lto-objects
-STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1
+STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 -ffat-lto-objects 
-Werror=lto-type-mismatch -Werror=odr
+STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 -ffat-lto-objects 
-Werror=lto-type-mismatch -Werror=odr
+STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1 
-Werror=lto-type-mismatch -Werror=odr
+STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1 -Werror=lto-type-mismatch 
-Werror=odr
+STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1 
-Werror=lto-type-mismatch -Werror=odr
 do-compare = /bin/true
diff --git a/config/bootstrap-lto.mk b/config/bootstrap-lto.mk
index 1ddb1d870bab..9f76c03f8a68 100644
--- a/config/bootstrap-lto.mk
+++ b/config/bootstrap-lto.mk
@@ -1,10 +1,10 @@
 # This option enables LTO for stage2 and stage3 in slim mode
 
-STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1
-STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1
+STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 -Werror=lto-type-mismatch 
-Werror=odr
+STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 -Werror=lto-type-mismatch 
-Werror=odr
+STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1 
-Werror=lto-type-mismatch -Werror=odr
+STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1 -Werror=lto-type-mismatch 
-Werror=odr
+STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1 
-Werror=lto-type-mismatch -Werror=odr
 
 # assumes the host supports the linker plugin
 LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/

base-commit: 9df0772d50d8f8a75389d319949632e5d111cc6c
-- 
2.47.0



[committed] libstdc++: Fix -Wsign-compare in std::latch::count_down

2024-10-09 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

Also add assertions for the precondition on the parameter's value.

libstdc++-v3/ChangeLog:

* include/std/latch (latch::count_down): Add assertions for
preconditions. Cast parameter to avoid -Wsign-compare on some
targets.
---
 libstdc++-v3/include/std/latch | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/latch b/libstdc++-v3/include/std/latch
index 146e1860979..1d254aa2581 100644
--- a/libstdc++-v3/include/std/latch
+++ b/libstdc++-v3/include/std/latch
@@ -63,9 +63,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX_ALWAYS_INLINE void
 count_down(ptrdiff_t __update = 1)
 {
+  __glibcxx_assert(__update >= 0);
   auto const __old = __atomic_impl::fetch_sub(&_M_a,
__update, memory_order::release);
-  if (__old == __update)
+  __glibcxx_assert(__update >= 0);
+  if (__old == static_cast<__detail::__platform_wait_t>(__update))
__atomic_impl::notify_all(&_M_a);
 }
 
@@ -88,6 +90,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 }
 
   private:
+// This alignas is not redundant, it increases the alignment for
+// long long on x86.
 alignas(__alignof__(__detail::__platform_wait_t)) 
__detail::__platform_wait_t _M_a;
   };
 _GLIBCXX_END_NAMESPACE_VERSION
-- 
2.46.2



[committed] libstdc++: Ignore _GLIBCXX_USE_POSIX_SEMAPHORE if not supported [PR116992]

2024-10-09 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

If _GLIBCXX_HAVE_POSIX_SEMAPHORE is undefined then users get an error
when defining _GLIBCXX_USE_POSIX_SEMAPHORE. We can just ignore it
instead (and warn them it's being ignored).

This fixes a testsuite failure on hppa64-hp-hpux11.11 (and probably some
other targets):

FAIL: 30_threads/semaphore/platform_try_acquire_for.cc  -std=gnu++20 (test for 
excess errors)
Excess errors:
semaphore:49: error: '__semaphore_impl' has not been declared

libstdc++-v3/ChangeLog:

PR libstdc++/116992
* include/bits/semaphore_base.h (_GLIBCXX_USE_POSIX_SEMAPHORE):
Undefine and issue a warning if POSIX sem_t is not supported.
* testsuite/30_threads/semaphore/platform_try_acquire_for.cc:
Prune new warning.
---
 libstdc++-v3/include/bits/semaphore_base.h | 3 +++
 .../testsuite/30_threads/semaphore/platform_try_acquire_for.cc | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/include/bits/semaphore_base.h 
b/libstdc++-v3/include/bits/semaphore_base.h
index 9d73b37e60a..dd16d2c9249 100644
--- a/libstdc++-v3/include/bits/semaphore_base.h
+++ b/libstdc++-v3/include/bits/semaphore_base.h
@@ -45,6 +45,9 @@
 # include  // errno, EINTR, EAGAIN etc.
 # include// SEM_VALUE_MAX
 # include // sem_t, sem_init, sem_wait, sem_post etc.
+#elif defined(_GLIBCXX_USE_POSIX_SEMAPHORE)
+# warning "POSIX semaphore not available, ignoring 
_GLIBCXX_USE_POSIX_SEMAPHORE"
+# undef _GLIBCXX_USE_POSIX_SEMAPHORE
 #endif
 
 namespace std _GLIBCXX_VISIBILITY(default)
diff --git 
a/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc 
b/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
index bf6cd142bf0..6d90564ea8a 100644
--- a/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
+++ b/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
@@ -5,3 +5,5 @@
 // { dg-add-options libatomic }
 
 #include "try_acquire_for.cc"
+
+// { dg-prune-output "ignoring _GLIBCXX_USE_POSIX_SEMAPHORE" }
-- 
2.46.2



[committed] libstdc++: Fix -Wnarrowing in <complex> [PR116991]

2024-10-09 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

When _GLIBCXX_USE_C99_COMPLEX_ARC is undefined we use the generic
__complex_acos function template for _Float32 etc. and that gives a
-Wnarrowing warning:

complex:2043: warning: ISO C++ does not allow converting to '_Float32' from 
'long double' with greater conversion rank [-Wnarrowing]

Use a cast to do the conversion so that it doesn't warn.

libstdc++-v3/ChangeLog:

PR libstdc++/116991
* include/std/complex (__complex_acos): Cast literal to
destination type.
---
 libstdc++-v3/include/std/complex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex
index 5bc6618f7de..eb89e3a8bcf 100644
--- a/libstdc++-v3/include/std/complex
+++ b/libstdc++-v3/include/std/complex
@@ -2040,7 +2040,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 __complex_acos(const std::complex<_Tp>& __z)
 {
   const std::complex<_Tp> __t = std::asin(__z);
-  const _Tp __pi_2 = 1.5707963267948966192313216916397514L;
+  const _Tp __pi_2 = (_Tp) 1.5707963267948966192313216916397514L;
   return std::complex<_Tp>(__pi_2 - __t.real(), -__t.imag());
 }
 
-- 
2.46.2



[wwwdocs] Document that gcc-8 changed the default to -std=gnu17

2024-10-09 Thread Jonathan Wakely
OK for wwwdocs?

---
 htdocs/gcc-8/changes.html | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/htdocs/gcc-8/changes.html b/htdocs/gcc-8/changes.html
index c329a509..6761fb90 100644
--- a/htdocs/gcc-8/changes.html
+++ b/htdocs/gcc-8/changes.html
@@ -24,6 +24,8 @@ You may also want to check out our
 
 Caveats
 
+  The default mode for C is now -std=gnu17 instead of
+-std=gnu11.
   Support for the obsolete SDB/coff debug info format has been
   removed.  The option -gcoff no longer
   does anything.
@@ -536,6 +538,11 @@ $ gcc unclosed-2.c
 
 
 
+C
+  
+The default mode has been changed to -std=gnu17.
+  
+
 C++
 
   GCC 8 (-fabi-version=12) has a couple of corrections to the 
calling
-- 
2.46.2



Re: [PATCH v4 2/7] OpenMP: middle-end support for dispatch + adjust_args

2024-10-09 Thread Tobias Burnus

Hi PA,

sorry for the slow review. Looks mostly fine, but I stumbled over a few 
minor points. Some are only RFC items (some of the 'I wonder …').



Paul-Antoine Arras wrote:

This patch adds middle-end support for the `dispatch` construct and the
`adjust_args` clause. The heavy lifting is done in `gimplify_omp_dispatch` and
`gimplify_call_expr` respectively. For `adjust_args`, this mostly consists in
emitting a call to `gomp_get_mapped_ptr` for the adequate device.


omp_get_… not gomp_get_…


For dispatch, the following steps are performed:

* Handle the device clause, if any: set the default-device ICV at the top of the
dispatch region and restore its previous value at the end.

* Handle novariants and nocontext clauses, if any. Evaluate compile-time
constants and select a variant, if possible. Otherwise, emit code to handle all
possible cases at run time.

* If depend clauses are present, add a taskwait construct before the dispatch
region and move them there.


The latter is not done here – but already in the front ends, i.e. 
OMP_TASK are handled in part 3 (C), 4 (C++) and 6 (Fortran) of this series.


...


--- a/gcc/gimple.cc
+++ b/gcc/gimple.cc


...


+/* Build a GIMPLE_OMP_DISPATCH statement.
+
+   BODY is the target function call to be dispatched.
+   CLAUSES are any of the OMP dispatch construct's clauses: ...  */


Looks as if you planned to add something here. How about:
s/: ..././ ?



@@ -4067,23 +4069,125 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, 
bool want_value)



+ if (flag_openmp && EXPR_P (CALL_EXPR_FN (*expr_p))
+ && DECL_P (TREE_OPERAND (CALL_EXPR_FN (*expr_p), 0))
+ && (adjust_args_list = lookup_attribute (
+   "omp declare variant variant adjust_args",
+   DECL_ATTRIBUTES (
+ TREE_OPERAND (CALL_EXPR_FN (*expr_p), 0
+  != NULL_TREE
+ && gimplify_omp_ctxp != NULL
+ && gimplify_omp_ctxp->code == OMP_DISPATCH
+ && !gimplify_omp_ctxp->in_call_args)
+   {


!= should be under 'a' of 'adjust' (remove one space)

And I wonder whether it is a bit more readable and a tiny bit faster if 
you move the gimplify_omp_ctx checks directly after flag_openmp

and only if successful ('&&') check for the attributes.



+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
+   {
+ tree decl1 = DECL_NAME (OMP_CLAUSE_DECL (c));
+ tree decl2 = tree_strip_nop_conversions (*arg_p);
+ if (TREE_CODE (decl2) == ADDR_EXPR)
+   decl2 = TREE_OPERAND (decl2, 0);
+ gcc_assert (TREE_CODE (decl2) == VAR_DECL
+ || TREE_CODE (decl2) == PARM_DECL);


The first one can be 'VAR_P (decl2)'. I keep wondering whether there
can be cases where that's not true (e.g. VAR_DECL) or some indirect ref.

For Fortran, I could imagine that array descriptors make problems, e.g.

subroutine f(x)
  integer, pointer :: x(:)

where 'x->data' is the device pointer and not 'x'.

(TODO: something to check + possibly to revisit when handling the 
Fortran part; for now (including C/C++), I think we can leave it as is.)


Or something with reference types (→ C++, Fortran), albeit that's more 
for need_device_addr / has_device_addr, which is not yet implemented.



+ bool need_device_ptr = false;
+ for (tree arg
+  = TREE_PURPOSE (TREE_VALUE (adjust_args_list));
+  arg != NULL; arg = TREE_CHAIN (arg))
+   {


...


+   }
+
+ if (need_device_ptr && !is_device_ptr)


Actually, the is_device_ptr loop is only needed when need_device_ptr 
(or, later, need_device_addr) is true; I wonder whether it should be 
swapped and is_device_ptr only be checked conditionally?



+ *arg_p = (TREE_CODE (*arg_p) == NOP_EXPR)
+? TREE_OPERAND (*arg_p, 0)
+: *arg_p;


Use tree_strip_nop_conversions or STRIP_NOPS ? However, it is not clear 
why it is needed here ...



+ gimplify_arg (arg_p, pre_p, loc);
+ gimplify_arg (&device_num, pre_p, loc);
+ call = gimple_build_call (fn, 2, *arg_p, device_num);
+ tree mapped_arg
+   = create_tmp_var (gimple_call_return_type (call));
+ gimple_call_set_lhs (call, mapped_arg);
+ gimplify_seq_add_stmt (pre_p, call);
+
+ *arg_p = mapped_arg;


This line causes the following to attempt to fail:


+ // Mark mapped argument as device pointer to ensure
+  

Re: [PATCH v2] Add -ftime-report-wall

2024-10-09 Thread Andi Kleen
> So, shouldn't we go without the new option and simply change
> -ftime-report behavior?

I think it's fine (given the constraints I outlined earlier).
It will slightly change the output, but I guess there aren't that many
users that parse it mechanically.

I can do that unless someone else objects.

-Andi


Re: [PATCH v3 0/2] ia64: enable LRA and un-obsolete ia64*-*-linux

2024-10-09 Thread Richard Biener
On Tue, 8 Oct 2024, Frank Scheiner wrote:

> With stage 3 of GCC 15 approaching, to save me some time by finally
> dropping the non-LRA testcase from my cross builds of GCC and Linux and
> as I had the time, I updated the patch set from René with the requested
> changes and rebased it to 0ad2c76bea20dbeac753f10df6f9f86d142348d4.
> 
> Patch 1/2: Remove ia64*-*-linux from the list of obsolete targets
> Patch 2/2: Enable LRA for ia64
> 
> LRA functionality was tested by bootstrapping GCC natively and running
> the testsuite on ia64 based on
> 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> 
> https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html
> 
> For comparison, the same with just
> 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> 
> https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html
> 
> A diff between them is attached.
> 
> Can this be brought forward now as is?

I'll push this for you.

Thanks,
Richard.

> Cheers,
> Frank
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v3 0/2] ia64: enable LRA and un-obsolete ia64*-*-linux

2024-10-09 Thread Richard Biener
On Wed, 9 Oct 2024, Richard Biener wrote:

> On Tue, 8 Oct 2024, Frank Scheiner wrote:
> 
> > With stage 3 of GCC 15 approaching, to save me some time by finally
> > dropping the non-LRA testcase from my cross builds of GCC and Linux and
> > as I had the time, I updated the patch set from René with the requested
> > changes and rebased it to 0ad2c76bea20dbeac753f10df6f9f86d142348d4.
> > 
> > Patch 1/2: Remove ia64*-*-linux from the list of obsolete targets
> > Patch 2/2: Enable LRA for ia64
> > 
> > LRA functionality was tested by bootstrapping GCC natively and running
> > the testsuite on ia64 based on
> > 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> > 
> > https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html
> > 
> > For comparison, the same with just
> > 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> > 
> > https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html
> > 
> > A diff between them is attached.
> > 
> > Can this be brought forward now as is?
> 
> I'll push this for you.

I spoke too fast - something between you and me corrupts the patch
so it doesn't apply (even after manually resolving line-wrapping,
I suspect whitespace is also broken).  Can you re-send them as
attachments please?

Thanks,
Richard.

Re: Ping: [PATCH] gcc/doc: adjust __builtin_choose_expr() description

2024-10-09 Thread Jan Beulich
On 08.10.2024 17:38, Sandra Loosemore wrote:
> On 10/8/24 09:35, Jan Beulich wrote:
>> On 08.10.2024 17:30, Sandra Loosemore wrote:
>>> [snip]
>>>
>>> Hmmm, looking at the complete documentation for this built-in, and the
>>> code, I think I'd go a little farther with fixing up the docs.
>>>
>>> Since requiring the first operand to be a constant is also different
>>> behavior than the ?: operator, it's misleading to state only the return
>>> type as being different.  So I'd rewrite the whole paragraph quoted above:
>>>
>>> Like the @samp{? :} operator, the built-in function does not evaluate
>>> the expression that is not chosen.  For example, if @var{const_exp}
>>> evaluates
>>> to @code{true}, @var{exp2} is not evaluated even if it has side effects.
>>> On the other hand, @code{__builtin_choose_expr} differs from @samp{? :}
>>> in that the first operand must be a compile-time constant, and the other
>>> operands are not subject to the @samp{? :} type constraints and promotions.
>>
>> Fine with me; I assume you will then simply put in your version of the
>> change?
> 
> I'm actually not in a good position to do that anytime soon (I'm both 
> sick with COVID and in the middle of hacking on something else), so if 
> you could push the change I'd be grateful.

Done.

Jan


Re: [PATCH v3 1/2] aarch64: Add SVE2 faminmax intrinsics

2024-10-09 Thread Saurabh Jha




On 10/8/2024 3:27 PM, Richard Sandiford wrote:

Saurabh Jha  writes:

Thanks for the review. Wanted to clarify your comment:

On 10/8/2024 11:51 AM, Richard Sandiford wrote:

 writes:

diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c
new file mode 100644
index 000..de4a6f8efaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c
@@ -0,0 +1,312 @@
[...]
+/*
+** amax_h4_f16_m_untied:
+** mov (z[0-9]+\.h), h4
+** movprfx z0, z1
+** famax   z0\.h, p0/m, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_ZD (amax_h4_f16_m_untied, svfloat16_t, __fp16,
+z0 = svamax_n_f16_m (p0, z1, d4),
+z0 = svamax_m (p0, z1, d4))
+
+/*
+** amax_2_f16_m:
+** fmov(z[0-9]+\.h), #2\.0(?:e\+0)?
+** famax   z0\.h, p0/m, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_Z (amax_2_f16_m, svfloat16_t,
+   z0 = svamax_n_f16_m (p0, z0, 2),
+   z0 = svamax_m (p0, z0, 2))


Rather than dropping the tests for 0 and 1, I think we should keep
them and verify that we don't try to use non-existent immediate forms.
(It would be easy for that to happen, if they were added to the wrong
iterators.)

Maybe:

/*
** amax_0_f16_m_tied1:
**  ...
**  famax   z0\.h, p0/m, z0\.h, z[0-9]+\.h
**  ret
*/
TEST_UNIFORM_Z (amax_0_f16_m_tied1, svfloat16_t,
z0 = svamax_n_f16_m (p0, z0, 0),
z0 = svamax_m (p0, z0, 0))

/*
** amax_1_f16_m_tied1:
**  ...
**  famax   z0\.h, p0/m, z0\.h, z[0-9]+\.h
**  ret
*/
TEST_UNIFORM_Z (amax_1_f16_m_tied1, svfloat16_t,
z0 = svamax_n_f16_m (p0, z0, 1),
z0 = svamax_m (p0, z0, 1))

(untested).  Similarly for the other files, and for _z and _x.


Right now, if we do this, we get an ICE like this

unrecognizable insn:
(insn 18 17 19 2 (set (reg:VNx8HF 107)
  (unspec:VNx8HF [
  (reg:VNx8BI 108)
  (unspec:VNx8HF [
  (reg:VNx8BI 108)
  (const_int 1 [0x1])
  (reg:VNx8HF 109)
  (const_vector:VNx8HF repeat [
  (const_double:HF 0.0 [0x0.0p+0])
  ])
  ] UNSPEC_COND_FAMAX)
  (reg:VNx8HF 109)
  ] UNSPEC_SEL))
"/home/saujha01/gnu-work/src/gcc/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c":54:1
-1
   (nil))
during RTL pass: vregs


Ah, yeah, that's the kind of failure that the tests above would
defend against.


Most likely, ICE is not the right answer here. But if we want to have a
test for immediate forms, we would have to add support for it in
instruction patterns, isn't it? But these functions don't expect
immediate operands.

Should we add support for immediate operands in the instruction patterns
or should we somehow write a test so that we make sure we don't
inadvertently add support for immediate forms? I think you meant the
latter but not sure how could we add that test without adding support
for it in the compiler.


Yeah, I meant the latter.  From a user's point of view, this is a
similar sort of situation to:

/*
** amax_2_f32_m:
**  fmov(z[0-9]+\.s), #2\.0(?:e\+0)?
**  famax   z0\.s, p0/m, z0\.s, \1
**  ret
*/
TEST_UNIFORM_Z (amax_2_f32_m, svfloat32_t,
z0 = svamax_n_f32_m (p0, z0, 2),
z0 = svamax_m (p0, z0, 2))

which we do handle correctly.  The idea is the same for 0 and 1:
we should force the constant into a register and use a pure
register FAMIN/FAMAX.

(In the tests above, I dropped matching the fmov part, because being
overly specific about ways of moving zeros into registers forced Tamar
to do a lot of make-work for one of his recent patches.)

I think at least one of the bugs is:

  (define_int_attr sve_pred_fp_rhs2_operand
[(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand")
+   (UNSPEC_COND_FAMAX "aarch64_sve_float_maxmin_operand")
+   (UNSPEC_COND_FAMIN "aarch64_sve_float_maxmin_operand")
 (UNSPEC_COND_FDIV "register_operand")
 (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand")
 (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand")

this should use register_operand rather than aarch64_sve_float_maxmin_operand,
since FAMIN and FAMAX don't have the same immediate forms as FMIN and FMAX.


Thanks for the explanation. Totally makes sense now. I will send a new 
version shortly :)


Thanks,
Richard




[wwwdocs] Document defining _GLIBCXX_ASSERTIONS for -O0

2024-10-09 Thread Jonathan Wakely
---
Pushed to wwwdocs.

 htdocs/gcc-15/changes.html | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/htdocs/gcc-15/changes.html b/htdocs/gcc-15/changes.html
index ad0daf04..805a6703 100644
--- a/htdocs/gcc-15/changes.html
+++ b/htdocs/gcc-15/changes.html
@@ -93,6 +93,9 @@ a work-in-progress.
 Runtime Library (libstdc++)
 
 
+  Debug assertions are now enabled by default for unoptimized builds.
+Use -D_GLIBCXX_NO_ASSERTIONS to override this.
+  
   Improved experimental support for C++26, including:
 
 views::concat.
-- 
2.46.2



Re: [PATCH] libstdc++: Implement P0849R8 auto(x) library changes

2024-10-09 Thread Patrick Palka
On Mon, 7 Oct 2024, Patrick Palka wrote:

> Tested on x86_64-pc-linux-gnu, does this look OK for trunk only?
> This doesn't seem worth backporting since there should be no
> behavior change.
> 
> -- >8 --
> 
> This implements the library changes in P0849R8 "auto(x): decay-copy
> in the language" which consist of replacing most uses of the
> exposition-only function decay-copy with auto(x) throughout the library
> wording.
> 
> Note the main difference between the two is that decay-copy materializes
> its argument whereas auto(x) doesn't, and so the latter is a no-op when
> its argument is a prvalue.  Effectively the former could introduce an
> unnecessary move constructor call in some contexts.  In C++20 and earlier
> we could emulate auto(x) with decay_t<decltype((x))>(x).

I should note this patch is treating this paper as a DR against C++20,
which should be fine since there should be no behavior change in
practice (especially in light of LWG 3724 which constrains decay-copy
to make it SFINAE-friendly).

> 
> After this paper the only remaining uses of decay-copy in the library
> are in the specification of some range adaptors.  In our implementation
> of those range adaptors I believe decay-copy is already implied which
> is why we don't mirror the wording and use __decay_copy explicitly.  So
> since it's apparently no longer needed this patch goes ahead and removes
> __decay_copy.
> 
> libstdc++-v3/ChangeLog:
> 
>   * c++config (_GLIBCXX_AUTO_CAST): Define.
>   * include/bits/iterator_concepts.h (_Decay_copy, __decay_copy):
>   Remove.
>   (__member_begin, __adl_begin): Use _GLIBCXX_AUTO_CAST instead of
>   __decay_copy as per P0849R8.
>   * include/bits/ranges_base.h (_Begin): Likewise.
>   (__member_end, __adl_end, _End): Likewise.
>   (__member_rbegin, __adl_rbegin, _RBegin): Likewise.
>   (__member_rend, __adl_rend, _Rend): Likewise.
>   (__member_size, __adl_size, _Size): Likewise.
>   (_Data): Likewise.
> ---
>  libstdc++-v3/include/bits/c++config   |  6 +++
>  libstdc++-v3/include/bits/iterator_concepts.h | 13 +-
>  libstdc++-v3/include/bits/ranges_base.h   | 40 +--
>  3 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/libstdc++-v3/include/bits/c++config 
> b/libstdc++-v3/include/bits/c++config
> index 29d795f687c..fdbf90e28fc 100644
> --- a/libstdc++-v3/include/bits/c++config
> +++ b/libstdc++-v3/include/bits/c++config
> @@ -265,6 +265,12 @@
>  #define _GLIBCXX_NOEXCEPT_QUAL
>  #endif
>  
> +#if __cpp_auto_cast
> +# define _GLIBCXX_AUTO_CAST(X) auto(X)
> +#else
> +# define _GLIBCXX_AUTO_CAST(X) ::std::__decay_t<decltype((X))>(X)
> +#endif
> +
>  // Macro for extern template, ie controlling template linkage via use
>  // of extern keyword on template declaration. As documented in the g++
>  // manual, it inhibits all implicit instantiations and is used
> diff --git a/libstdc++-v3/include/bits/iterator_concepts.h 
> b/libstdc++-v3/include/bits/iterator_concepts.h
> index 490a362cdf1..0fcfed56737 100644
> --- a/libstdc++-v3/include/bits/iterator_concepts.h
> +++ b/libstdc++-v3/include/bits/iterator_concepts.h
> @@ -1003,19 +1003,10 @@ namespace ranges
>{
>  using std::__detail::__class_or_enum;
>  
> -struct _Decay_copy final
> -{
> -  template
> - constexpr decay_t<_Tp>
> - operator()(_Tp&& __t) const
> - noexcept(is_nothrow_convertible_v<_Tp, decay_t<_Tp>>)
> - { return std::forward<_Tp>(__t); }
> -} inline constexpr __decay_copy{};
> -
>  template
>concept __member_begin = requires(_Tp& __t)
>   {
> -   { __decay_copy(__t.begin()) } -> input_or_output_iterator;
> +   { _GLIBCXX_AUTO_CAST(__t.begin()) } -> input_or_output_iterator;
>   };
>  
>  // Poison pill so that unqualified lookup doesn't find std::begin.
> @@ -1025,7 +1016,7 @@ namespace ranges
>concept __adl_begin = __class_or_enum>
>   && requires(_Tp& __t)
>   {
> -   { __decay_copy(begin(__t)) } -> input_or_output_iterator;
> +   { _GLIBCXX_AUTO_CAST(begin(__t)) } -> input_or_output_iterator;
>   };
>  
>  // Simplified version of std::ranges::begin that only supports lvalues,
> diff --git a/libstdc++-v3/include/bits/ranges_base.h 
> b/libstdc++-v3/include/bits/ranges_base.h
> index cb2eba1f841..80ff1e300ce 100644
> --- a/libstdc++-v3/include/bits/ranges_base.h
> +++ b/libstdc++-v3/include/bits/ranges_base.h
> @@ -115,9 +115,9 @@ namespace ranges
> if constexpr (is_array_v>)
>   return true;
> else if constexpr (__member_begin<_Tp>)
> - return noexcept(__decay_copy(std::declval<_Tp&>().begin()));
> + return noexcept(_GLIBCXX_AUTO_CAST(std::declval<_Tp&>().begin()));
> else
> - return noexcept(__decay_copy(begin(std::declval<_Tp&>())));
> + return noexcept(_GLIBCXX_AUTO_CAST(begin(std::declval<_Tp&>())));
>   }
>  
>  public:
> @@ -142,7 +142,7 @@ namespace ranges
>  template
>concept __m

Re: [PATCH v3 1/2] Enable vectorization for unknown tripcount in very cheap cost model but disable epilog vectorization.

2024-10-09 Thread Richard Biener
On Wed, Oct 9, 2024 at 3:27 AM liuhongt  wrote:
>
> >We'd also need to update the documentation:
>
> >... The @samp{very-cheap} model only
> >allows vectorization if the vector code would entirely replace the
> >scalar code that is being vectorized.  For example, if each iteration
> >of a vectorized loop would only be able to handle exactly four iterations
> >of the scalar loop, the @samp{very-cheap} model would only allow
> >vectorization if the scalar iteration count is known to be a multiple
> >of four.
> Changed.
>
> >And since it's a change in documented behaviour, it should probably
> >be in the release notes too.
>
> Will submit another patch for that when it lands on trunk.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}, 
> aarch64-unknown-linux-gnu{-m32,}.
>
> Ok for trunk?

OK.

Richard.

> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_analyze_loop_costing): Enable
> vectorization for LOOP_VINFO_PEELING_FOR_NITER in very cheap
> cost model.
> (vect_analyze_loop): Disable epilogue vectorization in very
> cheap cost model.
> * doc/invoke.texi: Adjust documents for very-cheap cost model.
> ---
>  gcc/doc/invoke.texi   | 11 ---
>  gcc/tree-vect-loop.cc |  6 +++---
>  2 files changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index b2f16b45eaf..edcadeb108a 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -14309,13 +14309,10 @@ counts that will likely execute faster than when 
> executing the original
>  scalar loop.  The @samp{cheap} model disables vectorization of
>  loops where doing so would be cost prohibitive for example due to
>  required runtime checks for data dependence or alignment but otherwise
> -is equal to the @samp{dynamic} model.  The @samp{very-cheap} model only
> -allows vectorization if the vector code would entirely replace the
> -scalar code that is being vectorized.  For example, if each iteration
> -of a vectorized loop would only be able to handle exactly four iterations
> -of the scalar loop, the @samp{very-cheap} model would only allow
> -vectorization if the scalar iteration count is known to be a multiple
> -of four.
> +is equal to the @samp{dynamic} model.  The @samp{very-cheap} model disables
> +vectorization of loops when any runtime check for data dependence or 
> alignment
> +is required, it also disables vectorization of epilogue loops but otherwise 
> is
> +equal to the @samp{cheap} model.
>
>  The default cost model depends on other optimization flags and is
>  either @samp{dynamic} or @samp{cheap}.
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 6933f597b4d..a76d3b8ea5f 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -2375,8 +2375,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
>   a copy of the scalar code (even if we might be able to vectorize it).  
> */
>if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
>&& (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> - || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
> - || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
> + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)))
>  {
>if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -3681,7 +3680,8 @@ vect_analyze_loop (class loop *loop, gimple 
> *loop_vectorized_call,
>/* No code motion support for multiple epilogues 
> so for now
>   not supported when multiple exits.  */
>  && !LOOP_VINFO_EARLY_BREAKS (first_loop_vinfo)
> -&& !loop->simduid);
> +&& !loop->simduid
> +&& loop_cost_model (loop) > 
> VECT_COST_MODEL_VERY_CHEAP);
>if (!vect_epilogues)
>  return first_loop_vinfo;
>
> --
> 2.31.1
>


Re: [PATCH v3 2/2] Adjust testcase after relax O2 vectorization.

2024-10-09 Thread Richard Biener
On Wed, Oct 9, 2024 at 3:27 AM liuhongt  wrote:
>
> Update in V3.
> >The testcase looks bogus:
> >
> >   b[i+k] = b[i+k-5] + 2;
> >
> >accesses b[-3], can you instead adjust the inner loop to start with k == 4?
>
> Changed; also adjusted b[100] to b[200] to avoid out-of-bounds array accesses.
>
> >Please remove this testcase - even with fully masking we'd need alias
> >versioning.
>
> Changed.
>
> Ready to push to trunk.

OK.

Thanks,
Richard.

> gcc/testsuite/ChangeLog:
>
> * gcc.dg/fstack-protector-strong.c: Adjust
> scan-assembler-times.
> * gcc.dg/graphite/scop-6.c: Refine the testcase to avoid array
> out of bounds.
> * gcc.dg/graphite/scop-9.c: Ditto.
> * gcc.dg/tree-ssa/ivopts-lt-2.c: Add -fno-tree-vectorize.
> * gcc.dg/tree-ssa/ivopts-lt.c: Ditto.
> * gcc.dg/tree-ssa/loop-16.c: Ditto.
> * gcc.dg/tree-ssa/loop-28.c: Ditto.
> * gcc.dg/tree-ssa/loop-bound-2.c: Ditto.
> * gcc.dg/tree-ssa/loop-bound-4.c: Ditto.
> * gcc.dg/tree-ssa/loop-bound-6.c: Ditto.
> * gcc.dg/tree-ssa/predcom-4.c: Ditto.
> * gcc.dg/tree-ssa/predcom-5.c: Ditto.
> * gcc.dg/tree-ssa/scev-11.c: Ditto.
> * gcc.dg/tree-ssa/scev-9.c: Ditto.
> * gcc.dg/tree-ssa/split-path-11.c: Ditto.
> * gcc.dg/unroll-8.c: Ditto.
> * gcc.dg/var-expand1.c: Ditto.
> * gcc.dg/vect/vect-cost-model-6.c: Removed.
> * gcc.target/i386/pr86270.c: Ditto.
> * gcc.target/i386/pr86722.c: Ditto.
> * gcc.target/x86_64/abi/callabi/leaf-2.c: Ditto.
> ---
>  gcc/testsuite/gcc.dg/fstack-protector-strong.c   |  2 +-
>  gcc/testsuite/gcc.dg/graphite/scop-6.c   |  7 +++
>  gcc/testsuite/gcc.dg/graphite/scop-9.c   |  4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c|  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/loop-16.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/loop-28.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/loop-bound-2.c |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/loop-bound-4.c |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/loop-bound-6.c |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c|  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c|  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/scev-11.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/scev-9.c   |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/split-path-11.c|  2 +-
>  gcc/testsuite/gcc.dg/unroll-8.c  |  3 +--
>  gcc/testsuite/gcc.dg/var-expand1.c   |  2 +-
>  gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c| 12 
>  gcc/testsuite/gcc.target/i386/pr86270.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr86722.c  |  2 +-
>  gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c |  2 +-
>  21 files changed, 23 insertions(+), 37 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c
>
> diff --git a/gcc/testsuite/gcc.dg/fstack-protector-strong.c 
> b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
> index 94dc3508f1a..b9f63966b7c 100644
> --- a/gcc/testsuite/gcc.dg/fstack-protector-strong.c
> +++ b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
> @@ -154,4 +154,4 @@ void foo12 ()
>global3 ();
>  }
>
> -/* { dg-final { scan-assembler-times "stack_chk_fail" 12 } } */
> +/* { dg-final { scan-assembler-times "stack_chk_fail" 11 } } */
> diff --git a/gcc/testsuite/gcc.dg/graphite/scop-6.c 
> b/gcc/testsuite/gcc.dg/graphite/scop-6.c
> index 9bc1d9f4ccd..e7e0a080c5f 100644
> --- a/gcc/testsuite/gcc.dg/graphite/scop-6.c
> +++ b/gcc/testsuite/gcc.dg/graphite/scop-6.c
> @@ -4,7 +4,7 @@ int toto()
>  {
>int i, j, k;
>int a[100][100];
> -  int b[100];
> +  int b[200];
>
>for (i = 1; i < 100; i++)
>  {
> @@ -18,9 +18,8 @@ int toto()
>  for (k = 1; k < 100; k++)
>b[i+k] = b[i+k-1] + 2;
>  }
> -
> -  for (k = 1; k < 100; k++)
> -b[i+k] = b[i+k-5] + 2;
> +  for (k = 4; k < 100; k++)
> +   b[i+k] = b[i+k-5] + 2;
>  }
>
>return a[3][5] + b[2];
> diff --git a/gcc/testsuite/gcc.dg/graphite/scop-9.c 
> b/gcc/testsuite/gcc.dg/graphite/scop-9.c
> index b19291be2f8..2676452b1e6 100644
> --- a/gcc/testsuite/gcc.dg/graphite/scop-9.c
> +++ b/gcc/testsuite/gcc.dg/graphite/scop-9.c
> @@ -4,7 +4,7 @@ int toto()
>  {
>int i, j, k;
>int a[100][100];
> -  int b[100];
> +  int b[200];
>
>for (i = 1; i < 100; i++)
>  {
> @@ -14,7 +14,7 @@ int toto()
>if (i * 2 == i + 8)
> a[i][i] = 2;
>
> -  for (k = 1; k < 100; k++)
> +  for (k = 4; k < 100; k++)
>  b[i+k] = b[i+k-5] + 2;
>  }
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
> index bdbdbff19ff..be325775fbb 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
> +++ b/

[PATCH][aarch64][libstdc++] Use shufflevector instead of shuffle in opt_random.h

2024-10-09 Thread Ricardo Jesus

This patch modifies the implementation of the vectorized Mersenne
Twister random number generator to use __builtin_shufflevector instead
of __builtin_shuffle. This makes it (almost) compatible with Clang.

To make the implementation fully compatible with Clang, Clang will need
to support internal Neon types like __Uint8x16_t and __Uint32x4_t, which
currently it does not. This looks like an oversight in Clang and so will
be addressed separately.

I see no codegen change with this patch.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Ricardo Jesus 

2024-09-05  Ricardo Jesus  

* config/cpu/aarch64/opt/ext/opt_random.h (__VEXT): Replace uses
of __builtin_shuffle with __builtin_shufflevector.
(__aarch64_lsl_128): Move shift amount to a template parameter.
(__aarch64_lsr_128): Move shift amount to a template parameter.
(__aarch64_recursion): Update call sites of __aarch64_lsl_128
and __aarch64_lsr_128.
---
 .../config/cpu/aarch64/opt/ext/opt_random.h   | 28 +++
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h 
b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h

index 7f756d1572f..7eb816abcd0 100644
--- a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
+++ b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
@@ -35,13 +35,13 @@
 #ifdef __ARM_NEON

 #ifdef __ARM_BIG_ENDIAN
-# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
-{16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
- 24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C})
+# define __VEXT(_A,_B,_C) __builtin_shufflevector (_A, _B, \
+16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
+24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C)
 #else
-# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
-{_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
- _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
+# define __VEXT(_A,_B,_C) __builtin_shufflevector (_B, _A, \
+_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
+_C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15)
 #endif

 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -52,9 +52,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   namespace {
 // Logical Shift right 128-bits by c * 8 bits

-__extension__ extern __inline __Uint32x4_t
+__extension__
+template
+extern __inline __Uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
+__aarch64_lsr_128 (__Uint8x16_t __a)
 {
   const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0};
@@ -64,9 +66,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

 // Logical Shift left 128-bits by c * 8 bits

-__extension__ extern __inline __Uint32x4_t
+__extension__
+template
+extern __inline __Uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
+__aarch64_lsl_128 (__Uint8x16_t __a)
 {
   const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0};
@@ -82,14 +86,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __Uint32x4_t __e)
 {
   __Uint32x4_t __y = (__b >> __sr1);
-  __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
+  __Uint32x4_t __z = __aarch64_lsr_128<__sr2> ((__Uint8x16_t) __c);

   __Uint32x4_t __v = __d << __sl1;

   __z = __z ^ __a;
   __z = __z ^ __v;

-  __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
+  __Uint32x4_t __x = __aarch64_lsl_128<__sl2> ((__Uint8x16_t) __a);

   __y = __y & __e;
   __z = __z ^ __x;
--
2.44.0


RE: [PATCH] aarch64: Fix folding of degenerate svwhilele case [PR117045]

2024-10-09 Thread Tamar Christina
Hi Richard,

> -Original Message-
> From: Richard Sandiford 
> Sent: Wednesday, October 9, 2024 12:58 PM
> To: gcc-patches@gcc.gnu.org
> Cc: ktkac...@nvidia.com; Richard Earnshaw ;
> Tamar Christina 
> Subject: [PATCH] aarch64: Fix folding of degenerate svwhilele case [PR117045]
> 
> The svwhilele folder mishandled the degenerate case in which
> the second argument is the maximum integer.  In that case,
> the result is all-true regardless of the first parameter:
> 
>   If the second scalar operand is equal to the maximum signed integer
>   value then a condition which includes an equality test can never fail
>   and the result will be an all-true predicate.
> 
> This is because the conceptual "increment the first operand
> by 1 after each element" is done modulo the range of the operand.
> The GCC code was instead treating it as infinite precision.
> whilele_5.c even had a test for the incorrect behaviour.
> 
> The easiest fix seemed to be to handle that case specially before
> doing constant folding.  This also copes with variable first operands.
> 
> Tested on aarch64-linux-gnu.  I'll push on Friday if there are no
> comments before then.  Since it's a wrong-code bug, I'd also like
> to backport to release branches.
> 
> Thanks,
> Richard
> 
> 
> gcc/
>   PR target/116999
>   PR target/117045
>   * config/aarch64/aarch64-sve-builtins-base.cc
>   (svwhilelx_impl::fold): Check for WHILELTs of the minimum value
>   and WHILELEs of the maximum value.  Fold them to all-false and
>   all-true respectively.
> 
> gcc/testsuite/
>   PR target/116999
>   PR target/117045
>   * gcc.target/aarch64/sve/acle/general/whilele_5.c: Fix bogus
>   expected result.
>   * gcc.target/aarch64/sve/acle/general/whilele_11.c: New test.
>   * gcc.target/aarch64/sve/acle/general/whilele_12.c: Likewise.
> ---
>  .../aarch64/aarch64-sve-builtins-base.cc  | 11 +-
>  .../aarch64/sve/acle/general/whilele_11.c | 31 +
>  .../aarch64/sve/acle/general/whilele_12.c | 34 +++
>  .../aarch64/sve/acle/general/whilele_5.c  |  2 +-
>  4 files changed, 76 insertions(+), 2 deletions(-)
>  create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
>  create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
> 
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 4b33585d981..3d0975e4294 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -2945,7 +2945,9 @@ public:
>  : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
>{}
> 
> -  /* Try to fold a call by treating its arguments as constants of type T.  */
> +  /* Try to fold a call by treating its arguments as constants of type T.
> + We have already filtered out the degenerate cases of X .LT. MIN
> + and X .LE. MAX.  */
>template
>gimple *
>fold_type (gimple_folder &f) const
> @@ -3001,6 +3003,13 @@ public:
>  if (f.vectors_per_tuple () > 1)
>return nullptr;
> 
> +/* Filter out cases where the condition is always true or always false.  
> */
> +tree arg1 = gimple_call_arg (f.call, 1);
> +if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE
> (arg1))))
> +  return f.fold_to_pfalse ();

Just a quick question for my own understanding, I assume the reason MIN
is handled here is because fold_type will decrement the value at some point?

Otherwise wouldn't MIN + 1 still fit inside the type's precision?

FWIW patch looks good to me, just wondering why the MIN case is needed :)

Cheers,
Tamar

> +if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE
> (arg1))))
> +  return f.fold_to_ptrue ();
> +
>  if (f.type_suffix (1).unsigned_p)
>return fold_type (f);
>  else
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
> new file mode 100644
> index 000..2be9dc5c534
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include 
> +#include 
> +
> +svbool_t
> +f1 (volatile int32_t *ptr)
> +{
> +  return svwhilelt_b8_s32 (*ptr, INT32_MIN);
> +}
> +
> +svbool_t
> +f2 (volatile uint32_t *ptr)
> +{
> +  return svwhilelt_b16_u32 (*ptr, 0);
> +}
> +
> +svbool_t
> +f3 (volatile int64_t *ptr)
> +{
> +  return svwhilelt_b32_s64 (*ptr, INT64_MIN);
> +}
> +
> +svbool_t
> +f4 (volatile uint64_t *ptr)
> +{
> +  return svwhilelt_b64_u64 (*ptr, 0);
> +}
> +
> +/* { dg-final { scan-assembler-times {\tpfalse\tp[0-9]+\.b\n} 4 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
> new file mode 100644
> index 00

Re: [PATCH] tree-optimization/117000 - elide .REDUC_IOR with compare against zero

2024-10-09 Thread Uros Bizjak
On Tue, Oct 8, 2024 at 10:19 AM Richard Biener  wrote:
>
> The following adds a pattern to elide a .REDUC_IOR operation when
> the result is compared against zero with a cbranch.  I've resorted
> to using can_compare_p since that's what RTL expansion eventually
> checks - while GIMPLE has long allowed whole-vector equality compares,
> I notice that vector lowering won't lower unsupported ones and RTL
> expansion doesn't seem to try using [u]cmp optabs
> (and neither x86 nor aarch64 implements those).  There's cstore
> but no target implements that for vector modes either.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> If anybody has suggestions on how to better guard the pattern I'm
> all ears.
>
> Thanks,
> Richard.
>
> PR tree-optimization/117000
> * match.pd (.REDUC_IOR !=/== 0): New pattern.
> * gimple-match-head.cc: Include memmodel.h and optabs.h.
> * generic-match-head.cc: Likewise.
>
> * gcc.target/i386/pr117000.c: New testcase.
> ---
>  gcc/generic-match-head.cc|  2 ++
>  gcc/gimple-match-head.cc |  2 ++
>  gcc/match.pd |  9 +
>  gcc/testsuite/gcc.target/i386/pr117000.c | 13 +
>  4 files changed, 26 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr117000.c
>
> diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
> index 42dee626613..7d7e2a9f792 100644
> --- a/gcc/generic-match-head.cc
> +++ b/gcc/generic-match-head.cc
> @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "builtins.h"
>  #include "case-cfn-macros.h"
>  #include "gimplify.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>  #include "optabs-tree.h"
>  #include "dbgcnt.h"
>  #include "tm.h"
> diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
> index 4147a0eb38a..b9d5f751b7c 100644
> --- a/gcc/gimple-match-head.cc
> +++ b/gcc/gimple-match-head.cc
> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "internal-fn.h"
>  #include "case-cfn-macros.h"
>  #include "gimplify.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>  #include "optabs-tree.h"
>  #include "tree-eh.h"
>  #include "dbgcnt.h"
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ba83f0f29e6..c2efebd98bb 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10332,6 +10332,15 @@ and,
>(simplify (reduc (op @0 VECTOR_CST@1))
>  (op (reduc:type @0) (reduc:type @1
>
> +/* Simplify .REDUC_IOR (@0) ==/!= 0 to @0 ==/!= 0.  */
> +(for cmp (eq ne)
> + (simplify
> +  (cmp (IFN_REDUC_IOR @0) integer_zerop)
> +  (if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
> +   && can_compare_p (cmp == EQ_EXPR ? EQ : NE, TYPE_MODE (TREE_TYPE 
> (@0)),
> +ccp_jump))
> +   (cmp @0 { build_zero_cst (TREE_TYPE (@0)); }))))
> +
>  /* Simplify vector floating point operations of alternating sub/add pairs
> into using an fneg of a wider element type followed by a normal add.
> under IEEE 754 the fneg of the wider type will negate every even entry
> diff --git a/gcc/testsuite/gcc.target/i386/pr117000.c 
> b/gcc/testsuite/gcc.target/i386/pr117000.c
> new file mode 100644
> index 000..04f94344eb1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr117000.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" { target sse4 } } */

FYI, you don't have to guard compile tests with ISA target selectors.
This is intended for assembly and execute tests, where assembler can't
handle instructions from selected ISA.

Uros.


[PATCH v3] RISC-V: Optimize branches with shifted immediate operands

2024-10-09 Thread Jovan Vukic
After the valuable feedback I received, it’s clear to me that the
oversight was in the tests showing the benefits of the patch. In the
test file, I added functions f5 and f6, which now generate more
efficient code with fewer instructions.

Before the patch:

f5:
        li      a4,2097152
        addi    a4,a4,-2048
        li      a5,1167360
        and     a0,a0,a4
        addi    a5,a5,-2048
        beq     a0,a5,.L4

f6:
        li      a5,3407872
        addi    a5,a5,-2048
        and     a0,a0,a5
        li      a5,1114112
        beq     a0,a5,.L7

After the patch:

f5:
        srli    a5,a0,11
        andi    a5,a5,1023
        li      a4,569
        beq     a5,a4,.L5

f6:
        srli    a5,a0,11
        andi    a5,a5,1663
        li      a4,544
        beq     a5,a4,.L9

2024-10-09  Jovan Vukic  

PR target/115921

gcc/ChangeLog:

* config/riscv/iterators.md (any_eq): New code iterator.
* config/riscv/riscv.h (COMMON_TRAILING_ZEROS): New macro.
(SMALL_AFTER_COMMON_TRAILING_SHIFT): Ditto.
* config/riscv/riscv.md 
(*branch_shiftedarith__shifted):
New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/branch-1.c: Additional tests.

CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.
---
 gcc/config/riscv/iterators.md |  4 +++
 gcc/config/riscv/riscv.h  | 12 +
 gcc/config/riscv/riscv.md | 32 +++
 gcc/testsuite/gcc.target/riscv/branch-1.c | 18 ++---
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 872c542e906..081659499a9 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -233,6 +233,8 @@
 (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
+(define_code_iterator any_eq [eq ne])
+
 ;; Iterators for conditions we can emit a sCC against 0 or a reg directly
 (define_code_iterator scc_0  [eq ne gt gtu])
 
@@ -285,6 +287,8 @@
 (le "le")
 (gt "gt")
 (lt "lt")
+(eq "eq")
+(ne "ne")
 (ior "ior")
 (xor "xor")
 (and "and")
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 53b7b2a40ed..ca1b8329cdc 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -667,6 +667,18 @@ enum reg_class
 /* True if bit BIT is set in VALUE.  */
 #define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
 
+/* Returns the smaller (common) number of trailing zeros for VAL1 and VAL2.  */
+#define COMMON_TRAILING_ZEROS(VAL1, VAL2)  \
+  (ctz_hwi (VAL1) < ctz_hwi (VAL2) \
+   ? ctz_hwi (VAL1)\
+   : ctz_hwi (VAL2))
+
+/* Returns true if both VAL1 and VAL2 are SMALL_OPERANDs after shifting by
+   the common number of trailing zeros.  */
+#define SMALL_AFTER_COMMON_TRAILING_SHIFT(VAL1, VAL2)  \
+  (SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2))
\
+   && SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
+
 /* Stack layout; function entry, exit and calling.  */
 
 #define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 688c07df46c..78112afbb26 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3129,6 +3129,38 @@
 }
 [(set_attr "type" "branch")])
 
+(define_insn_and_split "*branch_shiftedarith__shifted"
+  [(set (pc)
+   (if_then_else (any_eq
+   (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+ (match_operand 2 "shifted_const_arith_operand" "i"))
+   (match_operand 3 "shifted_const_arith_operand" "i"))
+(label_ref (match_operand 0 "" ""))
+(pc)))
+   (clobber (match_scratch:X 4 "=&r"))
+   (clobber (match_scratch:X 5 "=&r"))]
+  "!SMALL_OPERAND (INTVAL (operands[2]))
+&& !SMALL_OPERAND (INTVAL (operands[3]))
+&& SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
+INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (lshiftrt:X (match_dup 1) (match_dup 7)))
+   (set (match_dup 4) (and:X (match_dup 4) (match_dup 8)))
+   (set (match_dup 5) (match_dup 9))
+   (set (pc) (if_then_else (

Re: [PATCH] testsuite: Define missing and use ET for arm_arch_* and arm_cpu_*

2024-10-09 Thread Torbjorn SVENSSON




On 2024-10-09 12:18, Richard Earnshaw (lists) wrote:

On 07/10/2024 20:04, Torbjorn SVENSSON wrote:

Hi Richard,

On 2024-10-07 12:45, Richard Earnshaw (lists) wrote:

On 07/10/2024 09:03, Torbjörn SVENSSON wrote:

Ok for trunk?

--

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.


The acronym ET isn't one I recognize - I'm guessing you intend it to 
be Effective Target, rather than Extra Terrestrial, or Elf Target or 
some other expansion?  I think perhaps it would be better to avoid 
this in the commit log.  Your summary line is also a little imprecise 
as I suspect we will have more patches of a similar nature for some 
other patches soon.  Something like:


testsuite: arm: use effective-target for vsel* and mod* tests

would be closer


I'm fairly certain that I've seen the abbr ET for effective-target 
somewhere, but I could be wrong. Anyway, I've used your suggestion and 
will push it as soon as I get a comment on my questions below.






gcc/testsuite/ChangeLog

* gcc.target/arm/pr65647.c: Use ET arm_arch_v6m.
* gcc.target/arm/mod_2.c: Use ET arm_cpu_cortex_a57.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/arm/vseleqdf.c: Likewise.
* gcc.target/arm/vseleqsf.c: Likewise.
* gcc.target/arm/vselgedf.c: Likewise.
* gcc.target/arm/vselgesf.c: Likewise.
* gcc.target/arm/vselgtdf.c: Likewise.
* gcc.target/arm/vselgtsf.c: Likewise.
* gcc.target/arm/vselledf.c: Likewise.
* gcc.target/arm/vsellesf.c: Likewise.
* gcc.target/arm/vselltdf.c: Likewise.
* gcc.target/arm/vselltsf.c: Likewise.
* gcc.target/arm/vselnedf.c: Likewise.
* gcc.target/arm/vselnesf.c: Likewise.
* gcc.target/arm/vselvcdf.c: Likewise.
* gcc.target/arm/vselvcsf.c: Likewise.
* gcc.target/arm/vselvsdf.c: Likewise.
* gcc.target/arm/vselvssf.c: Likewise.
* lib/target-supports.exp: Define EF arm_cpu_cortex_a57.  Update ET

   ^^
Typo for ET?


Yes :S



The body of the patch is OK with an updated commit message.

Thanks.
R.



arm_v8_1_lob_ok to use -mcpu=unset.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/mod_2.c    | 4 +++-
  gcc/testsuite/gcc.target/arm/mod_256.c  | 4 +++-
  gcc/testsuite/gcc.target/arm/pr65647.c  | 3 ++-
  gcc/testsuite/gcc.target/arm/vseleqdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vseleqsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgedf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgtdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselgtsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselledf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vsellesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselltdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselltsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselnedf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselnesf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvcdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvcsf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvsdf.c | 5 +++--
  gcc/testsuite/gcc.target/arm/vselvssf.c | 5 +++--
  gcc/testsuite/lib/target-supports.exp   | 3 ++-
  20 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/ 
gcc.target/arm/mod_2.c

index 1143725d59a..3a203b67d73 100644
--- a/gcc/testsuite/gcc.target/arm/mod_2.c
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -1,7 +1,9 @@
  /* { dg-do compile } */
  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } 
{ "-mpure-code" } } */

  /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
  #include "../aarch64/mod_2.x"
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/ 
gcc.target/arm/mod_256.c

index d8dca0fe7d5..3521d7a05f3 100644
--- a/gcc/testsuite/gcc.target/arm/mod_256.c
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -1,7 +1,9 @@
  /* { dg-do compile } */
  /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } 
{ "-mpure-code" } } */

  /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
  #include "../aarch64/mod_256.x"
diff --git a/gcc/testsuite/gcc.target/arm/pr65647.c b/gcc/testsuite/ 
gcc.target/arm/pr65647.c

index 26b4e399f6b..dc3a3ca1184 100644
--- a/gcc/testsuite/gcc.target/arm/pr65647.c
+++ b/gcc/testsuite/gcc.target/arm/pr65647.c
@@ -1,7 +1,8 @@
  /* { dg-do compile } */
  /* { dg-require-effective-target arm_arch_v6m_ok } */
  /* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "- 
mfloat-abi=*" } {"-mfloat-abi=soft" } } */

-/* { dg-options "-march=armv6-m -mthumb 

[PATCH] aarch64: Fix folding of degenerate svwhilele case [PR117045]

2024-10-09 Thread Richard Sandiford
The svwhilele folder mishandled the degenerate case in which
the second argument is the maximum integer.  In that case,
the result is all-true regardless of the first parameter:

  If the second scalar operand is equal to the maximum signed integer
  value then a condition which includes an equality test can never fail
  and the result will be an all-true predicate.

This is because the conceptual "increment the first operand
by 1 after each element" is done modulo the range of the operand.
The GCC code was instead treating it as infinite precision.
whilele_5.c even had a test for the incorrect behaviour.

The easiest fix seemed to be to handle that case specially before
doing constant folding.  This also copes with variable first operands.

Tested on aarch64-linux-gnu.  I'll push on Friday if there are no
comments before then.  Since it's a wrong-code bug, I'd also like
to backport to release branches.

Thanks,
Richard


gcc/
PR target/116999
PR target/117045
* config/aarch64/aarch64-sve-builtins-base.cc
(svwhilelx_impl::fold): Check for WHILELTs of the minimum value
and WHILELEs of the maximum value.  Fold them to all-false and
all-true respectively.

gcc/testsuite/
PR target/116999
PR target/117045
* gcc.target/aarch64/sve/acle/general/whilele_5.c: Fix bogus
expected result.
* gcc.target/aarch64/sve/acle/general/whilele_11.c: New test.
* gcc.target/aarch64/sve/acle/general/whilele_12.c: Likewise.
---
 .../aarch64/aarch64-sve-builtins-base.cc  | 11 +-
 .../aarch64/sve/acle/general/whilele_11.c | 31 +
 .../aarch64/sve/acle/general/whilele_12.c | 34 +++
 .../aarch64/sve/acle/general/whilele_5.c  |  2 +-
 4 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 4b33585d981..3d0975e4294 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2945,7 +2945,9 @@ public:
 : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
   {}
 
-  /* Try to fold a call by treating its arguments as constants of type T.  */
+  /* Try to fold a call by treating its arguments as constants of type T.
+ We have already filtered out the degenerate cases of X .LT. MIN
+ and X .LE. MAX.  */
   template
   gimple *
   fold_type (gimple_folder &f) const
@@ -3001,6 +3003,13 @@ public:
 if (f.vectors_per_tuple () > 1)
   return nullptr;
 
+/* Filter out cases where the condition is always true or always false.  */
+tree arg1 = gimple_call_arg (f.call, 1);
+if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE (arg1
+  return f.fold_to_pfalse ();
+if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE (arg1
+  return f.fold_to_ptrue ();
+
 if (f.type_suffix (1).unsigned_p)
   return fold_type (f);
 else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
new file mode 100644
index 000..2be9dc5c534
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_11.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+
+svbool_t
+f1 (volatile int32_t *ptr)
+{
+  return svwhilelt_b8_s32 (*ptr, INT32_MIN);
+}
+
+svbool_t
+f2 (volatile uint32_t *ptr)
+{
+  return svwhilelt_b16_u32 (*ptr, 0);
+}
+
+svbool_t
+f3 (volatile int64_t *ptr)
+{
+  return svwhilelt_b32_s64 (*ptr, INT64_MIN);
+}
+
+svbool_t
+f4 (volatile uint64_t *ptr)
+{
+  return svwhilelt_b64_u64 (*ptr, 0);
+}
+
+/* { dg-final { scan-assembler-times {\tpfalse\tp[0-9]+\.b\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
new file mode 100644
index 000..713065c3145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_12.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+
+svbool_t
+f1 (volatile int32_t *ptr)
+{
+  return svwhilele_b8_s32 (*ptr, INT32_MAX);
+}
+
+svbool_t
+f2 (volatile uint32_t *ptr)
+{
+  return svwhilele_b16_u32 (*ptr, UINT32_MAX);
+}
+
+svbool_t
+f3 (volatile int64_t *ptr)
+{
+  return svwhilele_b32_s64 (*ptr, INT64_MAX);
+}
+
+svbool_t
+f4 (volatile uint64_t *ptr)
+{
+  return svwhilele_b64_u64 (*ptr, UINT64_MAX);
+}
+
+/* { dg-final { scan-assembler {\tptrue\tp[0-9]+\.b(?:, all)\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp[0-9]+\.h(?:, all)\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp[0-9]+\.s(?:, all)\n} } } */
+/* { dg-final { scan-asse

[PATCH] tree-optimization/117041 - fix load classification of former grouped load

2024-10-09 Thread Richard Biener
When we first detect a grouped load but later dis-associate it we
only set DR_GROUP_FIRST_ELEMENT to NULL, indicating it is not a
STMT_VINFO_GROUPED_ACCESS but leave DR_GROUP_NEXT_ELEMENT set.  This
causes a stray DR_GROUP_NEXT_ELEMENT access in get_group_load_store_type
to go wrong, indicating a load isn't single_element_p when it actually
is, leading to wrong classification and an ICE.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/117041
* tree-vect-stmts.cc (get_group_load_store_type): Only
check DR_GROUP_NEXT_ELEMENT for STMT_VINFO_GROUPED_ACCESS.

* gcc.dg/torture/pr117041.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr117041.c | 10 ++
 gcc/tree-vect-stmts.cc  |  6 --
 2 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr117041.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr117041.c 
b/gcc/testsuite/gcc.dg/torture/pr117041.c
new file mode 100644
index 000..09dbbf4c00f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117041.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+unsigned short a;
+int b, c[7][6];
+int main() {
+  for (a = 0; a < 6; a++)
+for (b = 5; b; b--)
+  c[a][b] = c[a+1][b];
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 43358767934..ad4a3141ab8 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1991,21 +1991,23 @@ get_group_load_store_type (vec_info *vinfo, 
stmt_vec_info stmt_info,
   stmt_vec_info first_stmt_info;
   unsigned int group_size;
   unsigned HOST_WIDE_INT gap;
+  bool single_element_p;
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 {
   first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   group_size = DR_GROUP_SIZE (first_stmt_info);
   gap = DR_GROUP_GAP (first_stmt_info);
+  single_element_p = (stmt_info == first_stmt_info
+ && !DR_GROUP_NEXT_ELEMENT (stmt_info));
 }
   else
 {
   first_stmt_info = stmt_info;
   group_size = 1;
   gap = 0;
+  single_element_p = true;
 }
   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
-  bool single_element_p = (stmt_info == first_stmt_info
-  && !DR_GROUP_NEXT_ELEMENT (stmt_info));
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   /* True if the vectorized statements would access beyond the last
-- 
2.43.0


[PATCH] Clear DR_GROUP_NEXT_ELEMENT upon group dissolving

2024-10-09 Thread Richard Biener
I've tried to sanitize DR_GROUP_NEXT_ELEMENT accesses but there are too
many so the following instead makes sure DR_GROUP_NEXT_ELEMENT is never
non-NULL for !STMT_VINFO_GROUPED_ACCESS.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-vect-data-refs.cc (vect_analyze_data_ref_access): When
cancelling a DR group also clear DR_GROUP_NEXT_ELEMENT.
---
 gcc/tree-vect-data-refs.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 920e3c120a6..202af7a8952 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3206,6 +3206,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
   if (loop_vinfo && integer_zerop (step))
 {
   DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
   if (!nested_in_vect_loop_p (loop, stmt_info))
return DR_IS_READ (dr);
   /* Allow references with zero step for outer loops marked
@@ -3225,6 +3226,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
   /* Interleaved accesses are not yet supported within outer-loop
 vectorization for references in the inner-loop.  */
   DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
 
   /* For the rest of the analysis we use the outer-loop step.  */
   step = STMT_VINFO_DR_STEP (stmt_info);
@@ -3247,6 +3249,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
{
  /* Mark that it is not interleaving.  */
  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+ DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
  return true;
}
 }
-- 
2.43.0


RE: [PATCH v5] gcc, libcpp: Add warning switch for "#pragma once in main file" [PR89808]

2024-10-09 Thread Ken Matsui
On Wednesday, October 9th, 2024 at 4:27 AM, Jiang, Haochen 
 wrote:

> 
> 
> > From: Andreas Schwab sch...@suse.de
> 
> > Sent: Wednesday, October 9, 2024 2:04 PM
> > 
> > ../../libcpp/directives.cc: In function 'void do_pragma_once(cpp_reader*)':
> > ../../libcpp/directives.cc:2078:20: error: unknown conversion type character
> > '<' in format [-Werror=format=]
> > 2078 | "%<#pragma once%> in main file");
> > | ^
> > ../../libcpp/directives.cc:2078:34: error: unknown conversion type character
> > '>' in format [-Werror=format=]
> > 2078 | "%<#pragma once%> in main file");
> > | ^
> > cc1plus: all warnings being treated as errors
> > make[3]: *** [Makefile:227: directives.o] Error 1
> 
> 
> Same bootstrap fail for me and my script on x86_64:
> 
> https://gcc.gnu.org/pipermail/gcc-regression/2024-October/080957.html
> 
> Thx,
> Haochen

Thank you for your report.  I addressed this issue in 
https://gcc.gnu.org/g:f709990333597b30dff54876bfdaada14e9cde30.

> 
> > --
> > Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196
> > BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something
> > completely different."


Re: [PATCH v3 0/2] ia64: enable LRA and un-obsolete ia64*-*-linux

2024-10-09 Thread Frank Scheiner

On 09.10.24 10:26, Richard Biener wrote:

On Wed, 9 Oct 2024, Richard Biener wrote:


On Tue, 8 Oct 2024, Frank Scheiner wrote:


With stage 3 of GCC 15 approaching, to save me some time by finally
dropping the non-LRA testcase from my cross builds of GCC and Linux and
as I had the time, I updated the patch set from René with the requested
changes and rebased it to 0ad2c76bea20dbeac753f10df6f9f86d142348d4.

Patch 1/2: Remove ia64*-*-linux from the list of obsolete targets
Patch 2/2: Enable LRA for ia64

LRA functionality was tested by bootstrapping GCC natively and running
the testsuite on ia64 based on
236116068151bbc72aaaf53d0f223fe06f7e3bac:

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html

For comparison, the same with just
236116068151bbc72aaaf53d0f223fe06f7e3bac:

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html

A diff between them is attached.

Can this be brought forward now as is?


I'll push this for you.


I spoke too fast - something between you and me corrupts the patch
so it doesn't apply (even after manually resolving line-wrapping,
I suspect whitespace is also broken).  Can you re-send them as
attachments please?


Is it OK to attach them to my reply here or better with a v4?

If the latter, should the patches be attached to the cover letter and 
the numbering in the subject be removed then, as everything is included 
in one email?


Cheers,
Frank
From 0c6e038d25b377bf3a5efe5ab456643bf73be3f4 Mon Sep 17 00:00:00 2001
From: Frank Scheiner 
Date: Wed, 12 Jun 2024 12:42:00 +0200
Subject: [PATCH 1/2] Remove ia64*-*-linux from the list of obsolete targets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: René Rebe 

The following un-deprecates ia64*-*-linux for GCC 15, since we plan to
support this for some years to come.

gcc/
* config.gcc: Only list ia64*-*-(hpux|vms|elf) in the list of
  obsoleted targets.

contrib/
* config-list.mk (LIST): no --enable-obsolete for ia64-linux.

Signed-off-by: René Rebe 
---
 contrib/config-list.mk | 2 +-
 gcc/config.gcc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index f282cd95c..4ddb3eeab 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -61,7 +61,7 @@ LIST = \
   i686-rtems i686-solaris2.11 i686-wrs-vxworks \
   i686-wrs-vxworksae \
   i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elfOPT-enable-obsolete \
-  ia64-linuxOPT-enable-obsolete ia64-hpuxOPT-enable-obsolete \
+  ia64-linux ia64-hpuxOPT-enable-obsolete \
   ia64-hp-vmsOPT-enable-obsolete iq2000-elf lm32-elf \
   lm32-rtems lm32-uclinux \
   loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index f09ce9f63..71ac3bada 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -272,7 +272,7 @@ esac
 
 # Obsolete configurations.
 case ${target} in
- ia64*-*-*\
+ ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf*	\
| nios2*-*-*\
  )
 if test "x$enable_obsolete" != xyes; then
-- 
2.25.1

From cd331e11af3786ffaceb0269db4a9cf529ca95a1 Mon Sep 17 00:00:00 2001
From: Frank Scheiner 
Date: Wed, 12 Jun 2024 12:42:00 +0200
Subject: [PATCH 2/2] Enable LRA for ia64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: René Rebe 

This was tested by bootstrapping GCC natively on ia64-t2-linux-gnu and
running the testsuite (based on
236116068151bbc72aaaf53d0f223fe06f7e3bac):

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html

For comparison, the same with just
236116068151bbc72aaaf53d0f223fe06f7e3bac:

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html

gcc/
* config/ia64/ia64.cc: Enable LRA for ia64.
* config/ia64/ia64.md: Likewise.
* config/ia64/predicates.md: Likewise.

Signed-off-by: René Rebe 
---
 gcc/config/ia64/ia64.cc   | 7 ++-
 gcc/config/ia64/ia64.md   | 4 ++--
 gcc/config/ia64/predicates.md | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index cd6ed8952..54706da33 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -619,9 +619,6 @@ static const scoped_attribute_specs *const ia64_attribute_table[] =
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
-
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
 
@@ -1333,7 +1330,7 @@ ia64_expand_move (rtx op0, rtx op1)
 {
   machine_mode mode = GET_MODE (op0);
 
-  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
+  if (!lra_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
 op1 = force_reg (mode, op1);
 
   if ((mode == Pmode || mode == ptr_mode) && symbolic_

Re: [PATCH v3 0/2] ia64: enable LRA and un-obsolete ia64*-*-linux

2024-10-09 Thread Richard Biener
On Wed, 9 Oct 2024, Frank Scheiner wrote:

> On 09.10.24 10:26, Richard Biener wrote:
> > On Wed, 9 Oct 2024, Richard Biener wrote:
> > 
> >> On Tue, 8 Oct 2024, Frank Scheiner wrote:
> >>
> >>> With stage 3 of GCC 15 approaching, to save me some time by finally
> >>> dropping the non-LRA testcase from my cross builds of GCC and Linux and
> >>> as I had the time, I updated the patch set from René with the requested
> >>> changes and rebased it to 0ad2c76bea20dbeac753f10df6f9f86d142348d4.
> >>>
> >>> Patch 1/2: Remove ia64*-*-linux from the list of obsolete targets
> >>> Patch 2/2: Enable LRA for ia64
> >>>
> >>> LRA functionality was tested by bootstrapping GCC natively and running
> >>> the testsuite on ia64 based on
> >>> 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> >>>
> >>> https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html
> >>>
> >>> For comparison, the same with just
> >>> 236116068151bbc72aaaf53d0f223fe06f7e3bac:
> >>>
> >>> https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html
> >>>
> >>> A diff between them is attached.
> >>>
> >>> Can this be brought forward now as is?
> >>
> >> I'll push this for you.
> > 
> > I spoke too fast - something between you and me corrupts the patch
> > so it doesn't apply (even after manually resolving line-wrapping,
> > I suspect whitespace is also broken).  Can you re-send them as
> > attachments please?
> 
> Is it OK to attach them to my reply here or better with a v4?
> 
> If the latter, should the patches be attached to the cover letter and the
> numbering in the subject be removed then, as everything is included in one
> email?

The attachments worked fine, I pushed them

Richard.

Re: [Bug tree-optimization/109429] [PATCH v2] ivopts: fixed complexities

2024-10-09 Thread Aleksandar Rakic
A kind reminder/ping on the patch.

Kind regards,
Aleksandar Rakić


From: Aleksandar Rakic 
Sent: Wednesday, September 25, 2024 5:32 PM
To: richard.guent...@gmail.com
Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic; Jovan Dmitrovic
Subject: [Bug tree-optimization/109429] [PATCH v2] ivopts: fixed complexities

Hi,

I think I managed to fix indentation from the previous version.

When comparing the tables showing the candidates for the group 1 before
and after applying this patch, it can be observed that complexities for
the candidates where the computation depends on the invariant
expressions or the invariant variables should be at least one, which
aligns with the approach used in the commit c2b64ce.

= Before this patch =
Group 1:
  cand  cost  compl.  inv.expr.  inv.vars
  1     11    0       5;         NIL;
  2     11    0       6;         NIL;
  4     8     0       7;         NIL;
  5     9     0       8;         NIL;
  6     1     0       NIL;       NIL;
  7     1     1       NIL;       NIL;
  9     7     0       5;         NIL;
= Before this patch =
= After this patch =
Group 1:
  cand  cost  compl.  inv.expr.  inv.vars
  1     11    2       4;         NIL;
  2     11    1       4;         NIL;
  4     8     1       5;         NIL;
  5     8     2       6;         NIL;
  6     1     0       NIL;       NIL;
  7     1     1       NIL;       NIL;
  9     7     2       4;         NIL;
= After this patch =

Hence, if the invariant expressions or the invariant variables are used
when representing use with candidate, the complexity should be larger
for more complex expressions, so it is incremented by one. I am not sure
whether inv_present could be expressed as parts.

Regards,
Aleksandar


Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Alejandro Colomar
[CC -= Jens]

Hi Chris,

On Tue, Oct 08, 2024 at 03:13:11PM GMT, Chris Bazley wrote:
> > ​Because I don't like the paper that has been voted into the standard.
> > I kind of presented that paper against my will.  I wish GCC merged the
> > feature with a different name, and forced the standard to reconsider
> > what they merged, which I consider to be a security problem.
> >
> > Alternatively, I wish GCC decided to do nothing, wait for Graz, where
> > I'll try to convince WG14 to change what was voted.
> >
> > But merging what was voted into the standard would be nefarious, IMO.
> 
> I don't understand this security problem that you are referring to.
> 
> The vast majority of strings use 'char' as the element type.
> 
> Existing code might look something like this:
> 
> #define A "foo"
> #define B "bar"
> #define STRING_LEN(s) (sizeof(s) - 1)
> 
> char *c = malloc(STRING_LEN(A) + STRING_LEN(B) + 1);
> if (c) {
>   strcpy(c, A);
>   strcat(c, B);
> }
> 
> Supposing that _Length gets support in GCC, the equivalent source code would 
> be almost
> identical and the compiled code would be identical:
> 
> #define A "foo"
> #define B "bar"
> #define STRING_LEN(s) (_Lengthof(s) - 1)
> 
> char *c = malloc(STRING_LEN(A) + STRING_LEN(B) + 1);
> if (c) {
>   strcpy(c, A);
>   strcat(c, B);
> }
> 
> Are you concerned that people will start writing new code that does something 
> like the following?
> 
> #define A "foo"
> #define B "bar"
> 
> char *c = malloc(_Lengthof(A) + _Lengthof(B));
> if (c) {
>   strcpy(c, A);
>   strcat(c, B);
> }
> 
> If they do, the only consequence will be that the string buffer is longer 
> than it needs to be; not shorter.

Yes, off-by-one bugs on the safe side are more frequent than on the
unsafe side in this case.  However, I expect unsafe off-by-ones too.
And even on the safe side, there's the chance of secondary problems like
the following:

Let's say the maximum supported size is limited by a system limit.
For example, sysconf(_SC_LOGIN_NAME_MAX) or LOGIN_NAME_MAX.  If you try
to allocate one extra byte, so sysconf(_SC_LOGIN_NAME_MAX)+1, you may
overflow something somewhere, or cause some other important issues in
your system if you manage to create a user with such a long username.
Or your program will just crash and cause a DoS.

Or another combination of events that may cause another class of bugs.
In all cases, there's an off-by-one somewhere, but will result in a
different bug type.


I'm not fabricating, BTW.  Here's a list of off-by-one bugs in login
code, precisely due to this size-length naming issue:




Have a lovely day!
Alex

> 
> Best regards,
> --
> Christopher Bazley
> Staff Software Engineer, GPU team, Central Engineering Group
> ARM Ltd, 110 Fulbourn Road, Cambridge, CB1 9NJ, UK.
> Web:   http://www.arm.com/

-- 



signature.asc
Description: PGP signature


[PATCH] libcpp, c, middle-end, v2: Optimize initializers using #embed in C

2024-10-09 Thread Jakub Jelinek
On Tue, Oct 08, 2024 at 07:31:12PM +0000, Joseph Myers wrote:
> On Thu, 11 Jul 2024, Jakub Jelinek wrote:
> 
> > With the CPP_NUMBERs around it, I believe in the C FE the only places which
> > need handling of the CPP_EMBED token are initializer parsing (that is the
> > only one which adds actual optimizations for it), comma expressions (I
> > believe nothing really cares whether it is 25,13,95 or
> > 25,13,0,1,2,3,4,5,6,7,8,9,10,13,95 etc., so besides the 2 outer CPP_NUMBER
> > the parsing just adds one INTEGER_CST to the comma expression, I doubt users
> > want to be spammed with millions of -Wunused warnings per #embed) and
> > whatever uses c_parser_expr_list (function calls, attribute arguments,
> > OpenMP sizes clause argument, OpenACC tile clause argument.  Please correct
> > me if I'm wrong.
> 
> I think c_parser_get_builtin_args is also relevant.  In most cases a list 
> of 4 or more elements wouldn't be valid, but for __builtin_shufflevector 
> you can have such a list of integers as part of the arguments.

You're right, thanks for catching that.

Here is an updated patch with that implemented plus additional testcase
which tests it (and was failing with the old version of the patch for C).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-10-09  Jakub Jelinek  

libcpp/
* include/cpplib.h (TTYPE_TABLE): Add CPP_EMBED token type.
* files.cc (finish_embed): For limit >= 64 and C preprocessing
instead of emitting CPP_NUMBER CPP_COMMA separated sequence for the
whole embed emit it just for the first and last byte and in between
emit a CPP_EMBED token or tokens if too large.
gcc/
* treestruct.def (TS_RAW_DATA_CST): New.
* tree.def (RAW_DATA_CST): New tree code.
* tree-core.h (struct tree_raw_data): New type.
(union tree_node): Add raw_data_cst member.
* tree.h (RAW_DATA_LENGTH, RAW_DATA_POINTER, RAW_DATA_OWNER): Define.
(gt_ggc_mx, gt_pch_nx): Declare overloads for tree_raw_data *.
* tree.cc (tree_node_structure_for_code): Handle RAW_DATA_CST.
(initialize_tree_contains_struct): Handle TS_RAW_DATA_CST.
(tree_code_size): Handle RAW_DATA_CST.
(initializer_zerop): Likewise.
(gt_ggc_mx, gt_pch_nx): Define overloads for tree_raw_data *.
* gimplify.cc (gimplify_init_ctor_eval): Handle RAW_DATA_CST.
* fold-const.cc (operand_compare::operand_equal_p): Handle
RAW_DATA_CST.  Formatting fix.
(operand_compare::hash_operand): Handle RAW_DATA_CST.
(native_encode_initializer): Likewise.
(get_array_ctor_element_at_index): Likewise.
(fold): Likewise.
* gimple-fold.cc (fold_array_ctor_reference): Likewise.  Formatting
fix.
* varasm.cc (const_hash_1): Handle RAW_DATA_CST.
(initializer_constant_valid_p_1): Likewise.
(array_size_for_constructor): Likewise.
(output_constructor_regular_field): Likewise.
* expr.cc (categorize_ctor_elements_1): Likewise.
(expand_expr_real_1) : Punt for RAW_DATA_CST.
* tree-streamer.cc (streamer_check_handled_ts_structures): Mark
TS_RAW_DATA_CST as handled.
* tree-streamer-in.cc (streamer_alloc_tree): Handle RAW_DATA_CST.
(lto_input_ts_raw_data_cst_tree_pointers): New function.
(streamer_read_tree_body): Call it for RAW_DATA_CST.
* tree-streamer-out.cc (write_ts_raw_data_cst_tree_pointers): New
function.
(streamer_write_tree_body): Call it for RAW_DATA_CST.
(streamer_write_tree_header): Handle RAW_DATA_CST.
* lto-streamer-out.cc (DFS::DFS_write_tree_body): Handle RAW_DATA_CST.
* tree-pretty-print.cc (dump_generic_node): Likewise.
gcc/c-family/
* c-ppoutput.cc (token_streamer::stream): Add special code to spell
CPP_EMBED token.
* c-lex.cc (c_lex_with_flags): Handle CPP_EMBED.  Formatting fix.
* c-common.cc (c_parse_error): Handle CPP_EMBED.
(braced_list_to_string): Optimize RAW_DATA_CST surrounded by
INTEGER_CSTs which match some bytes before or after RAW_DATA_CST in
its owner.
gcc/c/
* c-parser.cc (c_parser_braced_init): Handle CPP_EMBED.
(c_parser_get_builtin_args): Likewise.
(c_parser_expression): Likewise.
(c_parser_expr_list): Likewise.
* c-typeck.cc (digest_init): Handle RAW_DATA_CST.  Formatting fix.
(init_node_successor): New function.
(add_pending_init): Handle RAW_DATA_CST.
(set_nonincremental_init): Formatting fix.
(output_init_element): Handle RAW_DATA_CST.  Formatting fixes.
(maybe_split_raw_data): New function.
(process_init_element): Use maybe_split_raw_data.  Handle
RAW_DATA_CST.
gcc/testsuite/
* c-c++-common/cpp/embed-20.c: New test.
* c-c++-common/cpp/embed-21.c: New test.
* c-c++-common/cpp/embed-28.c: New test.
* gcc.dg/cpp/embed-8.c: New

[PATCH] c, v2: Speed up compilation of large char array initializers when not using #embed

2024-10-09 Thread Jakub Jelinek
On Tue, Oct 08, 2024 at 07:42:11PM +0000, Joseph Myers wrote:
> On Sat, 3 Aug 2024, Jakub Jelinek wrote:
> 
> > * c-c++-common/init-1.c: New test.
> 
> I think there should also be tests of initializing signed char (and plain 
> char) arrays; I don't see any such tests here.

Here is an updated patch with init-2.c (signed char rather than unsigned
char) and init-3.c (plain char) tests added.

Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested
on the former with explicit -fsigned-char and with explicit -funsigned-char,
ok for trunk?

2024-10-09  Jakub Jelinek  

* c-tree.h (c_maybe_optimize_large_byte_initializer): Declare.
* c-parser.cc (c_parser_initval): Attempt to optimize large char array
initializers into RAW_DATA_CST.
* c-typeck.cc (c_maybe_optimize_large_byte_initializer): New function.

* c-c++-common/init-1.c: New test.
* c-c++-common/init-2.c: New test.
* c-c++-common/init-3.c: New test.

--- gcc/c/c-tree.h.jj   2024-09-10 17:31:22.697920858 +0200
+++ gcc/c/c-tree.h  2024-10-09 12:36:21.723048065 +0200
@@ -807,6 +807,7 @@ extern struct c_expr pop_init_level (loc
 location_t);
 extern void set_init_index (location_t, tree, tree, struct obstack *);
 extern void set_init_label (location_t, tree, location_t, struct obstack *);
+unsigned c_maybe_optimize_large_byte_initializer (void);
 extern void process_init_element (location_t, struct c_expr, bool,
  struct obstack *);
 extern tree build_compound_literal (location_t, tree, tree, bool,
--- gcc/c/c-parser.cc.jj2024-10-09 12:16:56.722130497 +0200
+++ gcc/c/c-parser.cc   2024-10-09 12:36:21.726048024 +0200
@@ -6506,7 +6506,125 @@ c_parser_initval (c_parser *parser, stru
(init.value
init = convert_lvalue_to_rvalue (loc, init, true, true, true);
 }
+  tree val = init.value;
   process_init_element (loc, init, false, braced_init_obstack);
+
+  /* Attempt to optimize large char array initializers into RAW_DATA_CST
+ to save compile time and memory even when not using #embed.  */
+  static unsigned vals_to_ignore;
+  if (vals_to_ignore)
+/* If earlier call determined there is certain number of CPP_COMMA
+   CPP_NUMBER tokens with 0-255 int values, but not enough for
+   RAW_DATA_CST to be beneficial, don't try to check it again until
+   they are all parsed.  */
+--vals_to_ignore;
+  else if (val
+  && TREE_CODE (val) == INTEGER_CST
+  && TREE_TYPE (val) == integer_type_node
+  && c_parser_next_token_is (parser, CPP_COMMA))
+if (unsigned int len = c_maybe_optimize_large_byte_initializer ())
+  {
+   char buf1[64];
+   unsigned int i;
+   gcc_checking_assert (len >= 64);
+   location_t last_loc = UNKNOWN_LOCATION;
+   for (i = 0; i < 64; ++i)
+ {
+   c_token *tok = c_parser_peek_nth_token_raw (parser, 1 + 2 * i);
+   if (tok->type != CPP_COMMA)
+ break;
+   tok = c_parser_peek_nth_token_raw (parser, 2 + 2 * i);
+   if (tok->type != CPP_NUMBER
+   || TREE_CODE (tok->value) != INTEGER_CST
+   || TREE_TYPE (tok->value) != integer_type_node
+   || wi::neg_p (wi::to_wide (tok->value))
+   || wi::to_widest (tok->value) > UCHAR_MAX)
+ break;
+   buf1[i] = (char) tree_to_uhwi (tok->value);
+   if (i == 0)
+ loc = tok->location;
+   last_loc = tok->location;
+ }
+   if (i < 64)
+ {
+   vals_to_ignore = i;
+   return;
+ }
+   c_token *tok = c_parser_peek_nth_token_raw (parser, 1 + 2 * i);
+   /* If 64 CPP_COMMA CPP_NUMBER pairs are followed by CPP_CLOSE_BRACE,
+  punt if len is INT_MAX as that can mean this is a flexible array
+  member and in that case we need one CPP_NUMBER afterwards
+  (as guaranteed for CPP_EMBED).  */
+   if (tok->type == CPP_CLOSE_BRACE && len != INT_MAX)
+ len = i;
+   else if (tok->type != CPP_COMMA)
+ {
+   vals_to_ignore = i;
+   return;
+ }
+   /* Ensure the STRING_CST fits into 128K.  */
+   unsigned int max_len = 131072 - offsetof (struct tree_string, str) - 1;
+   unsigned int orig_len = len;
+   unsigned int off = 0, last = 0;
+   if (!wi::neg_p (wi::to_wide (val)) && wi::to_widest (val) <= UCHAR_MAX)
+ off = 1;
+   len = MIN (len, max_len - off);
+   char *buf2 = XNEWVEC (char, len + off);
+   if (off)
+ buf2[0] = (char) tree_to_uhwi (val);
+   memcpy (buf2 + off, buf1, i);
+   for (unsigned int j = 0; j < i; ++j)
+ {
+   c_parser_peek_token (parser);
+   c_parser_consume_token (parser);
+   c_parser_peek_token (parser);
+   c_parser_consume_token (parser);
+ }
+   fo

Re: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

2024-10-09 Thread Patrick O'Neill



On 10/9/24 14:50, Jeff Law wrote:



On 10/9/24 3:21 PM, Patrick O'Neill wrote:


On 10/9/24 14:07, Jeff Law wrote:



Also note that if you use the tag "[RISC-V]" in your subject line 
your patch will be automatically picked up by a pre-commit tester 
that can be subsequently examined to verify behavior.


This patch's subject line looks good to me. It would've been picked 
up as-is since it mentions riscv/risc-v.


The patch doesn't show up in patchworks so that's what stopped the 
risc-v pre-commit from finding it.


Sadly I don't have much insight into what stopped patchworks from 
seeing it. :-/
I'd assumed it wasn't [RISC-V], but you know that aspect better than I 
:-)



That's a safe first guess :)
The flow for precommit gets new patches from the Patchworks API, so if 
it isn't in patchworks then precommit won't see it.

We have patchworks to handle parsing emails/extracting patches for us :)

From poking around the patchworks source code my new best guess is that 
the Content-Type header of the attachment in the original email threw it 
off:


--79e1d00623f13532
Content-Type: application/octet-stream;
 name="0001-RISC-V-Enable-builtin-__riscv_mul-with-Zmmul-extensi.patch"

Seems like patchworks ignores all attachments that aren't `*/x-patch`, 
`*/x-diff`, `text/*`?

https://github.com/getpatchwork/patchwork/blob/4dfe6991a7bcdb11fd878a087aba314e9fdaa2db/patchwork/parser.py#L686
https://github.com/getpatchwork/patchwork/blob/4dfe6991a7bcdb11fd878a087aba314e9fdaa2db/patchwork/parser.py#L639

Patrick


[pushed: r15-4231] diagnostics: move text output member functions to correct file

2024-10-09 Thread David Malcolm
No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r15-4231-g00ede02bc8bb73.

gcc/ChangeLog:
* diagnostic-format-text.cc
(diagnostic_text_output_format::after_diagnostic): Replace call to
show_any_path with body, taken from diagnostic.cc.
(diagnostic_text_output_format::build_prefix): Move here from
diagnostic.cc, updating to use get_diagnostic_kind_text and
diagnostic_get_color_for_kind.
(diagnostic_text_output_format::file_name_as_prefix): Move here
from diagnostic.cc
(diagnostic_text_output_format::append_note): Likewise.
* diagnostic-format-text.h
(diagnostic_text_output_format::show_any_path): Drop decl.
* diagnostic.cc
(diagnostic_text_output_format::file_name_as_prefix): Move to
diagnostic-format-text.cc.
(diagnostic_text_output_format::build_prefix): Likewise.
(diagnostic_text_output_format::show_any_path): Move to body of
diagnostic_text_output_format::after_diagnostic.
(diagnostic_text_output_format::append_note): Move to
diagnostic-format-text.cc.

Signed-off-by: David Malcolm 
---
 gcc/diagnostic-format-text.cc | 72 +-
 gcc/diagnostic-format-text.h  |  2 -
 gcc/diagnostic.cc | 84 ---
 3 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/gcc/diagnostic-format-text.cc b/gcc/diagnostic-format-text.cc
index a6592fe93e6c..0d58d5fb082d 100644
--- a/gcc/diagnostic-format-text.cc
+++ b/gcc/diagnostic-format-text.cc
@@ -117,7 +117,77 @@ void
 diagnostic_text_output_format::
 after_diagnostic (const diagnostic_info &diagnostic)
 {
-  show_any_path (diagnostic);
+  if (const diagnostic_path *path = diagnostic.richloc->get_path ())
+print_path (*path);
+}
+
+/* Return a malloc'd string describing a location and the severity of the
+   diagnostic, e.g. "foo.c:42:10: error: ".  The caller is responsible for
+   freeing the memory.  */
+char *
+diagnostic_text_output_format::
+build_prefix (const diagnostic_info &diagnostic) const
+{
+  gcc_assert (diagnostic.kind < DK_LAST_DIAGNOSTIC_KIND);
+
+  const char *text = _(get_diagnostic_kind_text (diagnostic.kind));
+  const char *text_cs = "", *text_ce = "";
+  pretty_printer *pp = get_printer ();
+
+  if (const char *color_name = diagnostic_get_color_for_kind (diagnostic.kind))
+{
+  text_cs = colorize_start (pp_show_color (pp), color_name);
+  text_ce = colorize_stop (pp_show_color (pp));
+}
+
+  const expanded_location s = diagnostic_expand_location (&diagnostic);
+  label_text location_text = get_location_text (s);
+
+  char *result = build_message_string ("%s %s%s%s", location_text.get (),
+  text_cs, text, text_ce);
+  return result;
+}
+
+/* Same as build_prefix, but only the source FILE is given.  */
+char *
+diagnostic_text_output_format::file_name_as_prefix (const char *f) const
+{
+  pretty_printer *const pp = get_printer ();
+  const char *locus_cs
+= colorize_start (pp_show_color (pp), "locus");
+  const char *locus_ce = colorize_stop (pp_show_color (pp));
+  return build_message_string ("%s%s:%s ", locus_cs, f, locus_ce);
+}
+
+/* Add a purely textual note with text GMSGID and with LOCATION.  */
+
+void
+diagnostic_text_output_format::append_note (location_t location,
+   const char * gmsgid, ...)
+{
+  diagnostic_context *context = &get_context ();
+
+  diagnostic_info diagnostic;
+  va_list ap;
+  rich_location richloc (line_table, location);
+
+  va_start (ap, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &ap, &richloc, DK_NOTE);
+  if (context->m_inhibit_notes_p)
+{
+  va_end (ap);
+  return;
+}
+  pretty_printer *pp = get_printer ();
+  char *saved_prefix = pp_take_prefix (pp);
+  pp_set_prefix (pp, build_prefix (diagnostic));
+  pp_format (pp, &diagnostic.message);
+  pp_output_formatted_text (pp);
+  pp_destroy_prefix (pp);
+  pp_set_prefix (pp, saved_prefix);
+  pp_newline (pp);
+  diagnostic_show_locus (context, &richloc, DK_NOTE, pp);
+  va_end (ap);
 }
 
 /* If DIAGNOSTIC has a CWE identifier, print it.
diff --git a/gcc/diagnostic-format-text.h b/gcc/diagnostic-format-text.h
index aacd699cd90a..2e57e27c739d 100644
--- a/gcc/diagnostic-format-text.h
+++ b/gcc/diagnostic-format-text.h
@@ -78,8 +78,6 @@ private:
   label_text get_location_text (const expanded_location &s) const;
   bool includes_seen_p (const line_map_ordinary *map);
 
-  void show_any_path (const diagnostic_info &diagnostic);
-
   diagnostic_column_policy m_column_policy;
 
   /* Used to detect when the input file stack has changed since last
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 7f741a04f62e..9647e1382dd1 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -87,18 +87,6 @@ build_message_string (const char *msg, ...)
   return str;
 }

[pushed: r15-4230] diagnostics: mark the JSON output format as deprecated

2024-10-09 Thread David Malcolm
The bulk of the documentation for -fdiagnostics-format= is taken up
by a description of the "json" format added in r9-4156-g478dd60ddcf177.

I don't plan to add any extra features to the "json" format; all my
future work on machine-readable GCC diagnostics is likely to be on the
SARIF output format (https://gcc.gnu.org/wiki/SARIF).

Hence users seeking machine-readable output from GCC should use SARIF.

This patch removes the long documentation of the format and describes it
as deprecated.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r15-4230-ga4e4f2d22589a8.

gcc/ChangeLog:
* doc/invoke.texi (fdiagnostics-format): Describe "json" et al as
deprecated, and remove the long description of the output format.

Signed-off-by: David Malcolm 
---
 gcc/doc/invoke.texi | 266 +---
 1 file changed, 2 insertions(+), 264 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d38c1feb86f7..9aa05b81d10c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5907,276 +5907,14 @@ The @samp{sarif-stderr} and @samp{sarif-file} formats 
both emit
 diagnostics in SARIF Version 2.1.0 format, either to stderr, or to a file
 named @file{@var{source}.sarif}, respectively.
 
+The various @samp{json}, @samp{json-stderr}, and @samp{json-file} values
+are deprecated and refer to a legacy JSON-based output format.
 The @samp{json} format is a synonym for @samp{json-stderr}.
 The @samp{json-stderr} and @samp{json-file} formats are identical, apart from
 where the JSON is emitted to.  With @samp{json-stderr}, the JSON is emitted
 to stderr, whereas with @samp{json-file} it is written to
 @file{@var{source}.gcc.json}.
 
-The emitted JSON consists of a top-level JSON array containing JSON objects
-representing the diagnostics.
-
-Diagnostics can have child diagnostics.  For example, this error and note:
-
-@smallexample
-misleading-indentation.c:15:3: warning: this 'if' clause does not
-  guard... [-Wmisleading-indentation]
-   15 |   if (flag)
-  |   ^~
-misleading-indentation.c:17:5: note: ...this statement, but the latter
-  is misleadingly indented as if it were guarded by the 'if'
-   17 | y = 2;
-  | ^
-@end smallexample
-
-@noindent
-might be printed in JSON form (after formatting) like this:
-
-@smallexample
-[
-@{
-"kind": "warning",
-"locations": [
-@{
-"caret": @{
-   "display-column": 3,
-   "byte-column": 3,
-"column": 3,
-"file": "misleading-indentation.c",
-"line": 15
-@},
-"finish": @{
-   "display-column": 4,
-   "byte-column": 4,
-"column": 4,
-"file": "misleading-indentation.c",
-"line": 15
-@}
-@}
-],
-"message": "this \u2018if\u2019 clause does not guard...",
-"option": "-Wmisleading-indentation",
-"option_url": 
"https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wmisleading-indentation",
-"children": [
-@{
-"kind": "note",
-"locations": [
-@{
-"caret": @{
-   "display-column": 5,
-   "byte-column": 5,
-"column": 5,
-"file": "misleading-indentation.c",
-"line": 17
-@}
-@}
-],
-"escape-source": false,
-"message": "...this statement, but the latter is @dots{}"
-@}
-]
-   "escape-source": false,
-   "column-origin": 1,
-@}
-]
-@end smallexample
-
-@noindent
-where the @code{note} is a child of the @code{warning}.
-
-A diagnostic has a @code{kind}.  If this is @code{warning}, then there is
-an @code{option} key describing the command-line option controlling the
-warning.
-
-A diagnostic can contain zero or more locations.  Each location has an
-optional @code{label} string and up to three positions within it: a
-@code{caret} position and optional @code{start} and @code{finish} positions.
-A position is described by a @code{file} name, a @code{line} number, and
-three numbers indicating a column position:
-@itemize @bullet
-
-@item
-@code{display-column} counts display columns, accounting for tabs and
-multibyte characters.
-
-@item
-@code{byte-column} counts raw bytes.
-
-@item
-@code{column} is equal to one of
-the previous two, as dictated by the @option{-fdiagnostics-column-unit}
-option.
-
-@end itemize
-All three columns are relative to the origin specified by
-@option{-fdiagnostics-column-origin}, which is typically equal to 1 but may
-be set, for instance, to 0 for compatibility with other utilities that
-

[pushed: r15-4229] lto: reimplement print_lto_docs_link [PR116613]

2024-10-09 Thread David Malcolm
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r15-4229-g8d0de31c931dda.

gcc/ChangeLog:
PR other/116613
* lto-wrapper.cc (print_lto_docs_link): Use a format string rather
than building the string manually.  Fix memory leak of "url" by
using label_text.

Signed-off-by: David Malcolm 
---
 gcc/lto-wrapper.cc | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index 141856c9b67b..9d7fe18b2296 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -1366,19 +1366,10 @@ init_num_threads (void)
 void
 print_lto_docs_link ()
 {
-  bool print_url = global_dc->m_printer->supports_urls_p ();
-  const char *url = global_dc->make_option_url (OPT_flto);
-
-  pretty_printer pp;
-  pp.set_url_format (URL_FORMAT_DEFAULT);
-  pp_string (&pp, "see the ");
-  if (print_url)
-pp_begin_url (&pp, url);
-  pp_string (&pp, "%<-flto%> option documentation");
-  if (print_url)
-pp_end_url (&pp);
-  pp_string (&pp, " for more information");
-  inform (UNKNOWN_LOCATION, pp_formatted_text (&pp));
+  label_text url = label_text::take (global_dc->make_option_url (OPT_flto));
+  inform (UNKNOWN_LOCATION,
+ "see the %{%<-flto%> option documentation%} for more information",
+ url.get ());
 }
 
 /* Test that a make command is present and working, return true if so.  */
-- 
2.26.3



[PATCH v1 1/2] Match: Support form 4 for scalar signed integer SAT_TRUNC

2024-10-09 Thread pan2 . li
From: Pan Li 

This patch would like to support the form 4 of the scalar signed
integer SAT_TRUNC.  Aka below example:

Form 4:
  #define DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN <= x && x < (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

DEF_SAT_S_TRUNC_FMT_4(int8_t, int16_t, INT8_MIN, INT8_MAX)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_trunc_int16_t_to_int8_t_fmt_4 (int16_t x)
   6   │ {
   7   │   int8_t trunc;
   8   │   unsigned short x.0_1;
   9   │   unsigned short _2;
  10   │   int8_t _3;
  11   │   _Bool _7;
  12   │   signed char _8;
  13   │   signed char _9;
  14   │   signed char _10;
  15   │
  16   │ ;;   basic block 2, loop depth 0
  17   │ ;;pred:   ENTRY
  18   │   x.0_1 = (unsigned short) x_4(D);
  19   │   _2 = x.0_1 + 128;
  20   │   if (_2 > 254)
  21   │ goto <bb 4>; [50.00%]
  22   │   else
  23   │ goto <bb 3>; [50.00%]
  24   │ ;;succ:   4
  25   │ ;;3
  26   │
  27   │ ;;   basic block 3, loop depth 0
  28   │ ;;pred:   2
  29   │   trunc_5 = (int8_t) x_4(D);
  30   │   goto <bb 5>; [100.00%]
  31   │ ;;succ:   5
  32   │
  33   │ ;;   basic block 4, loop depth 0
  34   │ ;;pred:   2
  35   │   _7 = x_4(D) < 0;
  36   │   _8 = (signed char) _7;
  37   │   _9 = -_8;
  38   │   _10 = _9 ^ 127;
  39   │ ;;succ:   5
  40   │
  41   │ ;;   basic block 5, loop depth 0
  42   │ ;;pred:   3
  43   │ ;;4
  44   │   # _3 = PHI <trunc_5(3), _10(4)>
  45   │   return _3;
  46   │ ;;succ:   EXIT
  47   │
  48   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_trunc_int16_t_to_int8_t_fmt_4 (int16_t x)
   6   │ {
   7   │   int8_t _3;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _3 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _3;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add case 4 matching pattern for signed SAT_TRUNC.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5e20651c8ce..6bd515fdd87 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3488,6 +3488,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0))
 || (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2))
+|| (wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_2))
 || (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1)))
&& wi::eq_p (int_cst_3, otype_max))
 
-- 
2.43.0



[PATCH v1 2/2] RISC-V: Add testcases for form 4 of scalar signed SAT_TRUNC

2024-10-09 Thread pan2 . li
From: Pan Li 

Form 4:
  #define DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN <= x && x < (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 15 ++
 .../riscv/sat_s_trunc-4-i16-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-4-i32-to-i16.c  | 28 +++
 .../riscv/sat_s_trunc-4-i32-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-4-i64-to-i16.c  | 28 +++
 .../riscv/sat_s_trunc-4-i64-to-i32.c  | 26 +
 .../riscv/sat_s_trunc-4-i64-to-i8.c   | 26 +
 .../riscv/sat_s_trunc-run-4-i16-to-i8.c   | 16 +++
 .../riscv/sat_s_trunc-run-4-i32-to-i16.c  | 16 +++
 .../riscv/sat_s_trunc-run-4-i32-to-i8.c   | 16 +++
 .../riscv/sat_s_trunc-run-4-i64-to-i16.c  | 16 +++
 .../riscv/sat_s_trunc-run-4-i64-to-i32.c  | 16 +++
 .../riscv/sat_s_trunc-run-4-i64-to-i8.c   | 16 +++
 13 files changed, 271 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i32-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i32-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i64-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i64-to-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i64-to-i8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i16-to-i8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i32-to-i16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i32-to-i8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i64-to-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i64-to-i32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_s_trunc-run-4-i64-to-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 607bc4fc82e..0b3d0ea7073 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -501,6 +501,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN <= x && x < (WT)NT_MAX\
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -510,4 +522,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_3(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_3 (x)
 #define RUN_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_3(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_4(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_4 (x)
+#define RUN_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_4(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c
new file mode 100644
i

[PATCH] Allow SLP store of mixed external and constant

2024-10-09 Thread Richard Biener
vect_build_slp_tree_1 rejected this during SLP discovery because it
ran into the rhs code comparison code for stores.  The following
skips that completely for loads and stores as those are handled
later anyway.

This needs a heuristic adjustment in vect_get_and_check_slp_defs
to avoid fallout with regard to BB vectorization and splitting
of a store group vs. demoting one operand to external.

gcc.dg/Wstringop-overflow-47.c needs adjustment given we now have
vast improvements for code generation.  gcc.dg/strlenopt-32.c
needs adjustment because the strlen pass doesn't handle

  _11 = {0, b_6(D)};
  __builtin_memcpy (&a, "foo.bar", 8);
  MEM <vector(2) char> [(char *)&a + 3B] = _11;
  _9 = strlen (&a);

I have opened PR117057 for this.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-vect-slp.cc (vect_build_slp_tree_1): Do not compare
RHS codes for loads or stores.
(vect_get_and_check_slp_defs): Only demote operand to external
in case there is more than one operand.

* gcc.dg/vect/slp-57.c: New testcase.
* gcc.dg/Wstringop-overflow-47.c: Adjust.
* gcc.dg/strlenopt-32.c: XFAIL parts.
---
 gcc/testsuite/gcc.dg/Wstringop-overflow-47.c |  6 ++---
 gcc/testsuite/gcc.dg/strlenopt-32.c  |  3 ++-
 gcc/testsuite/gcc.dg/vect/slp-57.c   | 14 
 gcc/tree-vect-slp.cc | 24 ++--
 4 files changed, 26 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-57.c

diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c 
b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
index 9fb78e55046..aa5402a060f 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
@@ -31,15 +31,15 @@ void nowarn_c32 (char c)
 
 void warn_c32 (char c)
 {
-  extern char warn_a32[32];   // { dg-message "at offset (32|1) into 
destination object 'warn_a32' of size 32" "pr97027" }
+  extern char warn_a32[32];   // { dg-message "at offset (32|1|17) into 
destination object 'warn_a32' of size 32" "pr97027" }
 
   void *p = warn_a32 + 1;
-  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|32 bytes) into 
a region of size (0|31)" "pr97027" }
+  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|16 bytes|32 
bytes) into a region of size (0|15|31)" "pr97027" }
 
   /* Verify a local variable too. */
   char a32[32];
   p = a32 + 1;
-  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|32 bytes) into 
a region of size (0|31)" "pr97027" }
+  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|16 bytes|32 
bytes) into a region of size (0|15|31)" "pr97027" }
   sink (p);
 }
 
diff --git a/gcc/testsuite/gcc.dg/strlenopt-32.c 
b/gcc/testsuite/gcc.dg/strlenopt-32.c
index 4220314fb3f..c53168570fd 100644
--- a/gcc/testsuite/gcc.dg/strlenopt-32.c
+++ b/gcc/testsuite/gcc.dg/strlenopt-32.c
@@ -190,4 +190,5 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail 
vect_slp_v2qi_store_unalign } } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target 
vect_slp_v2qi_store_unalign } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-57.c 
b/gcc/testsuite/gcc.dg/vect/slp-57.c
new file mode 100644
index 000..a35c4ef6203
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-57.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[1024];
+void foo (int x)
+{
+  for (int i = 0; i < 1024; i += 2)
+{
+  a[i] = x;
+  a[i+1] = 1;
+}
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 597387d9710..00d5ae7fc48 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -905,7 +905,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
}
 
  if (is_a <bb_vec_info> (vinfo)
- && !oprnd_info->any_pattern)
+ && !oprnd_info->any_pattern
+ && number_of_oprnds > 1)
{
  /* Now for commutative ops we should see whether we can
 make the other operand matching.  */
@@ -1305,10 +1306,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  /* Mismatch.  */
  continue;
}
- if (first_stmt_code != rhs_code
+ if (!ldst_p
+ && first_stmt_code != rhs_code
  && alt_stmt_code == ERROR_MARK)
alt_stmt_code = rhs_code;
- if ((first_stmt_code != rhs_code
+ if ((!ldst_p
+  && first_stmt_code != rhs_code
   && (first_stmt_code != IMAGPART_EXPR
   || rhs_code != REALPART_EXPR)
   && (first_stmt_code != REALPART_EXPR
@@ -1325,20 +1328,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
&& 

Re: [PATCH] x86: Implement Fast-Math Float Truncation to BF16 via PSRLD Instruction

2024-10-09 Thread Hongtao Liu
On Tue, Oct 8, 2024 at 3:24 PM Levy Hsu  wrote:
>
> Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?
Ok.
>
> gcc/ChangeLog:
>
> * config/i386/i386.md: Rewrite insn truncsfbf2.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/truncsfbf-1.c: New test.
> * gcc.target/i386/truncsfbf-2.c: New test.
> ---
>  gcc/config/i386/i386.md | 16 ++---
>  gcc/testsuite/gcc.target/i386/truncsfbf-1.c |  9 +++
>  gcc/testsuite/gcc.target/i386/truncsfbf-2.c | 65 +
>  3 files changed, 83 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/truncsfbf-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/truncsfbf-2.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 9c2a0aa6112..d3fee0968d8 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -5672,16 +5672,18 @@
> (set_attr "mode" "HF")])
>
>  (define_insn "truncsfbf2"
> -  [(set (match_operand:BF 0 "register_operand" "=x, v")
> +  [(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
> (float_truncate:BF
> - (match_operand:SF 1 "register_operand" "x,v")))]
> -  "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
> -   && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
> + (match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
> +  "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
>"@
> +  psrld\t{$16, %0|%0, 16}
>%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
> -  vcvtneps2bf16\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "avxneconvert,avx512bf16vl")
> -   (set_attr "prefix" "vex,evex")])
> +  vcvtneps2bf16\t{%1, %0|%0, %1}
> +  vpsrld\t{$16, %1, %0|%0, %1, 16}"
> +  [(set_attr "isa" "noavx,avxneconvert,avx512bf16vl,avx")
> +   (set_attr "prefix" "orig,vex,evex,vex")
> +   (set_attr "type" "sseishft1,ssecvt,ssecvt,sseishft1")])
>
>  ;; Signed conversion to DImode.
>
> diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-1.c 
> b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c
> new file mode 100644
> index 000..dd3ff8a50b4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse2 -O2 -ffast-math" } */
> +/* { dg-final { scan-assembler-times "psrld" 1 } } */
> +
> +__bf16
> +foo (float a)
> +{
> +  return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-2.c 
> b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c
> new file mode 100644
> index 000..f4952f88fc9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c
> @@ -0,0 +1,65 @@
> +/* { dg-do run } */
> +/* { dg-options "-msse2 -O2 -ffast-math" } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <math.h>
> +
> +__bf16
> +foo (float a)
> +{
> +  return a;
> +}
> +
> +static __bf16
> +CALC (float *a)
> +{
> +  uint32_t bits;
> +  memcpy (&bits, a, sizeof (bits));
> +  bits >>= 16;
> +  uint16_t bfloat16_bits = (uint16_t) bits;
> +  __bf16 bf16;
> +  memcpy (&bf16, &bfloat16_bits, sizeof (bf16));
> +  return bf16;
> +}
> +
> +int
> +main (void)
> +{
> +  float test_values[] = { 0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f, 1000.0f, 
> -1000.0f,
> +  3.1415926f, -3.1415926f, 1e-8f, -1e-8f,
> +  1.0e+38f, -1.0e+38f, 1.0e-38f, -1.0e-38f };
> +  size_t num_values = sizeof (test_values) / sizeof (test_values[0]);
> +
> +  for (size_t i = 0; i < num_values; ++i)
> +{
> +  float original = test_values[i];
> +  __bf16 hw_bf16 = foo (original);
> +  __bf16 sw_bf16 = CALC (&original);
> +
> +  /* Verify psrld $16, %0 == %0 >> 16 */
> +  if (memcmp (&hw_bf16, &sw_bf16, sizeof (__bf16)) != 0)
> +abort ();
> +
> +  /* Reconstruct the float value from the __bf16 bits */
> +  uint16_t bf16_bits;
> +  memcpy (&bf16_bits, &hw_bf16, sizeof (bf16_bits));
> +  uint32_t reconstructed_bits = ((uint32_t) bf16_bits) << 16;
> +  float converted;
> +  memcpy (&converted, &reconstructed_bits, sizeof (converted));
> +
> +  float diff = fabsf (original - converted);
> +
> +  /* Expected Maximum Precision Loss */
> +  uint32_t orig_bits;
> +  memcpy (&orig_bits, &original, sizeof (orig_bits));
> +  int exponent = ((orig_bits >> 23) & 0xFF) - 127;
> +  float expected_loss = (exponent == -127)
> +? ldexpf (1.0f, -126 - 7)
> +: ldexpf (1.0f, exponent - 7);
> +  if (diff > expected_loss)
> +abort ();
> +}
> +  return 0;
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao


Re: [PATCH v13 0/4] c: Add __lengthof__ operator

2024-10-09 Thread Xavier Del Campo Romero
Hello,

Could you please unCC me from this discussion? Although I originally made this 
proposal, I no longer have an opinion on the subject and there is not much I 
can add to the discussion anyway.

Thank you all very much for your efforts to improve C.

Best regards,
--
Xavier Del Campo Romero



9 Oct 2024, 23:20 by a...@kernel.org:

> On Wed, Oct 09, 2024 at 09:11:52PM GMT, Joseph Myers wrote:
>
>> On Wed, 9 Oct 2024, Alejandro Colomar wrote:
>>
>> > Every little bit adds up.  Documentation is simpler if there is naming
>> > consistency.  We have SYNOPSISes in the man pages, and they're up front,
>> > because they constitute an important part of the documentation.
>>
>> We also have a convention for future standard C interfaces to put the 
>> length before the pointer so that a VLA parameter declaration can be used 
>> that makes very clear the intent for how many elements the array has, 
>> which seems much better for that purpose than relying on the name of a 
>> parameter.
>>
>
> I doubt that this will be doable for string functions.  Even newer
> additions to <string.h> will most likely have the size as the last
> element, if just for consistency with the existing APIs.  And this issue
> is primarily a string issue, so it won't be solved.
>
> [.identifier] is more likely to help with this.
>
> Cheers,
> Alex
>
>>
>> -- 
>> Joseph S. Myers
>> josmy...@redhat.com
>>
>
> -- 
> 
>


[PATCH] RISC-V: Bugfix for C++ code compilation failure with rv32imafc_zve32f [pr116883]

2024-10-09 Thread Li Xu
From: xuli 

Example as follows:

int main()
{
  unsigned long arraya[128], arrayb[128], arrayc[128];
  for (int i = 0; i < 128; i++)
   {
  arraya[i] = arrayb[i] + arrayc[i];
   }
  return 0;
}

Compiled with -march=rv32imafc_zve32f -mabi=ilp32f, it will cause a compilation 
issue:

riscv_vector.h:40:25: error: ambiguating new declaration of 'vint64m4_t 
__riscv_vle64(vbool16_t, const long long int*, unsigned int)'
   40 | #pragma riscv intrinsic "vector"
  | ^~~~
riscv_vector.h:40:25: note: old declaration 'vint64m1_t 
__riscv_vle64(vbool64_t, const long long int*, unsigned int)'

With zvl=32b, vbool16_t is registered in init_builtins() with
type_common.precision=0x101 (nunits=2), mode_nunits[E_RVVMF16BI]=[2,2].

Normally, vbool64_t is only valid when TARGET_MIN_VLEN > 32, so vbool64_t
is not registered in init_builtins(), meaning vbool64_t=null.

In order to implement __attribute__((target("arch=+v"))), we must register
all vector types and all RVV intrinsics. Therefore, vbool64_t will be registered
by default with zvl=128b in reinit_builtins(), resulting in
type_common.precision=0x101 (nunits=2) and mode_nunits[E_RVVMF64BI]=[2,2].

We then get TYPE_VECTOR_SUBPARTS(vbool16_t) == TYPE_VECTOR_SUBPARTS(vbool64_t),
calculated using type_common.precision, resulting in 2. Since vbool16_t and
vbool64_t have the same element type (boolean_type), the compiler treats them
as the same type, leading to a re-declaration conflict.

After all types and intrinsics have been registered, processing
__attribute__((target("arch=+v"))) will update the parameters option and
init_adjust_machine_modes. Therefore, to avoid conflicts, we can choose
zvl=4096b for the otherwise-null types registered in reinit_builtins().

command option zvl=32b
  type nunits
  vbool64_t => null
  vbool32_t=> [1,1]
  vbool16_t=> [2,2]
  vbool8_t=>  [4,4]
  vbool4_t=>  [8,8]
  vbool2_t=>  [16,16]
  vbool1_t=>  [32,32]

reinit zvl=128b
  vbool64_t => [2,2] conflict with zvl32b vbool16_t=> [2,2]
reinit zvl=256b
  vbool64_t => [4,4] conflict with zvl32b vbool8_t=>  [4,4]
reinit zvl=512b
  vbool64_t => [8,8] conflict with zvl32b vbool4_t=>  [8,8]
reinit zvl=1024b
  vbool64_t => [16,16] conflict with zvl32b vbool2_t=>  [16,16]
reinit zvl=2048b
  vbool64_t => [32,32] conflict with zvl32b vbool1_t=>  [32,32]
reinit zvl=4096b
  vbool64_t => [64,64] zvl=4096b is ok

Signed-off-by: xuli 

PR target/116883

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic_flags_pollute): Choose
zvl4096b to initialize null type.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr116883.C: New test.
---
 gcc/config/riscv/riscv-c.cc   |  7 ++-
 .../g++.target/riscv/rvv/base/pr116883.C  | 15 +++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9c66d..c59f408d3a8 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct 
pragma_intrinsic_flags *flags)
   riscv_zvl_flags = riscv_zvl_flags
 | MASK_ZVL32B
 | MASK_ZVL64B
-| MASK_ZVL128B;
+| MASK_ZVL128B
+| MASK_ZVL256B
+| MASK_ZVL512B
+| MASK_ZVL1024B
+| MASK_ZVL2048B
+| MASK_ZVL4096B;
 
   riscv_vector_elen_flags = riscv_vector_elen_flags
 | MASK_VECTOR_ELEN_32
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
new file mode 100644
index 000..15bbec40bdd
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
@@ -0,0 +1,15 @@
+/* PR target/116883: riscv_vector.h must compile cleanly with zve32f.  */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc_zve32f -mabi=ilp32f" } */
+
+#include <riscv_vector.h>
+
+int main()
+{
+  unsigned long arraya[128], arrayb[128], arrayc[128];
+  for (int i = 0; i < 128; i++)
+   {
+      arraya[i] = arrayb[i] + arrayc[i];
+   }
+  return 0;
+}
-- 
2.17.1



Re: [RFC/PATCH] libgcc: sh: Use soft-fp for non-hosted SH3/SH4

2024-10-09 Thread Oleg Endo
On Wed, 2024-07-03 at 11:59 +0200, Sébastien Michelland wrote:
> libgcc's fp-bit.c is quite slow and most modern/developed architectures
> have switched to using the soft-fp library. This patch does so for
> free-standing/unknown-OS SH3/SH4 builds, using soft-fp's default parameters
> for the most part, most notably no exceptions.
> 
> A quick run of Whetstone (built with OpenLibm) on an SH4 machine shows
> about x3 speedup (~320 -> 1050 Kwhets/s).
> 
> I'm sending this as RFC because I'm quite unsure about testing. I built
> the compiler and ran the benchmark, but I don't know if GCC has a test
> for soft-fp correctness and whether I can run that in my non-hosted
> environment. Any advice?
> 

As discussed, the patch was changed to use soft-fp not only for SH3/SH4 but
generally for all sh-elf targets.

sh-*-linux* and sh-*-rtems* are not affected by that and continue using the
fdpbit library for floating-point emulation on no-fpu variants.  If that
should be changed as well, please let me know.


Tested with

make -k check RUNTESTFLAGS="--target_board=sh-sim\{-m2/-ml,-m2/-mb}"

committed & pushed the attached version to master.


Best regards,
Oleg Endo


From e95512e2d5a317e8c043f232158df4b38186e51c Mon Sep 17 00:00:00 2001
From: Sébastien Michelland 
Date: Thu, 10 Oct 2024 09:24:39 +0900
Subject: [PATCH] SH: Use softfp for sh-elf

libgcc/ChangeLog:

	PR target/29845
	* config.host (sh-*-elf*): Replace fdpbit with softfp.
	* config/sh/sfp-machine.h: New file.

Signed-off-by: Sébastien Michelland 
---
 libgcc/config.host |  2 +-
 libgcc/config/sh/sfp-machine.h | 83 +++
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 libgcc/config/sh/sfp-machine.h

diff --git a/libgcc/config.host b/libgcc/config.host
index fa001c5..06fae15 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1413,9 +1413,9 @@ s390x-ibm-tpf*)
 	extra_parts="crtbeginS.o crtendS.o"
 	md_unwind_header=s390/tpf-unwind.h
 	;;
 sh-*-elf* | sh[12346l]*-*-elf*)
-	tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
+	tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-softfp-sfdf t-softfp"
 	extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \
 		libic_invalidate_array_4-100.a \
 		libic_invalidate_array_4-200.a \
 		libic_invalidate_array_4a.a \
diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h
new file mode 100644
index 000..26f6516
--- /dev/null
+++ b/libgcc/config/sh/sfp-machine.h
@@ -0,0 +1,83 @@
+/* Software floating-point machine description for SuperH.
+
+Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+.  */
+
+#define _FP_W_TYPE_SIZE	32
+#define _FP_W_TYPE	unsigned long
+#define _FP_WS_TYPE	signed long
+#define _FP_I_TYPE	long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_B	_FP_QNANBIT_B
+#define _FP_NANFRAC_H	_FP_QNANBIT_H
+#define _FP_NANFRAC_S	_FP_QNANBIT_S
+#define _FP_NANFRAC_D	_FP_QNANBIT_D, 0
+#define _FP_NANFRAC_Q	_FP_QNANBIT_Q, 0, 0, 0
+
+/* The type of the result of a floating point comparison.  This must
+   match __libgcc_cmp_return__ in GCC for the target.  */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_NANSIGN_B	0
+#define _FP_NANSIGN_H	0
+#define _FP_NANSIGN_S	0
+#define _FP_NANSIGN_D	0
+#define _FP_NANSIGN_Q	0
+
+#define _FP_KEEPNANFRACP 0
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)  \
+  do {  \
+R##_s = _FP_NANSIGN_##fs;   \
+_FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);  \
+R##_c = FP_CLS_NAN

[PATCH] i386: Fix scalar VCOMSBF16 which only compares low word

2024-10-09 Thread Kong, Lingling
Hi,

Fix the avx10_2_comsbf16_v8bf pattern: scalar VCOMSBF16 only compares the low word (element 0) of each operand.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m64}.

Ok for trunk?

gcc/ChangeLog:

* config/i386/sse.md (avx10_2_comsbf16_v8bf): Fixed scalar
operands.
---
 gcc/config/i386/sse.md | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d6e2135423d..a529849898e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -32332,8 +32332,12 @@
 (define_insn "avx10_2_comsbf16_v8bf"
   [(set (reg:CCFP FLAGS_REG)
(unspec:CCFP
- [(match_operand:V8BF 0 "register_operand" "v")
-  (match_operand:V8BF 1 "nonimmediate_operand" "vm")]
+ [(vec_select:BF
+(match_operand:V8BF 0 "register_operand" "v")
+(parallel [(const_int 0)]))
+  (vec_select:BF
+(match_operand:V8BF 1 "nonimmediate_operand" "vm")
+(parallel [(const_int 0)]))]
 UNSPEC_VCOMSBF16))]
   "TARGET_AVX10_2_256"
   "vcomsbf16\t{%1, %0|%0, %1}"
--
2.31.1



[PATCH] i386: Fix some patterns's mem attribute.

2024-10-09 Thread Hu, Lin1
Hi, all

This is another patch to change some patterns' type attr from ssemov to
ssemov2.

The memory attr of some ssemov patterns should be load when their operand 2 is
a memory operand.

Bootstrapped and regtested on x86-64-linux-pc, OK for trunk?

BRs,
Lin

gcc/ChangeLog:

* config/i386/sse.md
(sse_movhlps): Change type attr from ssemov to ssemov2.
(sse_loadhps): Ditto.
(*vec_concat): Ditto.
(vec_setv2df_0): Ditto.
(sse_loadlps): Change attr from ssemov to ssemov2 for 2, 3.
(sse2_loadhpd): Change attr from ssemov to ssemov2 for 0, 1.
(sse2_loadlpd): Change attr from ssemov to ssemov2 for 0, 1,
2.
(sse2_movsd_): Change attr from ssemov to ssemov2 except for 5.
(vec_concatv2df): Change attr from ssemov to ssemov2 except for 0, 1,
2.
(*vec_concat): Change attr from ssemov to ssemov2 for 3, 4.
(vec_concatv2di): Change attr from ssemov to ssemov2 except for 0, 1,
2, 3, 4, 5.
---
 gcc/config/i386/sse.md | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ccef3e063ec..a45b50ad732 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10995,7 +10995,7 @@ (define_insn "sse_movhlps"
vmovlps\t{%H2, %1, %0|%0, %1, %H2}
%vmovhps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -11557,7 +11557,7 @@ (define_insn "sse_loadhps"
vmovlhps\t{%2, %1, %0|%0, %1, %2}
%vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
 
@@ -11610,7 +11610,7 @@ (define_insn "sse_loadlps"
vmovlps\t{%2, %1, %0|%0, %1, %q2}
%vmovlps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+   (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov")
(set (attr "length_immediate")
  (if_then_else (eq_attr "alternative" "0,1")
   (const_string "1")
@@ -11766,7 +11766,7 @@ (define_insn "*vec_concat"
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
 
@@ -12214,7 +12214,7 @@ (define_insn "vec_setv2df_0"
movlpd\t{%2, %0|%0, %2}
vmovlpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "mode" "DF")])
 
 (define_expand "vec_set"
@@ -14665,7 +14665,7 @@ (define_insn "sse2_loadhpd"
#
#"
   [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
-   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+   (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov")
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "0")
   (const_string "1")
@@ -14735,6 +14735,8 @@ (define_insn "sse2_loadlpd"
  (const_string "fmov")
(eq_attr "alternative" "10")
  (const_string "imov")
+   (eq_attr "alternative" "0,1,2")
+ (const_string "ssemov2")
   ]
   (const_string "ssemov")))
(set (attr "prefix_data16")
@@ -14787,7 +14789,7 @@ (define_insn "sse2_movsd_"
  (if_then_else
(eq_attr "alternative" "5")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
  (if_then_else
(and (eq_attr "alternative" "2,4")
@@ -14859,7 +14861,7 @@ (define_insn "vec_concatv2df"
  (if_then_else
(eq_attr "alternative" "0,1,2")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3")
  (const_string "1")
@@ -21545,7 +21547,7 @@ (define_insn "*vec_concat"
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
-   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
(set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
 
@@ -21653,7 +21655,7 @@ (define_insn "vec_concatv2di"
  (if_then_else
(eq_attr "alternative" "0,1,2,3,4,5")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "add