date:20250707

[PATCH, FYI] [vxworks] [x86] disable vxworks6 PIC on vxworks7

2025-07-07 Thread Alexandre Oliva



VxWorks6 used symbols __GOTT_BASE__ and __GOTT_INDEX__ to obtain the
address of the global offset table.  Starting with VxWorks7, that is
no longer the case, but we've still issued these symbols in
output_set_got.  Do that only with VxWorks<7.

Switching to the call-based PIC register sequence, we have to set the
flag that prevents the use of the red zone, and AFAICT the reasons
that ruled out GOTOFF and other relative addressing no longer apply to
VxWorks7+.

Bootstrapped on x86_64-linux-gnu; regression-tested on i586-vx7r2,
x86_64-vx7r2, ppc-vx7r2, ppc64-vx7r2, arm-vx7r2, and aarch64-vx7r2,
along with other patches.  I'm checking this in, pre-approved by Olivier
Hainque in his role of maintainer of VxWorks ports.


for  gcc/ChangeLog

* config/vxworks-dummy.h (TARGET_VXWORKS_VAROFF): New.
(TARGET_VXWORKS_GOTTPIC): New.
* config/vxworks.h (TARGET_VXWORKS_VAROFF): Override.
(TARGET_VXWORKS_GOTTPIC): Likewise.
* config/i386/i386.cc (output_set_got): Disable VxWorks6 GOT
sequence on VxWorks7.
(legitimize_pic_address): Accept relative addressing of
labels on VxWorks7.
(ix86_delegitimize_address_1): Likewise.
(ix86_output_addr_diff_elt): Likewise.
* config/i386/i386.md (tablejump): Likewise.
(set_got, set_got_labelled): Set no-red-zone flag on VxWorks7.
---
 gcc/config/i386/i386.cc   |8 
 gcc/config/i386/i386.md   |6 +++---
 gcc/config/i386/predicates.md |3 ++-
 gcc/config/vxworks-dummy.h|   12 
 gcc/config/vxworks.h  |   12 
 5 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b64175d6c9398..fd3f35de14d3d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6526,7 +6526,7 @@ output_set_got (rtx dest, rtx label)
 
   xops[0] = dest;
 
-  if (TARGET_VXWORKS_RTP && flag_pic)
+  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
 {
   /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
   xops[2] = gen_rtx_MEM (Pmode,
@@ -12245,7 +12245,7 @@ legitimize_pic_address (rtx orig, rtx reg)
   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
   /* We can't always use @GOTOFF for text labels
  on VxWorks, see gotoff_operand.  */
-  || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
+  || (TARGET_VXWORKS_VAROFF && GET_CODE (addr) == LABEL_REF))
 {
 #if TARGET_PECOFF
   rtx tmp = legitimize_pe_coff_symbol (addr, true);
@@ -13472,7 +13472,7 @@ ix86_delegitimize_address_1 (rtx x, bool base_term_p)
   else if (base_term_p
   && pic_offset_table_rtx
   && !TARGET_MACHO
-  && !TARGET_VXWORKS_RTP)
+  && !TARGET_VXWORKS_VAROFF)
{
  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
@@ -15872,7 +15872,7 @@ ix86_output_addr_diff_elt (FILE *file, int value, int 
rel)
   gcc_assert (!TARGET_64BIT);
 #endif
   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
-  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
 fprintf (file, "%s%s%d-%s%d\n",
 directive, LPREFIX, value, LPREFIX, rel);
 #if TARGET_MACHO
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 21b9f5ccd7a1d..5825acabb9461 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20102,7 +20102,7 @@ (define_expand "tablejump"
 
   /* We can't use @GOTOFF for text labels on VxWorks;
 see gotoff_operand.  */
-  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
{
  code = PLUS;
  op0 = operands[0];
@@ -20970,7 +20970,7 @@ (define_expand "set_got"
   (clobber (reg:CC FLAGS_REG))])]
   "!TARGET_64BIT"
 {
-  if (flag_pic && !TARGET_VXWORKS_RTP)
+  if (flag_pic && !TARGET_VXWORKS_GOTTPIC)
 ix86_pc_thunk_call_expanded = true;
 })
 
@@ -20991,7 +20991,7 @@ (define_expand "set_got_labelled"
   (clobber (reg:CC FLAGS_REG))])]
   "!TARGET_64BIT"
 {
-  if (flag_pic && !TARGET_VXWORKS_RTP)
+  if (flag_pic && !TARGET_VXWORKS_GOTTPIC)
 ix86_pc_thunk_call_expanded = true;
 })
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1bd63b2367e13..3afaf83a7a0c5 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -664,8 +664,9 @@ (define_predicate "local_func_symbolic_operand"
 ;; same segment as the GOT.  Unfortunately, the flexibility of linker
 ;; scripts means that we can't be sure of that in general, so assume
 ;; @GOTOFF is not valid on VxWorks, except with the large code model.
+;; The comments above seem to apply only to VxWorks releases before 7.
 (define_predicate "gotoff_operand"
-  (and (ior (not (match_test "TARGET_VXWORKS_RTP"))
+  (and (ior (not

[committed] Minor fix to gcc.dg/torture/pr120654.c

2025-07-07 Thread Jeff Law

I don't recall which port complained, but pr120654.c was failing on one 
or more of the embedded targets due to the use of malloc/free.  This 
change just turns them into the __builtin variants which makes everyone 
happy again.


Pushing to the trunk.

jeff

commit 477abe67d34f7f8d1b0b12408e08769995b6ca9e
Author: Jeff Law 
Date:   Mon Jul 7 20:48:17 2025 -0600

[committed] Minor fix to gcc.dg/torture/pr120654.c

I don't recall which port complained, but pr120654.c was failing on one or more
of the embedded targets due to the use of malloc/free.  This change just turns
them into the __builtin variants which makes everyone happy again.

gcc/testsuite
* gcc.dg/torture/pr120654.c: Use __builtin variants of malloc and free.

diff --git a/gcc/testsuite/gcc.dg/torture/pr120654.c 
b/gcc/testsuite/gcc.dg/torture/pr120654.c
index 3819b78281d..aacfeea29c9 100644
--- a/gcc/testsuite/gcc.dg/torture/pr120654.c
+++ b/gcc/testsuite/gcc.dg/torture/pr120654.c
@@ -2,8 +2,6 @@
 
 int a, c, e, f, h, j;
 long g, k;
-void *malloc(long);
-void free(void *);
 int b(int m) {
   if (m || a)
 return 1;
@@ -16,9 +14,9 @@ int i() {
 }
 void n() {
   long o;
-  int *p = malloc(sizeof(int));
+  int *p = __builtin_malloc(sizeof(int));
   k = 1 % j;
   for (; i() + f + h; o++)
 if (p[d(j + 6, (int)k + 1992695866) + h + f + j + (int)k - 1 + o])
-  free(p);
+  __builtin_free(p);
 }

Re: [PUSHED] GCN, nvptx: Support '-mfake-exceptions', and use it for offloading compilation [PR118794]

2025-07-07 Thread Thomas Schwinge

Hi Jan-Benedict!

On 2025-06-05T10:24:35+0200, Jan-Benedict Glaw  wrote:
> On Tue, 2025-04-15 00:22:26 +0200, Thomas Schwinge  
> wrote:
>> diff --git a/gcc/except.cc b/gcc/except.cc
>> index d5eb9274a62..205811c6567 100644
>> --- a/gcc/except.cc
>> +++ b/gcc/except.cc
>> @@ -970,12 +970,26 @@ expand_dw2_landing_pad_for_region (eh_region region)
>>  { /* Nothing */ }
>>  
>>if (region->exc_ptr_reg)
>> -emit_move_insn (region->exc_ptr_reg,
>> -gen_rtx_REG (ptr_mode, EH_RETURN_DATA_REGNO (0)));
>> +{
>> +  rtx exc_ptr_reg;
>> +  if (EH_RETURN_DATA_REGNO (0) != INVALID_REGNUM)
>
> This ...
>
>> +exc_ptr_reg = gen_rtx_REG (ptr_mode, EH_RETURN_DATA_REGNO (0));
>> +  else
>> +/* The target must be doing something special.  Submit a dummy.  */
>> +exc_ptr_reg = constm1_rtx;
>> +  emit_move_insn (region->exc_ptr_reg, exc_ptr_reg);
>> +}
>>if (region->filter_reg)
>> -emit_move_insn (region->filter_reg,
>> -gen_rtx_REG (targetm.eh_return_filter_mode (),
>> - EH_RETURN_DATA_REGNO (1)));
>> +{
>> +  rtx filter_reg;
>> +  if (EH_RETURN_DATA_REGNO (1) != INVALID_REGNUM)
>
> ...and this result in a signed <-> unsigned warning for targets
> that implement EH_RETURN_DATA_REGNO as a function (which usually
> returns just int), affecting at least m32c and mmix.

So shouldn't these signed 'int's get fixed up?  Per my understanding,
GCC register numbers explicitly are 'unsigned int':

gcc/rtl.h:extern rtx gen_rtx_REG (machine_mode, unsigned int);

gcc/rtl.h:#define INVALID_REGNUM (~(unsigned int) 0)


Grüße
 Thomas

Re: [PATCH 2/2] add masked-epilogue tuning

2025-07-07 Thread Hongtao Liu

On Mon, Jul 7, 2025 at 3:18 PM Hongtao Liu  wrote:
>
> On Fri, Jul 4, 2025 at 5:45 PM Richard Biener  wrote:
> >
> > The following adds a x86 tuning to enable the use of AVX512 masked
> > epilogues in cases we heuristically determine it to be not detrimental
> > by high chance.  Basically problematic cases are when there are
> > data streams that are both stored and loaded from and an outer loop
> > could end up executing only the inner loop masked epilogue and with
> > unlucky data stream advancement from the outer loop end up needing
> > to forward from masked stores to masked loads.  This isn't very
> > well handled, esp. for the case where unmasked operations would
> > not need to forward at all - that is, when forwarding completely
> > from the masked out portion of the store (like the AVX upper half
> > to the AVX lower half of a load).  There's also the case where
> > the number of iterations is known at compile time, only with
> > cost comparing we'd consider a non-masked epilog - as we are not
> > doing that we have to add heuristics to avoid masking when a
> > single vector epilog iteration would cover all scalar iterations
> > left (this is exercised by gcc.target/i386/pr110310.c).
> >
> > SPEC CPU 2017 shows 3% text size savings over not using masked
> > epilogues with performance impact in the noise.  Masking all vector
> > epilogues gets that to 4% text size savings with some major
> > runtime regressions in 503.bwaves_r and 527.cam4_r
> > (measured on a Zen4 system), we're leaving a 5% improvement
> > for 549.fotonik3d_r unrealized with the implemented heuristic.
> It looks interesting.
> I'll try with avx256_masked_epilogues to see if there's something unusual.
Oh, no need for a new tune, avx512_masked_epilogues can directly be
applied to those avx256_optimal avx512 processors, great!!!
> >
> > With the heuristics we turn 22513 vector epilogues + up to 12305 scalar
> > epilogues into 12305 masked vector epilogues of which 574 are for
> > AVX vector sizes, 79 for SSE vector sizes and the rest for AVX512.
> > When masking all epilogues we get 14567 of them from
> > 29467 vector + up to 14567 scalar epilogues, so the heuristics disable
> > an additional 20% of masked epilogues.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > OK?
> >
> > Thanks,
> > Richard.
> >
> > * config/i386/x86-tune.def (X86_TUNE_AVX512_MASKED_EPILOGUES):
> > New tunable, default on for m_ZNVER4 and m_ZNVER5.
> > * config/i386/i386.cc (ix86_vector_costs::finish_cost): With
> > X86_TUNE_AVX512_MASKED_EPILOGUES and when the main loop
> > had a vectorization factor > 2 use a masked epilogue when
> > possible and when not obviously problematic.
> >
> > * gcc.target/i386/vect-mask-epilogue-1.c: New testcase.
> > * gcc.target/i386/vect-mask-epilogue-2.c: Likewise.
> > * gcc.target/i386/vect-epilogues-3.c: Adjust.
> > ---
> >  gcc/config/i386/i386.cc   | 59 +++
> >  gcc/config/i386/x86-tune.def  |  5 ++
> >  .../gcc.target/i386/vect-epilogues-3.c|  2 +-
> >  .../gcc.target/i386/vect-mask-epilogue-1.c| 11 
> >  .../gcc.target/i386/vect-mask-epilogue-2.c| 14 +
> >  5 files changed, 90 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-2.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b64175d6c93..8e796ea4033 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -26295,6 +26295,65 @@ ix86_vector_costs::finish_cost (const vector_costs 
> > *scalar_costs)
> >&& LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
> >  m_suggested_epilogue_mode = V8QImode;
> >
> > +  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
> > + a masked epilogue if that doesn't seem detrimental.  */
> > +  if (loop_vinfo
> > +  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> > +  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
> > +  && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
> > +  && !OPTION_SET_P (param_vect_partial_vector_usage))
> > +{
> > +  bool avoid = false;
> > +  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> > + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
> > +   {
> > + unsigned int peel_niter
> > +   = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
> > + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
> > +   peel_niter += 1;
> > + /* When we know the number of scalar iterations of the epilogue,
> > +avoid masking when a single vector epilog iteration handles
> > +it in full.  */
> > + if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
> > +% LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_consta

Re: [PATCH v3] x86: Improve vector_loop/unrolled_loop for memset/memcpy

2025-07-07 Thread Hongtao Liu

On Mon, Jul 7, 2025 at 3:27 PM Hongtao Liu  wrote:
>
> On Tue, Jun 24, 2025 at 2:11 PM H.J. Lu  wrote:
> >
> > On Mon, Jun 23, 2025 at 2:24 PM H.J. Lu  wrote:
> > >
> > > On Wed, Jun 18, 2025 at 3:17 PM H.J. Lu  wrote:
> > > >
> > > > 1. Don't generate the loop if the loop count is 1.
> > > > 2. For memset with vector on small size, use vector if small size 
> > > > supports
> > > > vector, otherwise use the scalar value.
> > > > 3. Duplicate the promoted scalar value for vector.
> > > > 4. Always expand vector-version of memset for vector_loop.
> > > > 5. Use misaligned prologue if alignment isn't needed.  When misaligned
> > > > prologue is used, check if destination is actually aligned and update
> > > > destination alignment if aligned.
> > > >
> > > > The included tests show that codegen of vector_loop/unrolled_loop for
> > > > memset/memcpy are significantly improved.  For
> > > >
> > > > ---
> > > > void
> > > > foo (void *p1, size_t len)
> > > > {
> > > >   __builtin_memset (p1, 0, len);
> > > > }
> > > > ---
> > > >
> > > > with
> > > >
> > > > -O2 -minline-all-stringops 
> > > > -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign 
> > > > -march=x86-64
> > > >
> > > > we used to generate
> > > >
> > > > foo:
> > > > .LFB0:
> > > > .cfi_startproc
> > > > movq%rdi, %rax
> > > > pxor%xmm0, %xmm0
> > > > cmpq$64, %rsi
> > > > jnb .L18
> > > > .L2:
> > > > andl$63, %esi
> > > > je  .L1
> > > > xorl%edx, %edx
> > > > testb   $1, %sil
> > > > je  .L5
> > > > movl$1, %edx
> > > > movb$0, (%rax)
> > > > cmpq%rsi, %rdx
> > > > jnb .L19
> > > > .L5:
> > > > movb$0, (%rax,%rdx)
> > > > movb$0, 1(%rax,%rdx)
> > > > addq$2, %rdx
> > > > cmpq%rsi, %rdx
> > > > jb  .L5
> Lili found that the regression of 527.cam4_r (PR120943) is caused by
> more instructions due to the usage of movb instruction(takes more
> iterations) instead of original movq.
> The patch optimizes it with vector moves and solves the issue.

len with range_info will be inlined with vector_loop without
specifying  -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign

void
foo (void *p1, int len)
{
if (len < 256)
  __builtin_memset (p1, 0, len);
}

>
> > > > .L1:
> > > > ret
> > > > .p2align 4,,10
> > > > .p2align 3
> > > > .L18:
> > > > movq%rsi, %rdx
> > > > xorl%eax, %eax
> > > > andq$-64, %rdx
> > > > .L3:
> > > > movups  %xmm0, (%rdi,%rax)
> > > > movups  %xmm0, 16(%rdi,%rax)
> > > > movups  %xmm0, 32(%rdi,%rax)
> > > > movups  %xmm0, 48(%rdi,%rax)
> > > > addq$64, %rax
> > > > cmpq%rdx, %rax
> > > > jb  .L3
> > > > addq%rdi, %rax
> > > > jmp .L2
> > > > .L19:
> > > > ret
> > > > .cfi_endproc
> > > >
> > > > with very poor prologue/epilogue.  With this patch, we now generate:
> > > >
> > > > foo:
> > > > .LFB0:
> > > > .cfi_startproc
> > > > pxor%xmm0, %xmm0
> > > > cmpq$64, %rsi
> > > > jnb .L2
> > > > testb   $32, %sil
> > > > jne .L19
> > > > testb   $16, %sil
> > > > jne .L20
> > > > testb   $8, %sil
> > > > jne .L21
> > > > testb   $4, %sil
> > > > jne .L22
> > > > testq   %rsi, %rsi
> > > > jne .L23
> > > > .L1:
> > > > ret
> > > > .p2align 4,,10
> > > > .p2align 3
> > > > .L2:
> > > > movups  %xmm0, -64(%rdi,%rsi)
> > > > movups  %xmm0, -48(%rdi,%rsi)
> > > > movups  %xmm0, -32(%rdi,%rsi)
> > > > movups  %xmm0, -16(%rdi,%rsi)
> > > > subq$1, %rsi
> > > > cmpq$64, %rsi
> > > > jb  .L1
> > > > andq$-64, %rsi
> > > > xorl%eax, %eax
> > > > .L9:
> > > > movups  %xmm0, (%rdi,%rax)
> > > > movups  %xmm0, 16(%rdi,%rax)
> > > > movups  %xmm0, 32(%rdi,%rax)
> > > > movups  %xmm0, 48(%rdi,%rax)
> > > > addq$64, %rax
> > > > cmpq%rsi, %rax
> > > > jb  .L9
> > > > ret
> > > > .p2align 4,,10
> > > > .p2align 3
> > > > .L23:
> > > > movb$0, (%rdi)
> > > > testb   $2, %sil
> > > > je  .L1
> > > > xorl%eax, %eax
> > > > movw%ax, -2(%rdi,%rsi)
> > > > ret
> > > > .p2align 4,,10
> > > > .p2align 3
> > > > .L19:
> > > > movups  %xmm0, (%rdi)
> > > > movups  %xmm0, 16(%rdi)
> > > > movups  %xmm0, -32(%rdi,%rsi)
> > > > movups  %xmm0, -16(%rdi,%rsi)
> > > > ret
> > > > .p2align 4,,10
> > > > .p2align 3
> > > > .L20:
> > > > movups  %xmm0, (%rdi)
> > > > mo

Re: [PATCH 2/2] add masked-epilogue tuning

2025-07-07 Thread Richard Biener

On Mon, 7 Jul 2025, Hongtao Liu wrote:

> On Mon, Jul 7, 2025 at 3:18 PM Hongtao Liu  wrote:
> >
> > On Fri, Jul 4, 2025 at 5:45 PM Richard Biener  wrote:
> > >
> > > The following adds a x86 tuning to enable the use of AVX512 masked
> > > epilogues in cases we heuristically determine it to be not detrimental
> > > by high chance.  Basically problematic cases are when there are
> > > data streams that are both stored and loaded from and an outer loop
> > > could end up executing only the inner loop masked epilogue and with
> > > unlucky data stream advancement from the outer loop end up needing
> > > to forward from masked stores to masked loads.  This isn't very
> > > well handled, esp. for the case where unmasked operations would
> > > not need to forward at all - that is, when forwarding completely
> > > from the masked out portion of the store (like the AVX upper half
> > > to the AVX lower half of a load).  There's also the case where
> > > the number of iterations is known at compile time, only with
> > > cost comparing we'd consider a non-masked epilog - as we are not
> > > doing that we have to add heuristics to avoid masking when a
> > > single vector epilog iteration would cover all scalar iterations
> > > left (this is exercised by gcc.target/i386/pr110310.c).
> > >
> > > SPEC CPU 2017 shows 3% text size savings over not using masked
> > > epilogues with performance impact in the noise.  Masking all vector
> > > epilogues gets that to 4% text size savings with some major
> > > runtime regressions in 503.bwaves_r and 527.cam4_r
> > > (measured on a Zen4 system), we're leaving a 5% improvement
> > > for 549.fotonik3d_r unrealized with the implemented heuristic.
> > It looks interesting.
> > I'll try with avx256_masked_epilogues to see if there's something unusual.
> Oh, no need for a new tune, avx512_masked_epilogues can directly be
> applied to those avx256_optimal avx512 processors, great!!!

Yes, it might be misnamed - it refers to the architectural masking
feature of AVX512 but extends to avx512vl, thus SSE and AVX2 vector
widths.  There's the possibility to add additional heuristics of course.

Richard.

> > >
> > > With the heuristics we turn 22513 vector epilogues + up to 12305 scalar
> > > epilogues into 12305 masked vector epilogues of which 574 are for
> > > AVX vector sizes, 79 for SSE vector sizes and the rest for AVX512.
> > > When masking all epilogues we get 14567 of them from
> > > 29467 vector + up to 14567 scalar epilogues, so the heuristics disable
> > > an additional 20% of masked epilogues.
> > >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > >
> > > OK?
> > >
> > > Thanks,
> > > Richard.
> > >
> > > * config/i386/x86-tune.def (X86_TUNE_AVX512_MASKED_EPILOGUES):
> > > New tunable, default on for m_ZNVER4 and m_ZNVER5.
> > > * config/i386/i386.cc (ix86_vector_costs::finish_cost): With
> > > X86_TUNE_AVX512_MASKED_EPILOGUES and when the main loop
> > > had a vectorization factor > 2 use a masked epilogue when
> > > possible and when not obviously problematic.
> > >
> > > * gcc.target/i386/vect-mask-epilogue-1.c: New testcase.
> > > * gcc.target/i386/vect-mask-epilogue-2.c: Likewise.
> > > * gcc.target/i386/vect-epilogues-3.c: Adjust.
> > > ---
> > >  gcc/config/i386/i386.cc   | 59 +++
> > >  gcc/config/i386/x86-tune.def  |  5 ++
> > >  .../gcc.target/i386/vect-epilogues-3.c|  2 +-
> > >  .../gcc.target/i386/vect-mask-epilogue-1.c| 11 
> > >  .../gcc.target/i386/vect-mask-epilogue-2.c| 14 +
> > >  5 files changed, 90 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-2.c
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index b64175d6c93..8e796ea4033 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -26295,6 +26295,65 @@ ix86_vector_costs::finish_cost (const 
> > > vector_costs *scalar_costs)
> > >&& LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
> > >  m_suggested_epilogue_mode = V8QImode;
> > >
> > > +  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
> > > + a masked epilogue if that doesn't seem detrimental.  */
> > > +  if (loop_vinfo
> > > +  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> > > +  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
> > > +  && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
> > > +  && !OPTION_SET_P (param_vect_partial_vector_usage))
> > > +{
> > > +  bool avoid = false;
> > > +  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> > > + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
> > > +   {
> > > + unsigned int peel_niter
> > > +   = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

[PATCH v2] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Tomasz Kamiński

From: XU Kailiang 

C++ formatting locale could have a custom time_put that performs
differently from the C locale, so do not use __timepunct directly,
instead all of above specifiers use _M_locale_fmt.

For %a/%A/%b/%h/%B, the code handling the exception is now moved
to the _M_check_ok function, that is invoked before handling of the
conversion specifier. For time_points the values of months/weekday
are computed, and thus are always ok(), this information is indicated
by new _M_time_point member of the _ChronoSpec.

The different handling of j specifier for durations and time_points/
calendar types, is now handled using only _ChronoParts, and _M_time_only
_ChronoSpec is no longer needed, thus it was removed.

PR libstdc++/117214

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
(_ChronoSpec::_M_time_point): Define.
(__formatter_chrono::_M_parse): Use __parts to determine
interpretation of j.
(__formatter_chrono::_M_check_ok): Define.
(__formatter_chrono::_M_format_to): Invoke _M_check_ok.
(__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
exception throwing to _M_check_ok.
(__formatter_chrono::_M_j): Use _M_needs to define interpretation.
(__formatter_duration::_S_spec_for): Set _M_time_point.
* testsuite/std/time/format/pr117214_custom_timeput.cc: New
test.

Co-authored-by: Tomasz Kaminski 
Signed-off-by: XU Kailiang 
Signed-off-by: Tomasz Kaminski 
---
This patch adjusts the implementation as follows:
 * we use _M_locale_fmt for all specifiers
 * %h which is alias for %b is also covered

Tested on x86_64-linux locally.

 libstdc++-v3/include/bits/chrono_io.h | 58 ++-
 .../time/format/pr117214_custom_timeput.cc| 37 
 2 files changed, 81 insertions(+), 14 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 72cd569ccd6..863b3550e4f 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -280,8 +280,8 @@ namespace __format
   // in the format-spec, e.g. "{:L%a}" is localized and locale-specific,
   // but "{:L}" is only localized and "{:%a}" is only locale-specific.
   unsigned _M_locale_specific : 1;
-  // Indicates that we are handling duration.
-  unsigned _M_time_only : 1;
+  // Indicates that we are handling time_point.
+  unsigned _M_time_point : 1;
   // Indicates that duration should be treated as floating point.
   unsigned _M_floating_point_rep : 1;
   // Indicate that duration uses user-defined representation.
@@ -693,8 +693,10 @@ namespace __format
  __allowed_mods = _Mod_O;
  break;
case 'j':
- __needed = __spec._M_time_only ? _HoursMinutesSeconds
-: _DayOfYear;
+ __needed = __parts & _DayOfYear;
+ // 'j' is decimal number of days for durations
+ if (__needed == _None)
+   __needed = _HoursMinutesSeconds;
  break;
case 'm':
  __needed = _Month;
@@ -919,7 +921,13 @@ namespace __format
   {
switch (__conv)
  {
+ case 'a':
+ case 'A':
+ case 'b':
+ case 'B':
  case 'c':
+ case 'h':
+ case 'p':
  case 'r':
  case 'x':
  case 'X':
@@ -947,6 +955,32 @@ namespace __format
  return __out;
}
 
+  void
+  _M_check_ok(const _ChronoData<_CharT>& __t, _CharT __conv) const
+  {
+   // n.b. for time point all date parts are computed, so
+   // they are alwas ok.
+   if (_M_spec._M_time_point || _M_spec._M_debug)
+ return;
+
+   switch (__conv)
+   {
+   case 'a':
+   case 'A':
+ if (!__t._M_weekday.ok()) [[unlikely]]
+__throw_format_error("format error: invalid weekday");
+ return;
+   case 'b':
+   case 'h':
+   case 'B':
+ if (!__t._M_month.ok()) [[unlikely]]
+   __throw_format_error("format error: invalid month");
+ return;
+   default:
+ return;
+   }
+  }
+
   template
_OutIter
_M_format_to(const _ChronoData<_CharT>& __t, _OutIter __out,
@@ -1003,6 +1037,8 @@ namespace __format
  do
{
  _CharT __c = *__first++;
+ _M_check_ok(__t, __c);
+
  if (__use_locale_fmt && _S_localized_spec(__c, __mod)) 
[[unlikely]]
__out = _M_locale_fmt(std::move(__out), __fc.locale(),
  __tm, __c, __mod);
@@ -1153,11 +1189,8 @@ namespace __format
{
  // %a Locale's abbreviated weekday name.
  // %A Lo

Re: [PATCH v2] s390: Add some missing vector patterns.

2025-07-07 Thread Stefan Schulze Frielinghaus

On Wed, Jun 25, 2025 at 10:04:49AM +0200, Juergen Christ wrote:
> Some patterns that are detected by the autovectorizer can be supported by
> s390.  Add expanders such that autovectorization of these patterns works.
> 
> RTL for the builtins used unspec to represent highpart multiplication.
> Replace this by the correct RTL to allow further simplification.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Removed unused unspecs.
>   * config/s390/vector.md (avg3_ceil): New expander.
>   (uavg3_ceil): New expander.
>   (smul3_highpart): New expander.
>   (umul3_highpart): New expander.
>   * config/s390/vx-builtins.md (vec_umulh): Remove unspec.
>   (vec_smulh): Remove unspec.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/pattern-avg-1.c: New test.
>   * gcc.target/s390/vector/pattern-mulh-1.c: New test.
> 
> Signed-off-by: Juergen Christ 
> ---
>  gcc/config/s390/s390.md   |  3 --
>  gcc/config/s390/vector.md | 26 +
>  gcc/config/s390/vx-builtins.md| 10 +++
>  .../gcc.target/s390/vector/pattern-avg-1.c| 26 +
>  .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++
>  5 files changed, 85 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 97a4bdf96b2d..440ce93574f4 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -139,9 +139,6 @@
> UNSPEC_LCBB
>  
> ; Vector
> -   UNSPEC_VEC_SMULT_HI
> -   UNSPEC_VEC_UMULT_HI
> -   UNSPEC_VEC_SMULT_LO
> UNSPEC_VEC_SMULT_EVEN
> UNSPEC_VEC_UMULT_EVEN
> UNSPEC_VEC_SMULT_ODD
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..8d7ca1a520f3 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -3576,3 +3576,29 @@
>  ; vec_unpacks_float_lo
>  ; vec_unpacku_float_hi
>  ; vec_unpacku_float_lo
> +
> +(define_expand "avg3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T0 
> "register_operand" "=v")
> + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +(match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +   UNSPEC_VEC_AVG))]
> +  "TARGET_VX")

Expanders don't have constraints.

> +
> +(define_expand "uavg3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T0 
> "register_operand" "=v")
> + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +(match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +   UNSPEC_VEC_AVGU))]
> +  "TARGET_VX")

Ditto.

> +
> +(define_expand "smul3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"   
> "=v")
> + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
> +  "TARGET_VX")

Ditto.

> +
> +(define_expand "umul3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"   
> "=v")
> + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
> +  "TARGET_VX")

Ditto.


> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index a7bb7ff92f5e..2478f74e161a 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -983,9 +983,8 @@
>  ; vmhb, vmhh, vmhf, vmhg, vmhq
>  (define_insn "vec_smulh"
>[(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"   
> "=v")

^


Wrong indentation.

> - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> - (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")]
> -UNSPEC_VEC_SMULT_HI))]
> + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
>"TARGET_VX"
>"vmh\t%v0,%v1,%v2"
>[(set_attr "op_type" "VRR")])
> @@ -993,9 +992,8 @@
>  ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
>  (define_insn "vec_umulh"
>[(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"   
> "=v")

Re: [PATCH v3 0/5] Implement mdspan.

2025-07-07 Thread Luc Grosheintz





On 7/7/25 11:01, Tomasz Kaminski wrote:

Hi,

The patches look good to me, the only real comment is about using header
guard, that I plan to do locally.
I am testing on full test suite now, and will merge them pending the
approval from Jonathan.

 From the patch that will set __cpp_lib_mdspan, do you want to create it? I
can also prepare that patch,
but given that you provided the whole implementation, it would be nice to
have also this one attributed to you.


I think I have it ready somewhere. I'll find it and send it.



On Fri, Jul 4, 2025 at 3:18 PM Luc Grosheintz 
wrote:


Additionally, I think we can set __cpp_lib_mdspan.


Yes, we should set it to value 202207. I can add this patch.



On 7/4/25 10:29, Luc Grosheintz wrote:

This patch series replaces:
https://gcc.gnu.org/pipermail/libstdc++/2025-June/062207.html

Addresses the review comments and improves commit messages. Most notably
the unnecessary patch to "strength" exception guarantees has been
removed (and tests moved into the mdspan commit).

The outstanding tasks for C++23 mdspan are:

- Update cxxapi-data.csv,
- Update C++23 status page.

IIUC, the cxxapi-data.csv can be updated separately in bulk for all of
C++23 mdspan and I can take care of it in a follow up patch. I'm not
entirely sure when and by whom the status page is updated.

Luc Grosheintz (5):
libstdc++: Check prerequisites of layout_*::operator().
libstdc++: Check prerequisite of extents::extents.
libstdc++: Restructure mdspan tests to reuse IntLike.
libstdc++: Implement __mdspan::__size.
libstdc++: Implement mdspan and tests [PR107761].

   libstdc++-v3/include/std/mdspan   | 314 -
   libstdc++-v3/src/c++23/std.cc.in  |   3 +-
   .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
   .../mdspan/extents/class_mandates_neg.cc  |   2 +
   .../mdspan/extents/custom_integer.cc  |  27 +-
   .../mdspan/extents/extents_mismatch_neg.cc|  35 +
   .../23_containers/mdspan/extents/int_like.h   |  28 +
   .../23_containers/mdspan/layout_like.h|  80 +++
   .../mdspan/layouts/debug/out_of_bounds_neg.cc |  30 +
   .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
   .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
   11 files changed, 1156 insertions(+), 31 deletions(-)
   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc

   create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc

Re: [PATCH v3 2/5] libstdc++: Check prerequisite of extents::extents.

2025-07-07 Thread Jonathan Wakely

On Fri, 4 Jul 2025 at 09:30, Luc Grosheintz  wrote:
>
> Previously the prerequisite of the extents ctors that
>
> static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
>
> was not checked. This commit adds the __glibcxx_assert and tests it.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (extents): Check prerequisite of the ctor that
> static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
> * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
> Test the implemented prerequisite.
>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan   | 13 +++
>  .../mdspan/extents/class_mandates_neg.cc  |  2 ++
>  .../mdspan/extents/extents_mismatch_neg.cc| 35 +++
>  3 files changed, 50 insertions(+)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index cf20553aaa5..1d6cdc93d80 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> return __se;
> }
>
> +   template
> + constexpr bool
> + _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept

Was this intended to be a static member function?

> + {
> +   if constexpr (_OtherRank == _S_rank)
> + for (size_t __i = 0; __i < _S_rank; ++__i)
> +   if (_Extents[__i] != dynamic_extent
> +   && !cmp_equal(_Extents[__i], 
> _S_int_cast(__get_extent(__i
> + return false;
> +   return true;
> + }
> +
> template
>   constexpr void
>   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
>   {
> +   
> __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));
> for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
>   {
> size_t __di = __i;
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc 
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> index f9c1c019666..67d18feda96 100644
> --- 
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> +++ 
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> @@ -7,6 +7,8 @@ std::extents e1; // { dg-error "from 
> here" }
>  std::extents e2; // { dg-error "from here" }
>  std::extents e3; // { dg-error "from here" }
>  std::extents e4;   // { dg-error "from here" }
> +
>  // { dg-prune-output "dynamic or representable as IndexType" }
>  // { dg-prune-output "signed or unsigned integer" }
>  // { dg-prune-output "invalid use of incomplete type" }
> +// { dg-prune-output "non-constant condition for static assertion" }
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc 
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> new file mode 100644
> index 000..b35e5310d41
> --- /dev/null
> +++ 
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> @@ -0,0 +1,35 @@
> +// { dg-do compile { target c++23 } }
> +#include
> +
> +#include 
> +
> +constexpr size_t dyn = std::dynamic_extent;
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_00()
> +{
> +  auto e0 = std::extents{1};
> +  [[maybe_unused]] auto e1 = std::extents{e0};// { dg-error 
> "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_00());// { dg-error 
> "expansion of" }
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_01()
> +{
> +  [[maybe_unused]] auto e = std::extents{2, 2}; // { dg-error 
> "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_01());   // { dg-error 
> "expansion of" }
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_02()
> +{
> +  std::array exts{2, 2};
> +  [[maybe_unused]] auto e = std::extents{exts}; // { dg-error 
> "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_02());   // { dg-error 
> "expansion of" }
> +
> +// { dg-prune-output "non-constant condition for static assertion" }
> +// { dg-prune-output "__glibcxx_assert" }
> --
> 2.49.0
>

Re: [PATCH v3 3/5] libstdc++: Restructure mdspan tests to reuse IntLike.

2025-07-07 Thread Tomasz Kaminski

On Fri, Jul 4, 2025 at 10:37 AM Luc Grosheintz 
wrote:

> The class IntLike is used for testing extents with user-defined classes
> that convert to int. This commit places the class into a separate header
> file. This allows it to be reused across different parts of the mdspan
> related testsuite.
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/23_containers/mdspan/extents/custom_integer.cc:
> Delete IntLike and include "int_like.h".
> * testsuite/23_containers/mdspan/extents/int_like.h: Add
> IntLike.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
 LGTM, outside of using header guards. I can change that locally.

>  .../mdspan/extents/custom_integer.cc  | 27 +-
>  .../23_containers/mdspan/extents/int_like.h   | 28 +++
>  2 files changed, 29 insertions(+), 26 deletions(-)
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
>
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
> index 2907ad12ae7..404755bd5ac 100644
> --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
> @@ -2,38 +2,13 @@
>  #include 
>
>  #include 
> +#include "int_like.h"
>
>  // Test construction from a custom integer-like object, that has
>  // no copy/move ctor or copy/move assignment operator.
>
>  constexpr size_t dyn = std::dynamic_extent;
>
> -class IntLike
> -{
> -public:
> -  explicit
> -  IntLike(int i)
> -  : _M_i(i)
> -  { }
> -
> -  IntLike() = delete;
> -  IntLike(const IntLike&) = delete;
> -  IntLike(IntLike&&) = delete;
> -
> -  const IntLike&
> -  operator=(const IntLike&) = delete;
> -
> -  const IntLike&
> -  operator=(IntLike&&) = delete;
> -
> -  constexpr
> -  operator int() const noexcept
> -  { return _M_i; }
> -
> -private:
> -  int _M_i;
> -};
> -
>  static_assert(std::is_convertible_v);
>  static_assert(std::is_nothrow_constructible_v);
>
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
> new file mode 100644
> index 000..7785d2ffe95
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
> @@ -0,0 +1,28 @@
> +#pragma once
>
Please use header guards instead of pragma once.

> +
> +class IntLike
> +{
> +public:
> +  explicit
> +  IntLike(int i)
> +  : _M_i(i)
> +  { }
> +
> +  IntLike() = delete;
> +  IntLike(const IntLike&) = delete;
> +  IntLike(IntLike&&) = delete;
> +
> +  const IntLike&
> +  operator=(const IntLike&) = delete;
> +
> +  const IntLike&
> +  operator=(IntLike&&) = delete;
> +
> +  constexpr
> +  operator int() const noexcept
> +  { return _M_i; }
> +
> +private:
> +  int _M_i;
> +};
> +
> --
> 2.49.0
>
>

Re: [PATCH] libstdc++: Format __float128 as _Float128 only when long double is not 128 IEE [PR119246]

2025-07-07 Thread Jonathan Wakely

On Fri, 16 May 2025 at 12:04, Tomasz Kamiński wrote:
>
> For powerpc64 and sparc architectures that both have __float128 and 128bit 
> long double,
> the __float128 is same type as long double/__iee128 and already formattable.

__ieee128

>
> Remaining specializaiton make __float128 formattable on x86_64 via _Float128,

"The remaining specialization makes ..."

> however __float128 is now not formattable on x86_32 (-m32) with 
> -mlong-double-128,

I don't think we need to care about that.

> where __float128 is distinct type from long double that is 128bit IEEE.
>
> PR libstdc++/119246
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (formatter<__float128, _Char_T): Define if
> _GLIBCXX_FORMAT_F128 == 2.
> ---
> This patch avoids dealing with cases when long double is 128bit and thus
> __float128 may be same as long double (it is same for powerpc and will be for 
> sparc),
> but distinct for x86_32/-mlong-double-128.
>
> This preserves support for formatting __float128 on x86_64, where it is formatted
> using _Float128.
>
> Tested on x86_64, powerpc64. For format test checked both 
> -mabi=ibmlongdouble,-mabi=ieeelongdouble.
> Rainer Orth confirmed that this also works with his patch adding __float128
> for sparc.
>
> OK for trunk?

OK with the typos above fixed, and "IEE" in the first line of the
commit fixed to "IEEE".

>
>  libstdc++-v3/include/std/format | 11 +++
>  1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index b1823db83bc..d1ca05105f9 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -2973,11 +2973,9 @@ namespace __format
>  };
>  #endif
>
> -#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 > 1
> -  // Reuse __formatter_fp::format<__format::__flt128_t, Out> for 
> __float128.
> -  // This formatter is not declared if _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT is 
> true,
> -  // as __float128 when present is same type as __ieee128, which may be same 
> as
> -  // long double.
> +#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 == 2
> +  // Use __formatter_fp::format<__format::__flt128_t, Out> for __float128,
> +  // when long double is not 128bit IEEE type.
>template<__format::__char _CharT>
>  struct formatter<__float128, _CharT>
>  {
> @@ -2995,9 +2993,6 @@ namespace __format
>
>  private:
>__format::__formatter_fp<_CharT> _M_f;
> -
> -  static_assert( !is_same_v<__float128, long double>,
> -"This specialization should not be used for long double" 
> );
>  };
>  #endif
>
> --
> 2.49.0
>

Re: [PATCH v3 4/5] libstdc++: Implement mdspan::size.

2025-07-07 Thread Tomasz Kaminski

On Fri, Jul 4, 2025 at 10:34 AM Luc Grosheintz 
wrote:

> The current code uses __mdspan::__fwd_prod(__exts, __rank) to express
> computing the size of an extent. This commit adds a function __mdspan::
> __size(__exts) to express the idea more directly.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (__mdspan::__size): New function.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM

>  libstdc++-v3/include/std/mdspan | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index 1d6cdc93d80..7e970c2b905 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -398,6 +398,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>__rev_prod(const _Extents& __exts, size_t __r) noexcept
>{ return __exts_prod(__exts, __r + 1, __exts.rank()); }
>
> +template
> +  constexpr typename _Extents::index_type
> +  __size(const _Extents& __exts) noexcept
> +  { return __fwd_prod(__exts, __exts.rank()); }
> +
>  template
>auto __build_dextents_type(integer_sequence)
> -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
> @@ -591,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>constexpr index_type
>required_span_size() const noexcept
> -  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
> +  { return __mdspan::__size(_M_extents); }
>
>template<__mdspan::__valid_index_type... _Indices>
> requires (sizeof...(_Indices) == extents_type::rank())
> @@ -730,7 +735,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>constexpr index_type
>required_span_size() const noexcept
> -  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
> +  { return __mdspan::__size(_M_extents); }
>
>template<__mdspan::__valid_index_type... _Indices>
> requires (sizeof...(_Indices) == extents_type::rank())
> @@ -986,8 +991,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{
> if constexpr (!is_always_exhaustive())
>   {
> -   constexpr auto __rank = extents_type::rank();
> -   auto __size = __mdspan::__fwd_prod(_M_extents, __rank);
> +   auto __size = __mdspan::__size(_M_extents);
> if(__size > 0)
>   return __size == required_span_size();
>   }
> --
> 2.49.0
>
>

[PATCH] aarch64: Implement sme2+faminmax extension.

2025-07-07 Thread Alfie Richards

Hello all,

This patch implements the couple of amin/amax instructions that are part of
SME2 + faminmax.

Regression tested and bootstrapped for AArch64.

Thanks,
Alfie

-- >8 --

Implements the sme2+faminmax svamin and svamax intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sme.md (@aarch64_sme_):
New patterns.
* config/aarch64/aarch64-sve-builtins-sme.def (svamin): New intrinsics.
(svamax): New intrinsics.
* config/aarch64/aarch64-sve-builtins-sve2.cc (class faminmaximpl): New
class.
(svamin): New function.
(svamax): New function.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c: New test.
* gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c: New test.
---
 gcc/config/aarch64/aarch64-sme.md |  18 +++
 .../aarch64/aarch64-sve-builtins-sme.def  |   5 +
 .../aarch64/aarch64-sve-builtins-sve2.cc  |  44 +-
 .../aarch64/sme2/acle-asm/amax_f16_x2.c   |  97 +
 .../aarch64/sme2/acle-asm/amax_f16_x4.c   | 128 +
 .../aarch64/sme2/acle-asm/amax_f32_x2.c   |  96 +
 .../aarch64/sme2/acle-asm/amax_f32_x4.c   | 129 ++
 .../aarch64/sme2/acle-asm/amax_f64_x2.c   |  96 +
 .../aarch64/sme2/acle-asm/amax_f64_x4.c   | 128 +
 .../aarch64/sme2/acle-asm/amin_f16_x2.c   |  96 +
 .../aarch64/sme2/acle-asm/amin_f16_x4.c   | 128 +
 .../aarch64/sme2/acle-asm/amin_f32_x2.c   |  96 +
 .../aarch64/sme2/acle-asm/amin_f32_x4.c   | 128 +
 .../aarch64/sme2/acle-asm/amin_f64_x2.c   |  96 +
 .../aarch64/sme2/acle-asm/amin_f64_x4.c   | 128 +
 15 files changed, 1409 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c

diff --git a/gcc/config/aarch64/aarch64-sme.md 
b/gcc/config/aarch64/aarch64-sme.md
index b8bb4cc14b6..bfe368e80b5 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -38,6 +38,7 @@
 ;;  Binary arithmetic on ZA tile
 ;;  Binary arithmetic on ZA slice
 ;;  Binary arithmetic, writing to ZA slice
+;;  Absolute minimum/maximum
 ;;
 ;; == Ternary arithmetic
 ;;  [INT] Dot product
@@ -1264,6 +1265,23 @@ (define_insn "*aarch64_sme_single__plus"
   "\tza.[%w0, %1, vgx], %2, %3."
 )
 
+;; -
+;;  Absolute minimum/maximum
+;; -
+;; Includes:
+;; - svamin (SME2+faminmax)
+;; - svamax (SME2+faminmax)
+;; -
+
+(define_insn "@aarch64_sme_"
+  [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw")
+   (unspec:SVE_Fx24 [(match_operand:SVE_Fx24 1 "register_operand" "%0")
+ (match_operand:SVE_Fx24 2 "register_operand" 
"Uw")]
+FAMINMAX_UNS))]
+  "TARGET_SME2 && TARGET_FAMINMAX"
+  "\t%0, %1, %2"
+)
+
 ;; =
 ;; == Ternary arithmetic
 ;; =
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def 
b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index f75

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Richard Biener

On Sat, Jul 5, 2025 at 2:10 PM Siddhesh Poyarekar  wrote:
>
> On 2025-07-05 07:23, Richard Biener wrote:
> >> OK, should I revert right away or can we wait till Qing returns on Monday?
> >
> > Monday is OK with me.
> >
>
> Thanks, so I thought about this some more and I think when I said in
> bugzilla:
>
> "In fact, maybe the .ACCESS_WITH_SIZE handling in objsz probably needs
> improvement to express it better, but that's an orthogonal matter."
>
> I had the right intuition but I was completely wrong about it being an
> orthogonal matter.  That *is* the issue and it only becomes relevant
> when the member being described is a pointer and not a FAM.  e.g. for
> the following:
>
> ```
> struct A
> {
>int count;
> #ifndef PTR
>char c[] __attribute ((__counted_by__ (count)));
> #else
>char *c __attribute ((__counted_by__ (count)));
> #endif
> } a;
>
> unsigned long
> foo (struct A *a)
> {
>return __builtin_dynamic_object_size (a->c, 1);
> }
> ```
>
> the .ACCESS_WITH_SIZE abstraction records the size using &a->c:
>
>_2 = &a->c;
>_3 = &a->count;
>_1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>D.2964 = __builtin_dynamic_object_size (_1, 1);
>
> this doesn't make a difference when c is an array since the & operator
> is a nop.  However when the same is applied to the pointer a->c, it
> becomes an additional dereference, which changes the semantic meaning:
>
>_2 = &a->c;
>_3 = &a->count;
>_1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>_4 = *_1;
>D.2964 = __builtin_dynamic_object_size (_4, 1);
>
> Since the intent of the .ACCESS_WITH_SIZE was to associate the storage
> of count with c to prevent reordering, maybe the semantically correct
> solution here is that when c is a pointer, the frontend emits:
>
>_2 = a->c;
>_3 = &a->count;
>_1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>D.2964 = __builtin_dynamic_object_size (_1, 1);
>
> so a->c instead of &a->c.

Yes.  That's what I'd have expected happens?  I thought .ACCESS_WITH_SIZE
annotates the pointer, it doesn't perform an access itself - correct?  Where
is .ACCESS_WITH_SIZE documented?  I can't find it documented in the
internals manual, internal-fn.def has

/* A function to associate the access size and access mode information
   with the corresponding reference to an object.  It only reads from the
   2nd argument.  */
DEF_INTERNAL_FN (ACCESS_WITH_SIZE, ECF_PURE | ECF_LEAF | ECF_NOTHROW, NULL)

that suggests .ACCESS_WITH_SIZE performs a read on the size.  It doesn't
say what the function returns at all.

Is the above only happening
when using __builtin_dynamic_object_size (_1, 1) or also when performing
an actual access like

 return a->c[i];

?

>  In fact, maybe taking the address of a->c
> doesn't make sense in general and .ACCESS_WITH_SIZE should always be the
> above even for FAM?  Does that sound correct?
>
> Sid

Re: [PATCH, v2] Fortran: fix minor issues with coarrays (extended)

2025-07-07 Thread Andre Vehreschild

Hi Harald,

I totally understand your confusion. I also had a hard time figuring out what is
needed there. I restructured the code fragment and now only allow pure
*and* elemental intrinsic functions and pure *and* elemental user-defined
functions (hoping that's the opposite of intrinsics) in a caf accessor. For all
others a temporary is to be created in the helper structure. I also added a
comment to clarify the intention. I think this is better now. Opinions? 

Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?

Regards,
Andre

On Fri, 4 Jul 2025 19:29:08 +0200
Harald Anlauf  wrote:

> Andre,
> 
> either your patch to coarray.cc is wrong, or the comment in the code
> is not concise, or I am too dense to understand the intent of the
> change:
> 
> diff --git a/gcc/fortran/coarray.cc b/gcc/fortran/coarray.cc
> index ef8fd4e42d0..01aac581a74 100644
> --- a/gcc/fortran/coarray.cc
> +++ b/gcc/fortran/coarray.cc
> @@ -700,7 +700,7 @@ check_add_new_component (gfc_symbol *type, gfc_expr 
> *e, gfc_symbol *add_data)
> && !e->symtree->n.sym->attr.elemental
> && !(e->value.function.isym
>  && (e->value.function.isym->pure
> -|| e->value.function.isym->elemental)))
> +&& e->value.function.isym->elemental)))
>   /* Treat non-pure/non-elemental functions.  */
>   check_add_new_comp_handle_array (e, type, add_data);
> else
> 
> Can you please elaborate?
> 
> I understood the code comment in the way that any pure or elemental
> intrinsic should be handled in the else branch.  Or do you have
> an example which is different?
> 
> The change to trans-decl.cc (fix of decl) looks fine for me.
> 
> Harald
> 
> 
> Am 04.07.25 um 13:43 schrieb Andre Vehreschild:
> > Hi all,
> > 
> > attached patch narrows the use of intrinsic functions in the caf accessor
> > down to pure elemental functions. This is needed because functions that get
> > extracted into the caf accessor routine, have no access to the source
> > image's memory. E.g. team_number() is marked as pure, but takes a pointer
> > argument to an object in the source image's memory, which is not available
> > on the remote image where the accessor is executed. This patch fixes that
> > and also corrects the type in the decl of the ABI of team_number. It is of
> > the opaque type team_type aka void* now and not integer as formerly
> > declared.
> > 
> > Regtest ok on x86_64-pc-linux-gnu / F41. Ok for mainline?
> > 
> > Regards,
> > Andre
> > 
> > On Tue, 1 Jul 2025 12:54:49 -0700
> > Jerry D  wrote:
> >   
> >> On 7/1/25 12:51 PM, Harald Anlauf wrote:  
> >>> Dear all,
> >>>
> >>> the attached patch fixes the following minor issues found by running
> >>> f951 under valgrind for the just added new testcases coindexed_6.f90
> >>> and coindexed_7.f90:
> >>>
> >>> - minor front-end memleaks with non-freed strings and lost GMP variables
> >>>    (these are simple and obvious fixes)
> >>>
> >>> - an inconsistency between pure/elemental functions being either
> >>>    non-intrinsic or intrinsic.  Checking for the latter was likely missed
> >>>    from the beginning.
> >>>
> >>> No new testcase.
> >>>
> >>> Regtested on x86_64-pc-linux-gnu.  OK for mainline?
> >>>
> >>> Thanks,
> >>> Harald
> >>>  
> >>
> >> Yes, OK, straight-forward.
> >>
> >> Thanks,
> >>
> >> Jerry  
> > 
> >   
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 
From 2ad3600f5756b4c50fd70efde6d965a0037eb833 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild 
Date: Fri, 4 Jul 2025 11:26:46 +0200
Subject: [PATCH] Fortran: Fix pure/elemental function treatment and
 team_number parameter attribution.

gcc/fortran/ChangeLog:

	* coarray.cc (check_add_new_component): Only allow pure
	elemental (intrinsic) functions in a coarray accessor.
	* trans-decl.cc (gfc_build_builtin_function_decls): The only
	argument of team_number() is of type void* in the library ABI.
---
 gcc/fortran/coarray.cc| 26 --
 gcc/fortran/trans-decl.cc |  7 +++
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/gcc/fortran/coarray.cc b/gcc/fortran/coarray.cc
index ef8fd4e42d0..c611b539968 100644
--- a/gcc/fortran/coarray.cc
+++ b/gcc/fortran/coarray.cc
@@ -696,17 +696,23 @@ check_add_new_component (gfc_symbol *type, gfc_expr *e, gfc_symbol *add_data)
 	check_add_new_component (type, actual->expr, add_data);
 	  break;
 	case EXPR_FUNCTION:
-	  if (!e->symtree->n.sym->attr.pure
-	  && !e->symtree->n.sym->attr.elemental
-	  && !(e->value.function.isym
-		   && (e->value.function.isym->pure
-		   || e->value.function.isym->elemental)))
-	/* Treat non-pure/non-elemental functions.  */
-	check_add_new_comp_handle_array (e, type, add_data);
+	  if ((e->symtree->n.sym->attr.pure
+	   && e->symtree->n.sym->attr.elemental)
+	  || (e->value.function.isym && e->value.function.isym->pure
+		  && e->value.function.isym->elemental))
+	{
+	  /* Onl

Re: Add template keyword to <mdspan> for Clang

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 12:25, Luc Grosheintz  wrote:
>
>
>
> On 7/5/25 01:13, Jonathan Wakely wrote:
> > Clang wants this change:
> >
> > --- a/libstdc++-v3/include/std/mdspan
> > +++ b/libstdc++-v3/include/std/mdspan
> > @@ -509,7 +509,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >
> >  template
> >concept __mapping_of =
> > -   is_same_v<typename _Layout::mapping<typename _Mapping::extents_type>,
> > +   is_same_v<typename _Layout::template mapping<typename _Mapping::extents_type>,
> >   _Mapping>;
> >
> >  template
> >
> > to fix:
> >
> > /home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/16.0.0/../../../../include/c++/16.0.0/mdspan:512:30:
> > error: use 'template' keyword to treat 'mapping' as a dependent template name
> >   512 | is_same_v > _Mapping::extents_type>,
> >   | ^
> >
> >
> > I'll push that on Monday.
> >
>
> I've been dreading this. Unless you're confident it's
> the only issue (my gut says it's not), is now a good time
> to ensure our implementation of mdspan is compatible with
> other compilers?

Don't worry about it. If there are problems, people will let us know.
We have until next April before GCC 16 is released anyway.

> Is there a good trick for doing this? (Better than
> --gcc-toolchain and try to compile each non-neg test file.)


> Which other compilers should I check?

We only really care about Clang and the Intel compiler, which is
Clang-based now. The EDG front-end aims for GCC compatibility and is
hard to test with libstdc++ headers anyway.

Re: [PATCH v2] s390: Optimize fmin/fmax.

2025-07-07 Thread Stefan Schulze Frielinghaus

On Wed, Jun 25, 2025 at 10:04:41AM +0200, Juergen Christ wrote:
> On VXE targets, we can directly use the fp min/max instruction instead of
> calling into libm for fmin/fmax etc.
> 
> Provide fmin/fmax versions also for vectors even though it cannot be
> called directly.  This will be exploited with a follow-up patch when
> reductions are introduced.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md: Update UNSPECs
>   * config/s390/vector.md (fmax3): New expander.
>   (fmin3): New expander.
>   * config/s390/vx-builtins.md (*fmin): New insn.
>   (vfmin): Redefined to use new insn.
>   (*fmax): New insn.
>   (vfmax): Redefined to use new insn.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/fminmax-1.c: New test.
>   * gcc.target/s390/fminmax-2.c: New test.
> 
> Signed-off-by: Juergen Christ 
> ---
>  gcc/config/s390/s390.md   |  6 +-
>  gcc/config/s390/vector.md | 25 
>  gcc/config/s390/vx-builtins.md| 29 ++---
>  gcc/testsuite/gcc.target/s390/fminmax-1.c | 77 +++
>  gcc/testsuite/gcc.target/s390/fminmax-2.c | 29 +
>  5 files changed, 156 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/fminmax-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/fminmax-2.c
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 97a4bdf96b2d..1c88c9624b60 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -241,9 +241,6 @@
>  
> UNSPEC_VEC_MSUM
>  
> -   UNSPEC_VEC_VFMIN
> -   UNSPEC_VEC_VFMAX
> -
> UNSPEC_VEC_VBLEND
> UNSPEC_VEC_VEVAL
> UNSPEC_VEC_VGEM
> @@ -256,6 +253,9 @@
>  
> UNSPEC_NNPA_VCFN_V8HI
> UNSPEC_NNPA_VCNF_V8HI
> +
> +   UNSPEC_FMAX
> +   UNSPEC_FMIN
>  ])
>  
>  ;;
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..8bda30624c22 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -89,6 +89,13 @@
>  (define_mode_iterator VF_HW [(V4SF "TARGET_VXE") V2DF (V1TF "TARGET_VXE")
>(TF "TARGET_VXE")])
>  
> +; FP scalar and vector modes
> +(define_mode_iterator VFT_BFP [SF DF
> +   (V1SF "TARGET_VXE") (V2SF "TARGET_VXE") (V4SF 
> "TARGET_VXE")
> +   V1DF V2DF
> +   (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
> +
> +
>  (define_mode_iterator V_8   [V1QI])
>  (define_mode_iterator V_16  [V2QI  V1HI])
>  (define_mode_iterator V_32  [V4QI  V2HI V1SI V1SF])
> @@ -3576,3 +3583,21 @@
>  ; vec_unpacks_float_lo
>  ; vec_unpacku_float_hi
>  ; vec_unpacku_float_lo
> +
> +; fmax
> +(define_expand "fmax3"
> +  [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
> + (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
> +(match_operand:VFT_BFP 2 "register_operand" "v")
> +(const_int 4)]
> +   UNSPEC_FMAX))]
> +  "TARGET_VXE")

Expanders don't have constraints, so the constraints should be removed.
Wrong indentation.

> +
> +; fmin
> +(define_expand "fmin3"
> +  [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
> + (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
> +(match_operand:VFT_BFP 2 "register_operand" "v")
> +(const_int 4)]
> +   UNSPEC_FMIN))]
> +  "TARGET_VXE")

Ditto.

> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index a7bb7ff92f5e..0508df43b866 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -2136,15 +2136,32 @@
>"fchebs\t%v2,%v0,%v1"
>[(set_attr "op_type" "VRR")])
>  
> +(define_insn "*fmin"
> +  [(set (match_operand:VFT_BFP0 "register_operand"  "=v")
 ^
 ~~
Wrong indentation.

> + (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand"   "v")
> +  (match_operand:VFT_BFP 2 "register_operand"   "v")
> +  (match_operand:QI  3 "const_mask_operand" "C")]
> + UNSPEC_FMIN))]
> +  "TARGET_VXE"
> +  "fminb\t%v0,%v1,%v2,%b3"
> +  [(set_attr "op_type" "VRR")])
>  
> -(define_insn "vfmin"
> +(define_expand "vfmin"
>[(set (match_operand:VF_HW0 "register_operand"  "=v")
>   (unspec:VF_HW [(match_operand:VF_HW 1 "register_operand"   "v")
>  (match_operand:VF_HW 2 "register_operand"   "v")
>  (match_operand:QI3 "const_mask_operand" "C")]
> -   UNSPEC_VEC_VFMIN))]
> +   UNSPEC_FMIN))]
> +  "TARGET_VXE")

Expanders don't have constraints.

Anyhow, you could also merge

define_insn "*fmin"

and define_expand "vfmin"

into

define_insn "vfmin"

using iterator VFT_BFP since VFT_BFP subsumes VF_HW.

> +
> +(define_insn "*fmax"
> +  [(

Re: [PATCH 0/7] Improve bit-manipulation SIMD codegen for 64-bit types

2025-07-07 Thread Kyrylo Tkachov

Resending due to difficulties with my email

> On 7 Jul 2025, at 11:56, Kyrylo Tkachov  wrote:
> 
> Hi all,
> 
> This series improves code generation for 64-bit vector types as well as the 
> scalar DImode types.
> It makes use of SHA3 and SVE2 instructions like BCAX, EOR3, BSL*.
> The first 2 patches just extend the mode iterators used in a straightforward 
> way.
> Patches 3-7 handle the DImode cases to make sure we don’t force the operands 
> into
> SIMD registers unnecessarily by introducing the appropriate splitters.
> The final patch 7/7 is a bit more involved for reasons explained in the 
> description.
> I don’t feel strongly about applying that patch, but if it’s acceptable or 
> can be adjusted
> in a different way I’m happy to rework it.
> 
> Bootstrapped and tested on aarch64-none-linux-gnu, with BOOT_CFLAGS having 
> -mcpu=grace
> To exercise the SHA3 and SVE2 paths a bit more.
> 
> Thanks,
> Kyrill

[PATCH 1/7] aarch64: Allow 64-bit vector modes in pattern for BCAX instruction

2025-07-07 Thread Kyrylo Tkachov

Hi all,

The BCAX instruction from TARGET_SHA3 only operates on the full .16b form
of the inputs but as it's a pure bitwise operation we can use it for the 64-bit
modes as well as there we don't care about the upper 64 bits. This patch extends
the relevant pattern in aarch64-simd.md to accept the 64-bit vector modes.

Thus, for the input:
uint32x2_t
bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
 return BCAX (a, b, c);
}

we can now generate:
bcax_s:
bcax v0.16b, v0.16b, v1.16b, v2.16b
ret

instead of the current:
bcax_s:
bic v1.8b, v1.8b, v2.8b
eor v0.8b, v1.8b, v0.8b
ret

This patch doesn't cover the DI/V1DI modes as that would require extending
the bcaxqdi4 pattern with =r,r alternatives and adding splitting logic to
handle the cases where the operands arrive in GP regs. It is doable, but can
be a separate patch. This patch as is should be a straightforward improvement
always.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-simd.md (bcaxq4): Use VDQ_I mode
iterator.

gcc/testsuite/

* gcc.target/aarch64/simd/bcax_d.c: New test.


0001-aarch64-Allow-64-bit-vector-modes-in-pattern-for-BCA.patch
Description: 0001-aarch64-Allow-64-bit-vector-modes-in-pattern-for-BCA.patch

Re: [PATCH v3 5/5] libstdc++: Implement mdspan and tests [PR107761].

2025-07-07 Thread Luc Grosheintz





On 7/7/25 11:52, Jonathan Wakely wrote:

On Fri, 4 Jul 2025 at 09:37, Luc Grosheintz  wrote:


Implements the class mdspan as described in N4950, i.e. without P3029.
It also adds tests for mdspan. This commit completes the implementation
of P0009, i.e. the C++23 part.

 PR libstdc++/107761

libstdc++-v3/ChangeLog:

 * include/std/mdspan (mdspan): New class.
 * src/c++23/std.cc.in (mdspan): Add.
 * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
 * testsuite/23_containers/mdspan/mdspan.cc: New test.
 * testsuite/23_containers/mdspan/layout_like.h: Add class
 LayoutLike which models a user-defined layout.

Signed-off-by: Luc Grosheintz 
---
  libstdc++-v3/include/std/mdspan   | 284 +
  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
  .../23_containers/mdspan/layout_like.h|  80 +++
  .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
  6 files changed, 1034 insertions(+), 1 deletion(-)
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 7e970c2b905..f64804e2a42 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -1057,6 +1057,290 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return __p + __i; }
  };

+  namespace __mdspan
+  {
+template
+  constexpr bool
+  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> __indices)
+  {
+   static_assert(__exts.rank() == _Nm);
+   for (size_t __i = 0; __i < __exts.rank(); ++__i)
+ if (__indices[__i] >= __exts.extent(__i))
+   return false;
+   return true;
+  }
+  }
+
+  template>
+class mdspan
+{
+  static_assert(!is_array_v<_ElementType>,
+   "ElementType must not be an array type");
+  static_assert(!is_abstract_v<_ElementType>,
+   "ElementType must not be an abstract class type");
+  static_assert(__mdspan::__is_extents<_Extents>,
+   "Extents must be a specialization of std::extents");
+  static_assert(is_same_v<_ElementType,
+ typename _AccessorPolicy::element_type>);
+
+public:
+  using extents_type = _Extents;
+  using layout_type = _LayoutPolicy;
+  using accessor_type = _AccessorPolicy;
+  using mapping_type = typename layout_type::template 
mapping;
+  using element_type = _ElementType;
+  using value_type = remove_cv_t;
+  using index_type = typename extents_type::index_type;
+  using size_type = typename extents_type::size_type;
+  using rank_type = typename extents_type::rank_type;
+  using data_handle_type = typename accessor_type::data_handle_type;
+  using reference = typename accessor_type::reference;
+
+  static constexpr rank_type
+  rank() noexcept { return extents_type::rank(); }
+
+  static constexpr rank_type
+  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
+
+  static constexpr size_t
+  static_extent(rank_type __r) noexcept
+  { return extents_type::static_extent(__r); }
+
+  constexpr index_type
+  extent(rank_type __r) const noexcept { return extents().extent(__r); }
+
+  constexpr
+  mdspan()
+  requires (rank_dynamic() > 0)
+  && is_default_constructible_v
+ && is_default_constructible_v
+ && is_default_constructible_v
+  : _M_accessor(), _M_mapping(), _M_handle()
+  { }
+
+  constexpr
+  mdspan(const mdspan& __other) = default;
+
+  constexpr
+  mdspan(mdspan&& __other) = default;
+
+  template<__mdspan::__valid_index_type... _OIndexTypes>
+   requires (sizeof...(_OIndexTypes) == rank()
+  || sizeof...(_OIndexTypes) == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit
+   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
+   : _M_accessor(),
+ _M_mapping(_Extents(static_cast(std::move(__exts))...)),
+ _M_handle(std::move(__handle))
+   { }
+
+  template<__mdspan::__valid_index_type _OIndexType,
+  size_t _Nm>
+   requires (_Nm == rank() || _Nm == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit(_Nm != rank_dynamic())
+   mdspan(data_handle_type __handle, span<_OIndexType, _Nm> __exts)
+   : _M_accessor(), _M_mapping(extents_type(__exts)),
+ _M_handle(std::move(__handle))
+

Re: [PATCH] libstdc++: Make debug iterator pointer sequence const [PR116369]

2025-07-07 Thread Jonathan Wakely

On Sat, 5 Jul 2025 at 14:03, François Dumont  wrote:
>
> On 01/07/2025 22:51, Jonathan Wakely wrote:
> > On Mon, 16 Jun 2025 at 18:36, François Dumont  wrote:
> >> I eventually wonder if it is such a big deal to add the new symbols for 
> >> _GLIBCXX_DEBUG mode.
> > I like this version much more than the one trying to duplicate symbols with 
> > asm.
> >
> >
> >> Here is the patch doing this. It avoids adding many const_cast which is 
> >> what we are trying to achieve here.
> > I'm still not really sure if this is worth it though - is it fixing a
> > bug or a correctness problem? (using const_cast is safe if the objects
> > aren't actually const)
> >
> > All the new tests already pass, even without this patch. Are these
> > just tests for const member functions that we aren't currently testing
> > at all?
>
> Those tests are showing the same UB that you fixed as part of your
> PR116369 patch but this time with local_iterator. Even if tests are
> passing without this patch it's still UB before it, do you prefer to
> remove those tests then ?

Ah OK, so they are showing UB ... it's just that the compiler doesn't
actually complain about it.

Please make the const containers in those tests global variables,
instead of local variables inside main(). The compiler won't put local
variables in ROM so the test would never fail. It might put globals in
ROM (although not after your patch, because of the mutable members,
which is why the patch is actually fixing something).


>
> Globally this patch is following your recommendations on PR116369 commit
> where you were saying:
>
>  Ideally we would not need the const_cast at all. Instead, the _M_attach
>  member (and everything it calls) should be const-qualified. That would
>  work fine now, because the members that it ends up modifying are
>  mutable. Making that change would require a number of new exports from
>  the shared library, and would require retaining the old non-const
> member
>  functions (maybe as symbol aliases) for backwards compatibility. That
>  might be worth changing at some point, but isn't done here.
>
> In addition to what is said here I made the sequence pointer const too
> as the added mutable allows that.
>
> It was also the occasion to fix some types used in std::forward_list in
> Debug mode.
>
> Do you think it is useless eventually ?

I think it's worth doing, I was just concerned about the __asm__
solution used in the initial patches.

OK for trunk with the adjusted tests, thanks.

Re: [PATCH v3 5/5] libstdc++: Implement mdspan and tests [PR107761].

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 11:11, Luc Grosheintz  wrote:
>
>
>
> On 7/7/25 11:52, Jonathan Wakely wrote:
> > On Fri, 4 Jul 2025 at 09:37, Luc Grosheintz  
> > wrote:
> >>
> >> Implements the class mdspan as described in N4950, i.e. without P3029.
> >> It also adds tests for mdspan. This commit completes the implementation
> >> of P0009, i.e. the C++23 part.
> >>
> >>  PR libstdc++/107761
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >>  * include/std/mdspan (mdspan): New class.
> >>  * src/c++23/std.cc.in (mdspan): Add.
> >>  * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
> >>  * testsuite/23_containers/mdspan/mdspan.cc: New test.
> >>  * testsuite/23_containers/mdspan/layout_like.h: Add class
> >>  LayoutLike which models a user-defined layout.
> >>
> >> Signed-off-by: Luc Grosheintz 
> >> ---
> >>   libstdc++-v3/include/std/mdspan   | 284 +
> >>   libstdc++-v3/src/c++23/std.cc.in  |   3 +-
> >>   .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
> >>   .../23_containers/mdspan/layout_like.h|  80 +++
> >>   .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
> >>   .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
> >>   6 files changed, 1034 insertions(+), 1 deletion(-)
> >>   create mode 100644 
> >> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
> >>   create mode 100644 
> >> libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
> >>   create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
> >>   create mode 100644 
> >> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
> >>
> >> diff --git a/libstdc++-v3/include/std/mdspan 
> >> b/libstdc++-v3/include/std/mdspan
> >> index 7e970c2b905..f64804e2a42 100644
> >> --- a/libstdc++-v3/include/std/mdspan
> >> +++ b/libstdc++-v3/include/std/mdspan
> >> @@ -1057,6 +1057,290 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> { return __p + __i; }
> >>   };
> >>
> >> +  namespace __mdspan
> >> +  {
> >> +template
> >> +  constexpr bool
> >> +  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> 
> >> __indices)
> >> +  {
> >> +   static_assert(__exts.rank() == _Nm);
> >> +   for (size_t __i = 0; __i < __exts.rank(); ++__i)
> >> + if (__indices[__i] >= __exts.extent(__i))
> >> +   return false;
> >> +   return true;
> >> +  }
> >> +  }
> >> +
> >> +  template >> +  typename _LayoutPolicy = layout_right,
> >> +  typename _AccessorPolicy = default_accessor<_ElementType>>
> >> +class mdspan
> >> +{
> >> +  static_assert(!is_array_v<_ElementType>,
> >> +   "ElementType must not be an array type");
> >> +  static_assert(!is_abstract_v<_ElementType>,
> >> +   "ElementType must not be an abstract class type");
> >> +  static_assert(__mdspan::__is_extents<_Extents>,
> >> +   "Extents must be a specialization of std::extents");
> >> +  static_assert(is_same_v<_ElementType,
> >> + typename _AccessorPolicy::element_type>);
> >> +
> >> +public:
> >> +  using extents_type = _Extents;
> >> +  using layout_type = _LayoutPolicy;
> >> +  using accessor_type = _AccessorPolicy;
> >> +  using mapping_type = typename layout_type::template 
> >> mapping;
> >> +  using element_type = _ElementType;
> >> +  using value_type = remove_cv_t;
> >> +  using index_type = typename extents_type::index_type;
> >> +  using size_type = typename extents_type::size_type;
> >> +  using rank_type = typename extents_type::rank_type;
> >> +  using data_handle_type = typename accessor_type::data_handle_type;
> >> +  using reference = typename accessor_type::reference;
> >> +
> >> +  static constexpr rank_type
> >> +  rank() noexcept { return extents_type::rank(); }
> >> +
> >> +  static constexpr rank_type
> >> +  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
> >> +
> >> +  static constexpr size_t
> >> +  static_extent(rank_type __r) noexcept
> >> +  { return extents_type::static_extent(__r); }
> >> +
> >> +  constexpr index_type
> >> +  extent(rank_type __r) const noexcept { return 
> >> extents().extent(__r); }
> >> +
> >> +  constexpr
> >> +  mdspan()
> >> +  requires (rank_dynamic() > 0)
> >> +  && is_default_constructible_v
> >> + && is_default_constructible_v
> >> + && is_default_constructible_v
> >> +  : _M_accessor(), _M_mapping(), _M_handle()
> >> +  { }
> >> +
> >> +  constexpr
> >> +  mdspan(const mdspan& __other) = default;
> >> +
> >> +  constexpr
> >> +  mdspan(mdspan&& __other) = default;
> >> +
> >> +  template<__mdspan::__valid_index_type... _OIndexTypes>
> >> +   requires (sizeof...(_OIndexTypes) == rank()
> >> +  || sizeof...(_OIndexTypes) == rank_dynamic())
> >> +

[PATCH 3/7] aarch64: Handle DImode BCAX operations

2025-07-07 Thread Kyrylo Tkachov

Hi all,

To handle DImode BCAX operations we want to do them on the SIMD side only if
the incoming arguments don't require a cross-bank move.
This means we need to split back the combination to separate GP BIC+EOR
instructions if the operands are expected to be in GP regs through reload.
The split happens pre-reload if we already know that the destination will be
a GP reg. Otherwise if reload decides to use the "=r,r" alternative we ensure
operand 0 is early-clobber.
This scheme is similar to how we handle the BSL operations elsewhere in
aarch64-simd.md.

Thus, for the functions:
uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, 
c); }
uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX (a, 
b, c); }

we now generate the desired:
bcax_d_gp:
bic x1, x1, x2
eor x0, x1, x0
ret

bcax_d:
bcax v0.16b, v0.16b, v1.16b, v2.16b
ret

When the inputs are in SIMD regs we use BCAX and when they are in GP regs we
don't force them to SIMD with extra moves.

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-simd.md (*bcaxqdi4): New
define_insn_and_split.

gcc/testsuite/

* gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.



0003-aarch64-Handle-DImode-BCAX-operations.patch
Description: 0003-aarch64-Handle-DImode-BCAX-operations.patch

[PATCH 2/7] aarch64: Use EOR3 for 64-bit vector modes

2025-07-07 Thread Kyrylo Tkachov

Hi all,

Similar to the BCAX patch, we can also use EOR3 for 64-bit modes,
just by adjusting the mode iterator used.
Thus for input:

uint32x2_t
bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
 return EOR3 (a, b, c);
}

we now generate:
bcax_s:
eor3 v0.16b, v0.16b, v1.16b, v2.16b
ret

instead of:
bcax_s:
eor v1.8b, v1.8b, v2.8b
eor v0.8b, v1.8b, v0.8b
ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-simd.md (eor3q4): Use VDQ_I mode
iterator.

gcc/testsuite/

* gcc.target/aarch64/simd/eor3_d.c: New test.



0002-aarch64-Use-EOR3-for-64-bit-vector-modes.patch
Description: 0002-aarch64-Use-EOR3-for-64-bit-vector-modes.patch

[PATCH 5/7] aarch64: Use SVE2 NBSL for DImode arguments

2025-07-07 Thread Kyrylo Tkachov

Hi all,

Similar to the BCAX and EOR3 patterns from TARGET_SHA3 we can use the
SVE2 NBSL instruction for DImode arguments when they come in SIMD registers.
Again, this is accomplished with a new splitter for the GP case. I noticed
that the split has a side-effect of producing a GP EON instruction where it
wasn't getting generated before because the BSL insn-and-split got in the way.
So for the inputs:

uint64_t nbsl_gp(uint64_t a, uint64_t b, uint64_t c) { return NBSL (a, b, c); }
uint64x1_t nbsl_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return NBSL (a, 
b, c); }

We now generate:
nbsl_gp:
eor x0, x0, x1
and x0, x0, x2
eon x0, x0, x1
ret

nbsl_d:
nbsl z0.d, z0.d, z1.d, z2.d
ret

instead of:
nbsl_gp:
eor x0, x1, x0
and x0, x0, x2
eor x0, x0, x1
mvn x0, x0
ret

nbsl_d:
bif v0.8b, v1.8b, v2.8b
mvn v0.8b, v0.8b
ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-sve.md (*aarch64_sve2_nbsl_unpreddi): New
define_insn_and_split.

gcc/testsuite/

* gcc.target/aarch64/sve2/nbsl_d.c: New test.


0005-aarch64-Use-SVE2-NBSL-for-DImode-arguments.patch
Description: 0005-aarch64-Use-SVE2-NBSL-for-DImode-arguments.patch

Re: [PATCH v3 1/5] libstdc++: Check prerequisites of layout_*::operator().

2025-07-07 Thread Tomasz Kaminski

On Fri, Jul 4, 2025 at 10:32 AM Luc Grosheintz 
wrote:

> Previously, the prerequisite that the arguments passed to operator() are
> a multi-dimensional index (of extents()) was not checked.
>
> Both mapping::operator() and mdspan::operator[] have the same
> prerequisite. Since mdspan must check the prerequisite for user-defined
> layout mappings, the preference is to check in mdspan.
>
> Because out-of-bounds accesses are very common it's nevertheless useful
> to check the prerequisite in mapping::operator(). This is relevant for
> cases where the layout mappings are used without mdspan. This commit
> checks the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required
> tests.
>
> More discussion in the email chain starting at:
>
>   https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan: Check prerequisites of
> layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs.
> *
> testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc:
> Add tests for prerequisites.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM.

>  libstdc++-v3/include/std/mdspan   |  5 
>  .../mdspan/layouts/debug/out_of_bounds_neg.cc | 30 +++
>  2 files changed, 35 insertions(+)
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index c72a64094b7..cf20553aaa5 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _IndexType __mult = 1;
> auto __update = [&, __pos = 0u](_IndexType __idx) mutable
>   {
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
> __exts.extent(__pos)));
> __res += __idx * __mult;
> __mult *= __exts.extent(__pos);
> ++__pos;
> @@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
>   {
> --__pos;
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos],
> +  __exts.extent(__pos)));
> __res += __ind_arr[__pos] * __mult;
> __mult *= __exts.extent(__pos);
>   };
> @@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   {
> auto __update = [&, __pos = 0u](_IndexType __idx) mutable
>   {
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
> +
> __m.extents().extent(__pos)));
> __res += __idx * __m.stride(__pos++);
>   };
> (__update(__indices), ...);
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> new file mode 100644
> index 000..fb8ff01e8aa
> --- /dev/null
> +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> @@ -0,0 +1,30 @@
> +// { dg-do compile { target c++23 } }
> +// { dg-require-debug-mode "" }
> +#include
> +
> +template
> +  constexpr bool
> +  test_out_of_bounds_1d()
> +  {
> +auto m = typename Layout::mapping>{};
> +(void) m(0); // { dg-error "expansion of" }
> +return true;
> +  }
> +static_assert(test_out_of_bounds_1d()); // { dg-error
> "expansion of" }
> +static_assert(test_out_of_bounds_1d()); // { dg-error
> "expansion of" }
> +static_assert(test_out_of_bounds_1d()); // { dg-error
> "expansion of" }
> +
> +template
> +  constexpr bool
> +  test_out_of_bounds_3d()
> +  {
> +auto m = typename Layout::mapping>{};
> +(void) m(2, 5, 5); // { dg-error "expansion of" }
> +return true;
> +  }
> +static_assert(test_out_of_bounds_3d()); // { dg-error
> "expansion of" }
> +static_assert(test_out_of_bounds_3d()); // { dg-error
> "expansion of" }
> +static_assert(test_out_of_bounds_3d()); // { dg-error
> "expansion of" }
> +
> +// { dg-prune-output "non-constant condition for static assertion" }
> +// { dg-prune-output "__glibcxx_assert" }
> --
> 2.49.0
>
>

[PATCH 4/7] aarch64: Use EOR3 for DImode values

2025-07-07 Thread Kyrylo Tkachov

Hi all,

Similar to BCAX, we can use EOR3 for DImode, but we have to be careful
not to force GP<->SIMD moves unnecessarily, so add a splitter for that case.

So for input:
uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, 
c); }
uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, 
b, c); }

We generate the desired:
eor3_d_gp:
eor x1, x1, x2
eor x0, x1, x0
ret

eor3_d:
eor3 v0.16b, v0.16b, v1.16b, v2.16b
ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-simd.md (*eor3qdi4): New
define_insn_and_split.

gcc/testsuite/

* gcc.target/aarch64/simd/eor3_d.c: Add tests for DImode operands.


0004-aarch64-Use-EOR3-for-DImode-values.patch
Description: 0004-aarch64-Use-EOR3-for-DImode-values.patch

[PATCH 6/7] aarch64: Use SVE2 BSL1N for DImode arguments

2025-07-07 Thread Kyrylo Tkachov

Hi all,

Similar to other patches in this series, this patch adds a splitter
for DImode BSL1N operations, taking care to generate the right code
in the GP regs case.

Thus for the testcase we generate:
bsl1n_gp:
eon x0, x0, x1
and x0, x0, x2
eor x0, x0, x1
ret

bsl1n_d:
bsl1n z0.d, z0.d, z1.d, z2.d
ret

instead of the previous:
bsl1n_gp: // The same, avoid moves to FP regs.
eon x0, x0, x1
and x0, x0, x2
eor x0, x0, x1
ret

bsl1n_d:
fmov x0, d0
fmov x1, d1
eon x0, x1, x0
fmov d31, x0
and v2.8b, v31.8b, v2.8b
eor v0.8b, v2.8b, v1.8b
ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl1n_unpreddi): New
define_insn_and_split.

gcc/testsuite/

* gcc.target/aarch64/sve2/bsl1n_d.c: New test.



0006-aarch64-Use-SVE2-BSL1N-for-DImode-arguments.patch
Description: 0006-aarch64-Use-SVE2-BSL1N-for-DImode-arguments.patch

Re: Add template keyword to <mdspan> for Clang

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 07:53, Tomasz Kaminski  wrote:
>
> Thanks.
> I am not sure if template is necessary here, as I believe this is type-only 
> context, but I never understood the rules around this.

Same here. I remain unconvinced that relaxing the rules in the
standard was a good idea. "You always need it" was a lot easier to
understand than "you sometimes need it and even most members of the
standard committee aren't sure when you need it, probably safer to
just use it anyway".


>
> On Sat, Jul 5, 2025 at 1:15 AM Jonathan Wakely  wrote:
>>
>> Clang wants this change:
>>
>> --- a/libstdc++-v3/include/std/mdspan
>> +++ b/libstdc++-v3/include/std/mdspan
>> @@ -509,7 +509,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>> template
>>   concept __mapping_of =
>> -   is_same_v,
>> +   is_same_v> _Mapping::extents_type>,
>>  _Mapping>;
>>
>> template
>>
>> to fix:
>>
>> /home/jwakely/gcc/latest/lib/gcc/x86_64-pc-linux-gnu/16.0.0/../../../../include/c++/16.0.0/mdspan:512:30:
>> error: use 'template' keyword to treat 'mapping' as a dependent template name
>>  512 | is_same_v> _Mapping::extents_type>,
>>  | ^
>>
>>
>> I'll push that on Monday.
>>

[PATCH 7/7] aarch64: Use BSL2N for DImode operands

2025-07-07 Thread Kyrylo Tkachov

Hi all,

The intent of the patch is similar to previous in the series.
Make more use of BSL2N when we have DImode operands in SIMD regs,
but still use the GP instructions when that's where the operands are.
Compared to the previous patches there are a couple of complications:
* The operands are a bit more complex and get rejected by RTX costs during
combine. This is fixed by adding some costing logic to aarch64_rtx_costs.

* The GP split sequence requires two temporaries instead of just one.
I've marked operand 1 to be an input/output earlyclobber operand to give
the second temporary together with the earlyclobber operand 0. This means
that operand is marked with "+" even for the "w" alternatives as the modifier
is global, but I don't see another way out here. Suggestions welcome.

With these fixed for the testcase we generate:
bsl2n_gp: // unchanged scalar output
orr x1, x2, x1
and x0, x0, x2
orn x0, x0, x1
ret

bsl2n_d:
bsl2n z0.d, z0.d, z1.d, z2.d
ret

compared to the previous:
bsl2n_gp:
orr x1, x2, x1
and x0, x0, x2
orn x0, x0, x1
ret

bsl2n_d:
orr v1.8b, v2.8b, v1.8b
and v0.8b, v2.8b, v0.8b
orn v0.8b, v0.8b, v1.8b
ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_unpreddi): New
define_insn_and_split.
* config/aarch64/aarch64.cc (aarch64_bsl2n_rtx_form_p): Define.
(aarch64_rtx_costs): Use the above. Cost BSL2N ops.

gcc/testsuite/

* gcc.target/aarch64/sve2/bsl2n_d.c: New test.


0007-aarch64-Use-BSL2N-for-DImode-operands.patch
Description: 0007-aarch64-Use-BSL2N-for-DImode-operands.patch

Re: [PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Luc Grosheintz





On 7/7/25 11:45, Tomasz Kaminski wrote:

On Mon, Jul 7, 2025 at 11:41 AM Luc Grosheintz 
wrote:


libstdc++-v3/ChangeLog:

 * include/bits/version.def (mdspan): Set to 202207 and remove
 no_stdname.
 * include/bits/version.h: Regenerate.
 * testsuite/23_containers/mdspan/mdspan_ftm.cc: Test presence
 of FTM.

Signed-off-by: Luc Grosheintz 
---


Some minor suggestions below. Could you please also mention PR107761 in the
commit.
We have automation that will put that in bugzilla, so people tracking
issues can see it.
I would do that in general for any new commits implementing language
feature.
Let me know if you will not be able to find a corresponding bugzilla ticket.


Makes sense, and I've been horribly inconsistent about this (I completely
forgot during all of the layout patches).

I'll add the updated commit to v4 of the mdspan patch series.




  libstdc++-v3/include/bits/version.def | 3 +--
  libstdc++-v3/include/bits/version.h   | 3 ++-
  libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6 ++
  3 files changed, 9 insertions(+), 3 deletions(-)
  create mode 100644
libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc

diff --git a/libstdc++-v3/include/bits/version.def
b/libstdc++-v3/include/bits/version.def
index f4ba501c403..ad909afd20e 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1007,9 +1007,8 @@ ftms = {

  ftms = {
name = mdspan;
-  no_stdname = true; // FIXME: remove
values = {
-v = 1; // FIXME: 202207
+v = 202207;
  cxxmin = 23;
};
  };
diff --git a/libstdc++-v3/include/bits/version.h
b/libstdc++-v3/include/bits/version.h
index dc8ac07be16..72f9231846e 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1126,8 +1126,9 @@

  #if !defined(__cpp_lib_mdspan)
  # if (__cplusplus >= 202100L)
-#  define __glibcxx_mdspan 1L
+#  define __glibcxx_mdspan 202207L
  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#   define __cpp_lib_mdspan 202207L
  #  endif
  # endif
  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
new file mode 100644
index 000..fc528293e66
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc


I would name the file just ftm.


@@ -0,0 +1,6 @@
+// { dg-do compile { target c++23 } }
+#include 
+
+#ifndef __cpp_lib_mdspan
+#error "Missing FTM"
+#endif


We usually use the following to test the feature test macro:
#ifndef __cpp_lib_ranges_to_container
# error "Feature test macro for ranges_to_container is missing in "
#elif __cpp_lib_ranges_to_container < 202202L
# error "Feature test macro for ranges_to_container has wrong value in
"
#endif



--
2.49.0

Re: [PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Luc Grosheintz




On 7/7/25 13:30, Tomasz Kaminski wrote:

Ah, I just missed the other e-mail, and just found it.
Yes, in that case it makes sense to include it in the patch series.
You may also update the last two commits.


I'm not quite sure what you mean by "update the last two commits".

Do you mean that I should update mdspan [v3 4/5] and [v3 5/5] but
leave the first three commits alone?

I would have just updated all commits according to the suggestions
you made.



On Mon, Jul 7, 2025 at 1:27 PM Luc Grosheintz 
wrote:




On 7/7/25 13:24, Tomasz Kaminski wrote:

On Mon, Jul 7, 2025 at 12:34 PM Luc Grosheintz 


On 7/7/25 11:45, Tomasz Kaminski wrote:

On Mon, Jul 7, 2025 at 11:41 AM Luc Grosheintz <

luc.groshei...@gmail.com


wrote:


libstdc++-v3/ChangeLog:

   * include/bits/version.def (mdspan): Set to 202207 and

remove

   no_stdname.
   * include/bits/version.h: Regenerate.
   * testsuite/23_containers/mdspan/mdspan_ftm.cc: Test

presence

   of FTM.

Signed-off-by: Luc Grosheintz 
---


Some minor suggestions below. Could you please also mention PR107761 in

the

commit.
We have automation that will put that in bugzilla, so people tracking
issues can see it.
I would do that in general for any new commits implementing language
feature.
Let me know if you will not be able to find a corresponding bugzilla

ticket.

Makes sense, and I've been horribly inconsistent about this (I

completely

forgot during all of the layout patches).

I'll add the updated commit to v4 of the mdspan patch series.


I will be merging patch series for mdspan with changes made locally.
Please just send v2 of this patch.


Even though there's a major complaint related to ADL issues in
swap?

https://gcc.gnu.org/pipermail/libstdc++/2025-July/062380.html








libstdc++-v3/include/bits/version.def | 3 +--
libstdc++-v3/include/bits/version.h   | 3 ++-
libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6

++

3 files changed, 9 insertions(+), 3 deletions(-)
create mode 100644
libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc

diff --git a/libstdc++-v3/include/bits/version.def
b/libstdc++-v3/include/bits/version.def
index f4ba501c403..ad909afd20e 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1007,9 +1007,8 @@ ftms = {

ftms = {
  name = mdspan;
-  no_stdname = true; // FIXME: remove
  values = {
-v = 1; // FIXME: 202207
+v = 202207;
cxxmin = 23;
  };
};
diff --git a/libstdc++-v3/include/bits/version.h
b/libstdc++-v3/include/bits/version.h
index dc8ac07be16..72f9231846e 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1126,8 +1126,9 @@

#if !defined(__cpp_lib_mdspan)
# if (__cplusplus >= 202100L)
-#  define __glibcxx_mdspan 1L
+#  define __glibcxx_mdspan 202207L
#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#   define __cpp_lib_mdspan 202207L
#  endif
# endif
#endif /* !defined(__cpp_lib_mdspan) &&

defined(__glibcxx_want_mdspan) */

diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
new file mode 100644
index 00000000000..fc528293e66
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc


I would name the file just ftm.


@@ -0,0 +1,6 @@
+// { dg-do compile { target c++23 } }
+#include <mdspan>
+
+#ifndef __cpp_lib_mdspan
+#error "Missing FTM"
+#endif


We usually use the following to test the feature test macro:
#ifndef __cpp_lib_ranges_to_container
# error "Feature test macro for ranges_to_container is missing in <ranges>"

#elif __cpp_lib_ranges_to_container < 202202L
# error "Feature test macro for ranges_to_container has wrong value in <ranges>"
#endif



--
2.49.0

Re: [PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 2:11 PM Luc Grosheintz 
wrote:

>
> On 7/7/25 13:30, Tomasz Kaminski wrote:
> > Ah, I just missed the other e-mail, and just found it.
> > Yes, in that case it makes sense to include it in the patch series.
> > You may also update the last two commits.
>
> I'm not quite sure what you mean by "update the last two commits".
>
> Do you mean that I should update mdspan [v3 4/5] and [v3 5/5] but
> leave the first three commits alone?
>
The v3 1-4 were already approved, and I have made requested changes locally.
So I would just submit the new patch v4 5/5 and feature test macro.

>
> I would have just updated all commits according to the suggestions
> you made.
>
This I think would suggest that re-approval is needed, just multiplying the
work needed.

>
> >
> > On Mon, Jul 7, 2025 at 1:27 PM Luc Grosheintz 
> > wrote:
> >
> >>
> >>
> >> On 7/7/25 13:24, Tomasz Kaminski wrote:
> >>> On Mon, Jul 7, 2025 at 12:34 PM Luc Grosheintz <
> luc.groshei...@gmail.com
> >>>
> >>> wrote:
> >>>
> 
> 
>  On 7/7/25 11:45, Tomasz Kaminski wrote:
> > On Mon, Jul 7, 2025 at 11:41 AM Luc Grosheintz <
> >> luc.groshei...@gmail.com
> >
> > wrote:
> >
> >> libstdc++-v3/ChangeLog:
> >>
> >>* include/bits/version.def (mdspan): Set to 202207 and
> >> remove
> >>no_stdname.
> >>* include/bits/version.h: Regenerate.
> >>* testsuite/23_containers/mdspan/mdspan_ftm.cc: Test
> >> presence
> >>of FTM.
> >>
> >> Signed-off-by: Luc Grosheintz 
> >> ---
> >>
> > Some minor suggestions below. Could you please also mention PR107761
> in
>  the
> > commit.
> > We have automation that will put that in bugzilla, so people tracking
> > issues can see it.
> > I would do that in general for any new commits implementing language
> > feature.
> > Let me know if you will not be able to find a corresponding bugzilla
>  ticket.
> 
>  Makes sense, and I've been horribly inconsistent about this (I
> >> completely
>  forgot during all of the layout patches).
> 
>  I'll add the updated commit to v4 of the mdspan patch series.
> 
> >>> I will be merging patch series for mdspan with changes made locally.
> >>> Please just send v2 of this patch.
> >>
> >> Even though there's a major complaint related to ADL issues in
> >> swap?
> >>
> >> https://gcc.gnu.org/pipermail/libstdc++/2025-July/062380.html
> >>
> >>>
> 
> >
> >> libstdc++-v3/include/bits/version.def | 3
> +--
> >> libstdc++-v3/include/bits/version.h   | 3
> ++-
> >> libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6
> >> ++
> >> 3 files changed, 9 insertions(+), 3 deletions(-)
> >> create mode 100644
> >> libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> >>
> >> diff --git a/libstdc++-v3/include/bits/version.def
> >> b/libstdc++-v3/include/bits/version.def
> >> index f4ba501c403..ad909afd20e 100644
> >> --- a/libstdc++-v3/include/bits/version.def
> >> +++ b/libstdc++-v3/include/bits/version.def
> >> @@ -1007,9 +1007,8 @@ ftms = {
> >>
> >> ftms = {
> >>   name = mdspan;
> >> -  no_stdname = true; // FIXME: remove
> >>   values = {
> >> -v = 1; // FIXME: 202207
> >> +v = 202207;
> >> cxxmin = 23;
> >>   };
> >> };
> >> diff --git a/libstdc++-v3/include/bits/version.h
> >> b/libstdc++-v3/include/bits/version.h
> >> index dc8ac07be16..72f9231846e 100644
> >> --- a/libstdc++-v3/include/bits/version.h
> >> +++ b/libstdc++-v3/include/bits/version.h
> >> @@ -1126,8 +1126,9 @@
> >>
> >> #if !defined(__cpp_lib_mdspan)
> >> # if (__cplusplus >= 202100L)
> >> -#  define __glibcxx_mdspan 1L
> >> +#  define __glibcxx_mdspan 202207L
> >> #  if defined(__glibcxx_want_all) ||
> defined(__glibcxx_want_mdspan)
> >> +#   define __cpp_lib_mdspan 202207L
> >> #  endif
> >> # endif
> >> #endif /* !defined(__cpp_lib_mdspan) &&
>  defined(__glibcxx_want_mdspan) */
> >> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> >> b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> >> new file mode 100644
> >> index 00000000000..fc528293e66
> >> --- /dev/null
> >> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> >>
> > I would name the file just ftm.
> >
> >> @@ -0,0 +1,6 @@
> >> +// { dg-do compile { target c++23 } }
> >> +#include <mdspan>
> >> +
> >> +#ifndef __cpp_lib_mdspan
> >> +#error "Missing FTM"
> >> +#endif
> >>
> > We usually use following to test the feature test macro:
> > #ifndef __cpp_lib_ranges_to_container
> > # error "Feature test macro for ranges_to_container is missing in <ranges>"

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Jakub Jelinek

On Mon, Jul 07, 2025 at 07:21:26AM -0400, Siddhesh Poyarekar wrote:
> > is .ACCESS_WITH_SIZE documented?  I can't find it documented in the
> > internals manual, internal-fn.def has
> 
> It's documented in tree-object-size.cc as:
> 
> /* Compute __builtin_object_size for a CALL to .ACCESS_WITH_SIZE,
>OBJECT_SIZE_TYPE is the second argument from __builtin_object_size.
>The 2nd, 3rd, and the 4th parameters of the call determine the size of
>the CALL:
> 
>2nd argument REF_TO_SIZE: The reference to the size of the object,
>3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE
> represents
>  0: the number of bytes;
>  1: the number of the elements of the object type;
>4th argument TYPE_OF_SIZE: A constant 0 with its TYPE being the same as
> the TYPE
> of the object referenced by REF_TO_SIZE
>6th argument: A constant 0 with the pointer TYPE to the original flexible
>  array type or pointer field type.
> 
>The size of the element can be retrieved from the TYPE of the 6th argument
>of the call, which is the pointer to the original flexible array type or
>the type of the original pointer field.  */
> 
> which doesn't document the return either.  This should have more verbose
> documentation in the internals, including the rationale for its existence.
> 
> > /* A function to associate the access size and access mode information
> > with the corresponding reference to an object.  It only reads from the
> > 2nd argument.  */
> > DEF_INTERNAL_FN (ACCESS_WITH_SIZE, ECF_PURE | ECF_LEAF | ECF_NOTHROW, NULL)
> > 
> > that suggests .ACCESS_WITH_SIZE performs a read on the size.  It doesn't
> > say what the function returns at all.
> 
> In practice the function is a nop, it gets optimized away during RTL
> expansion.  The aim is simply to pretend that the reference of the size may
> escape pointer to make sure that any preceding updates to size don't get
> reordered w.r.t. __builtin_dynamic_object_size since the latter could get
> expanded to that size.
> 
> The return value of .ACCESS_WITH_SIZE clobbering PTR (that subsequently gets
> passed to __builtin_dynamic_object_size) should be sufficient to fully
> prevent the reordering, it shouldn't have to clobber &PTR, I think.

The original use of .ACCESS_WITH_SIZE was designed for FAMs, for those
it IMHO does the right thing, it is a pass through first arg function
which attaches size information to the passed as well as returned pointer.
That pointer is &FAM, so address of the containing structure plus offsetof
of the FAM first element.

The way it is used for non-FAMs looks just wrong.
It passes as first argument the address of the pointer, not the pointer
itself.  So we have ifn used for two completely different purposes with
different meanings, while the arguments are otherwise pretty much the same
(or how do you uniquely distinguish the cases where it provides object
size for what it returns vs. where it provides object size for what the
pointer it returns points to).  That is like the spaghetti code in certain
middle end warnings.  For warnings it is really bad, for code generation
decisions it is a fatal design flaw.

So, either you need a different ifn, or add some flag in bitfield
that clearly distinguishes the 2 cases, or different number of arguments,
or perhaps most easily, why do the dereference at all?
When I have
  struct U { int n; int fam[n] __attribute__((counted_by (n))); } *u;
continue passing &u->fam as first argument and &u->n as second, while for
  struct S {
int n;
int (*p)[n] __attribute__((counted_by(n)));
  } *f;
don't pass &f->p to the builtin but pass f->p.  You are providing size
for f->p pointer, not for &f->p pointer, while for FAM it is for &u->fam
pointer.  The second argument would be &f->n.

So, my recommendation would be to revert the counted_by GCC 16 series,
rework it and submit again.  Unless you can fix it up in a day or two.

Jakub

[pushed] c++: -Wno-abbreviated-auto-in-template-arg [PR120917]

2025-07-07 Thread Jason Merrill

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

In r14-1659 I added a missing error for a Concepts TS feature that we were
failing to diagnose, but this PR requests a way to disable that error for
code written thinking it was valid.  Which seems reasonable, since it
doesn't require any work beyond that and is a plausible extension by itself.

While looking at this, I also noticed we were still not giving the
diagnostic in a few cases, and fixing that affected a few of our old
concepts testcases.

PR c++/120917

gcc/ChangeLog:

* doc/invoke.texi: Add -Wno-abbreviated-auto-in-template-arg.

gcc/c-family/ChangeLog:

* c.opt: Add -Wno-abbreviated-auto-in-template-arg.
* c.opt.urls: Regenerate.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_simple_type_specifier): Attach
auto in targ in parameter to -Wabbreviated-auto-in-template-arg.
(cp_parser_placeholder_type_specifier): Diagnose constrained auto in
template arg.

gcc/testsuite/ChangeLog:

* g++.dg/concepts/auto7a.C: Add diagnostic.
* g++.dg/concepts/auto7b.C: New test.
* g++.dg/concepts/auto7c.C: New test.
* g++.dg/cpp1y/pr85076.C: Expect 'auto' error.
* g++.dg/concepts/pr67249.C: Likewise.
* g++.dg/cpp1y/lambda-generic-variadic.C: Likewise.
* g++.dg/cpp2a/concepts-pr67210.C: Likewise.
* g++.dg/concepts/pr67249a.C: New test.
* g++.dg/cpp1y/lambda-generic-variadic-a.C: New test.
* g++.dg/cpp2a/concepts-pr67210a.C: New test.
---
 gcc/doc/invoke.texi| 18 ++
 gcc/c-family/c.opt |  4 
 gcc/cp/parser.cc   | 12 +---
 gcc/testsuite/g++.dg/concepts/auto7a.C |  1 +
 gcc/testsuite/g++.dg/concepts/auto7b.C | 10 ++
 gcc/testsuite/g++.dg/concepts/auto7c.C | 12 
 gcc/testsuite/g++.dg/concepts/pr67249.C|  2 +-
 gcc/testsuite/g++.dg/concepts/pr67249a.C   |  7 +++
 .../g++.dg/cpp1y/lambda-generic-variadic-a.C   | 15 +++
 .../g++.dg/cpp1y/lambda-generic-variadic.C |  4 ++--
 gcc/testsuite/g++.dg/cpp1y/pr85076.C   |  2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-pr67210.C  |  2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-pr67210a.C | 11 +++
 gcc/c-family/c.opt.urls|  3 +++
 14 files changed, 95 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/concepts/auto7b.C
 create mode 100644 gcc/testsuite/g++.dg/concepts/auto7c.C
 create mode 100644 gcc/testsuite/g++.dg/concepts/pr67249a.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic-a.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr67210a.C

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7640e7d8867..74f5ee26042 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -3816,6 +3816,23 @@ Warn when a type with an ABI tag is used in a context 
that does not
 have that ABI tag.  See @ref{C++ Attributes} for more information
 about ABI tags.
 
+@opindex Wabbreviated-auto-in-template-arg
+@opindex Wno-abbreviated-auto-in-template-arg
+@item -Wno-abbreviated-auto-in-template-arg
+Disable the error for an @code{auto} placeholder type used within a
+template argument list to declare a C++20 abbreviated function
+template, e.g.
+
+@smallexample
+void f(S<auto>);
+@end smallexample
+
+This feature was proposed in the Concepts TS, but was not adopted into
+C++20; in the standard, a placeholder in a parameter declaration must
+appear as a decl-specifier.  The error can also be reduced to a
+warning by @option{-fpermissive} or
+@option{-Wno-error=abbreviated-auto-in-template-arg}.
+
 @opindex Wcomma-subscript
 @opindex Wno-comma-subscript
 @item -Wcomma-subscript @r{(C++ and Objective-C++ only)}
@@ -6443,6 +6460,7 @@ only by this flag, but it also downgrades some C and C++ 
diagnostics
 that have their own flag:
 
 @gccoptlist{
+-Wabbreviated-auto-in-template-arg @r{(C++ and Objective-C++ only)}
 -Wdeclaration-missing-parameter-type @r{(C and Objective-C only)}
 -Wimplicit-function-declaration @r{(C and Objective-C only)}
 -Wimplicit-int @r{(C and Objective-C only)}
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 8af466d1ed1..6a55e7118d1 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -397,6 +397,10 @@ Wassign-intercept
 ObjC ObjC++ Var(warn_assign_intercept) Warning
 Warn whenever an Objective-C assignment is being intercepted by the garbage 
collector.
 
+Wabbreviated-auto-in-template-arg
+C++ ObjC++ Warning Var(warn_abbev_auto_targ) Init(1)
+Diagnose a placeholder type in a template argument in a function parameter 
type.
+
 Wbad-function-cast
 C ObjC Var(warn_bad_function_cast) Warning
 Warn about casting functions to incompatible types.
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 44a78324c6e..239e6f9a556 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -21021,9 +21021,6

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Jakub Jelinek

On Mon, Jul 07, 2025 at 09:07:14AM -0400, Siddhesh Poyarekar wrote:
> On 2025-07-07 08:48, Jakub Jelinek wrote:
> > > The return value of .ACCESS_WITH_SIZE clobbering PTR (that subsequently 
> > > gets
> > > passed to __builtin_dynamic_object_size) should be sufficient to fully
> > > prevent the reordering, it shouldn't have to clobber &PTR, I think.
> > 
> > The original use of .ACCESS_WITH_SIZE was designed for FAMs, for those
> > it IMHO does the right thing, it is a pass through first arg function
> > which attaches size information to the passed as well as returned pointer.
> > That pointer is &FAM, so address of the containing structure plus offsetof
> > of the FAM first element.
> 
> So that ought to be &a->fam[0] right, and not &a->fam?  It means the same
> for a FAM, so why not specify it as &a->fam[0] (or simply a->fam)? That will
> be consistent with when fam is a pointer.

The difference between &a->fam[0] and &a->fam is not the value (that is
the same), just the type: in one case, say, int *, in the other int [0:] *.
At least in GIMPLE pointer conversions are useless, so what exact type of
the argument is doesn't matter that much, but it matters e.g. when you're
dereferencing it.
Now, for
int n; int (*p)[n] __attribute__((counted_by(n)));
f->p has the int [n] * type, so guess that is what you want to use.

Jakub

RE: [PATCH] tree-optimization/120817 - bogus DSE of .MASK_STORE

2025-07-07 Thread Richard Biener

On Mon, 7 Jul 2025, Tamar Christina wrote:

> > -Original Message-
> > From: Richard Biener 
> > Sent: Monday, July 7, 2025 12:30 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Tamar Christina 
> > Subject: [PATCH] tree-optimization/120817 - bogus DSE of .MASK_STORE
> > 
> > DSE used ao_ref_init_from_ptr_and_size for .MASK_STORE but
> > alias-analysis will use the specified size to disambiguate
> > against smaller objects.  For .MASK_STORE we instead have to
> > make the access size unspecified but we can still constrain
> > the access extent based on the maximum size possible.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > 
> > No testcase, I'd appreciate a runtime one.
> 
> Tested locally:
> 
> /* { dg-require-effective-target vect_int } */
> /* { dg-additional-options "-mcpu=neoverse-n2 -O1 -ftree-loop-vectorize" { 
> target aarch64*-*-* } */
> 
> #include "tree-vect.h"
> 
> typedef struct {
> int _M_current;
> } __normal_iterator;
> 
> typedef struct {
> char _M_elems[5];
> } array_5;
> 
> __normal_iterator __trans_tmp_1 = {-5};
> 
> __attribute__((noipa))
> array_5 copySourceIntoTarget() {
> array_5 target;
> char* target_it = target._M_elems;
> 
> while (__trans_tmp_1._M_current != 0) {
> *target_it = 1;
> __trans_tmp_1._M_current++;
> target_it++;
> }
> 
> return target;
> }
> 
> int main ()
> {
> 
>   check_vect ();
> 
>   array_5 res = copySourceIntoTarget();
> 
> #pragma GCC novector
>   for (int i = 0; i < 5; i++)
> if (res._M_elems[i] != 1)
>   __builtin_abort ();
> }

Thanks - I added the testcase and pushed the change.

Richard.

> 
> > 
> > PR tree-optimization/120817
> > * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Use
> > ao_ref_init_from_ptr_and_range with unknown size for
> > .MASK_STORE and .MASK_LEN_STORE.
> > ---
> >  gcc/tree-ssa-dse.cc | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> > 
> > diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
> > index 5ac4280ee36..51a572316cd 100644
> > --- a/gcc/tree-ssa-dse.cc
> > +++ b/gcc/tree-ssa-dse.cc
> > @@ -181,10 +181,10 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref 
> > *write,
> > bool may_def_ok = false)
> >can provide a may-def variant.  */
> > if (may_def_ok)
> >   {
> > -   ao_ref_init_from_ptr_and_size (
> > - write, gimple_call_arg (stmt, 0),
> > - TYPE_SIZE_UNIT (
> > -   TREE_TYPE (gimple_call_arg (stmt, stored_value_index))));
> > +   ao_ref_init_from_ptr_and_range (
> > + write, gimple_call_arg (stmt, 0), true, 0, -1,
> > + tree_to_poly_int64 (TYPE_SIZE (
> > +   TREE_TYPE (gimple_call_arg (stmt, stored_value_index)))));
> > return true;
> >   }
> > break;
> > --
> > 2.43.0
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v3 1/5] libstdc++: Check prerequisites of layout_*::operator().

2025-07-07 Thread Luc Grosheintz




On 7/4/25 10:29, Luc Grosheintz wrote:

Previously, the prerequisite that the arguments passed to operator() are
a multi-dimensional index (of extents()) was not checked.

Both mapping::operator() and mdspan::operator[] have the same
prerequisite. Since, mdspan must check the prerequisite for user-defined
layout mappings, the preference is to check in mdspan.

Because out-of-bounds accesses are very common it's nevertheless useful
to check the prerequisite in mapping::operator(). This is relevant for
cases where the layout mappings are used without mdspan. This commit
check the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required


Could you please locally fix this typo in the commit message?

s/check/checks/



tests.

More discussion in the email chain starting at:

   https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html

libstdc++-v3/ChangeLog:

* include/std/mdspan: Check prerequisites of
layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs.
* testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc:
Add tests for prerequisites.

Signed-off-by: Luc Grosheintz 
---
  libstdc++-v3/include/std/mdspan   |  5 
  .../mdspan/layouts/debug/out_of_bounds_neg.cc | 30 +++
  2 files changed, 35 insertions(+)
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index c72a64094b7..cf20553aaa5 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_IndexType __mult = 1;
auto __update = [&, __pos = 0u](_IndexType __idx) mutable
  {
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx, __exts.extent(__pos)));
__res += __idx * __mult;
__mult *= __exts.extent(__pos);
++__pos;
@@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
  {
--__pos;
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos],
+  __exts.extent(__pos)));
__res += __ind_arr[__pos] * __mult;
__mult *= __exts.extent(__pos);
  };
@@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  {
auto __update = [&, __pos = 0u](_IndexType __idx) mutable
  {
+   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
+  __m.extents().extent(__pos)));
__res += __idx * __m.stride(__pos++);
  };
(__update(__indices), ...);
diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
 
b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
new file mode 100644
index 00000000000..fb8ff01e8aa
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
@@ -0,0 +1,30 @@
+// { dg-do compile { target c++23 } }
+// { dg-require-debug-mode "" }
+#include<mdspan>
+
+template<typename Layout>
+  constexpr bool
+  test_out_of_bounds_1d()
+  {
+auto m = typename Layout::mapping>{};
+(void) m(0); // { dg-error "expansion of" }
+return true;
+  }
+static_assert(test_out_of_bounds_1d()); // { dg-error "expansion 
of" }
+static_assert(test_out_of_bounds_1d()); // { dg-error "expansion 
of" }
+static_assert(test_out_of_bounds_1d()); // { dg-error 
"expansion of" }
+
+template<typename Layout>
+  constexpr bool
+  test_out_of_bounds_3d()
+  {
+auto m = typename Layout::mapping>{};
+(void) m(2, 5, 5); // { dg-error "expansion of" }
+return true;
+  }
+static_assert(test_out_of_bounds_3d()); // { dg-error "expansion 
of" }
+static_assert(test_out_of_bounds_3d()); // { dg-error "expansion 
of" }
+static_assert(test_out_of_bounds_3d()); // { dg-error 
"expansion of" }
+
+// { dg-prune-output "non-constant condition for static assertion" }
+// { dg-prune-output "__glibcxx_assert" }

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Siddhesh Poyarekar


On 2025-07-07 08:48, Jakub Jelinek wrote:

The return value of .ACCESS_WITH_SIZE clobbering PTR (that subsequently gets
passed to __builtin_dynamic_object_size) should be sufficient to fully
prevent the reordering, it shouldn't have to clobber &PTR, I think.


The original use of .ACCESS_WITH_SIZE was designed for FAMs, for those
it IMHO does the right thing, it is a pass through first arg function
which attaches size information to the passed as well as returned pointer.
That pointer is &FAM, so address of the containing structure plus offsetof
of the FAM first element.


So that ought to be &a->fam[0] right, and not &a->fam?  It means the 
same for a FAM, so why not specify it as &a->fam[0] (or simply a->fam)? 
That will be consistent with when fam is a pointer.



The way it is used for non-FAMs looks just wrong.
It passes as first argument the address of the pointer, not the pointer
itself.  So we have ifn used for two completely different purposes with
different meanings, while the arguments are otherwise pretty much the same
(or how do you uniquely distinguish the cases where it provides object
size for what it returns vs. where it provides object size for what the
pointer it returns points to).  That is like the spaghetti code in certain
middle end warnings.  For warnings it is really bad, for code generation
decisions it is a fatal design flaw.

So, either you need a different ifn, or add some flag in bitfield
that clearly distinguishes the 2 cases, or different number of arguments,
or perhaps most easily, why do the dereference at all?


If my argument to change .ACCESS_WITH_SIZE to refer to &a->fam[0] is not 
convincing then it should probably be a distinct ifn, 
.ACCESS_WITH_SIZE_PTR or something like that.



When I have
   struct U { int n; int fam[n] __attribute__((counted_by (n))); } *u;
continue passing &u->fam as first argument and &u->n as second, while for
   struct S {
 int n;
 int (*p)[n] __attribute__((counted_by(n)));
   } *f;
don't pass &f->p to the builtin but pass f->p.  You are providing size
for f->p pointer, not for &f->p pointer, while for FAM it is for &u->fam
pointer.  The second argument would be &f->n.

So, my recommendation would be to revert the counted_by GCC 16 series,
rework it and submit again.  Unless you can fix it up in a day or two.


I agree, this needs more thought.

Thanks,
Sid

Re: [PATCH] [testsuite] [arm] adjust fp16-aapcs for gcc-14

2025-07-07 Thread Torbjorn SVENSSON


Hi Christophe, Alexandre, Andrew and Richard,

Sorry for the very late reply. I've been occupied with other tasks than 
GCC for the past 3 months.


On 2025-05-20 09:56, Christophe Lyon wrote:

On Tue, 20 May 2025 at 06:30, Alexandre Oliva  wrote:



(The backport I've only just posted is not enough for the tests to pass;
there's another problem)

r14-10824 is a backport of r15-4549, that rewrote and extended into
check-function-bodies the save/restore expectations introduced in
r15-2160.  Alas, r15-2160 mentions an insn_propagation patch that
enables those specific save/restore insns to be generated, presumably
r15-1945, and this change is not present in gcc-14, so we get
different save/restore insns, and the test fails, even after
backporting r15-1035, that allows for single-character function names
in check-function-bodies.



I thought Torbjorn had tested this on gcc-14 and needed it to have
cleaner results,
but https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667448.html
indicates we already noticed the failure some time ago...



I must have messed up when I tested this with gcc-14 and I've apparently 
also missed the ping from Andrew back in Nov. Really sorry.





Drop the save/restore checks that don't belong in gcc-14.

Tested with gcc-14 on arm-vxworks7r2.  Ok to install in gcc-14?


for  gcc/testsuite/ChangeLog

 * gcc.target/arm/fp16-aapcs-1.c: Drop save/restore checks.
 * gcc.target/arm/fp16-aapcs-2.c: Likewise.
 * gcc.target/arm/fp16-aapcs-3.c: Likewise.
 * gcc.target/arm/fp16-aapcs-4.c: Likewise.
---
  gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c |7 ++-
  gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c |8 
  gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c |7 ++-
  gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c |8 
  4 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c 
b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
index b18d7cda65c8d..450c52fcd5c6c 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
@@ -29,10 +29,8 @@ Below block is for non-armv8.1
  ** ...
  ** vmov\.f32   s0, \2
  ** )
-** vstr\.32s2, \[sp, #4\]  @ int
+** ...
  ** bl  swap
-** vldr\.32s2, \[sp, #4\]  @ int
-** vmov\.f32   s0, s2

  ** |

@@ -50,9 +48,8 @@ Below block is for armv8.1
  ** ...
  ** vmovs0, \4  @ __fp16
  ** )
-** vstr\.32s2, \[sp, #4\]  @ int
+** ...
  ** bl  swap
-** vldr\.16s0, \[sp, #4\]

  ** )
  ** ...
diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c 
b/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
index 48510e895368d..c15f29dd3e44b 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
@@ -28,14 +28,6 @@ swap (__fp16, __fp16);
  ** )
  ** ...
  */
-/*
-** F: { target arm_little_endian }
-** ...
-** str r2, \[sp, #4\]
-** bl  swap
-** ldrhr0, \[sp, #4\]  @ __fp16


Is it expected that you also remove the 'bl swap' part?


I think it's fine to remove the entire block since the 'bl swap' is 
checked in the other block.





-** ...
-*/
  __fp16
  F (__fp16 a, __fp16 b, __fp16 c)
  {
diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c 
b/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
index 7238ef3a02e03..1102dc7344919 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
@@ -29,10 +29,8 @@ Below block is for non-armv8.1
  ** ...
  ** vmov\.f32   s0, \2
  ** )
-** vstr\.32s2, \[sp, #4\]  @ int
+** ...
  ** bl  swap
-** vldr\.32s2, \[sp, #4\]  @ int
-** vmov\.f32   s0, s2

  ** |

@@ -50,9 +48,8 @@ Below block is for armv8.1
  ** ...
  ** vmovs0, \4
  ** )
-** vstr\.32s2, \[sp, #4\]  @ int
+** ...
  ** bl  swap
-** vldr\.16s0, \[sp, #4\]

  ** )
  ** ...
diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c 
b/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
index 13f08d8afa32d..00a44d15129a8 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
@@ -28,14 +28,6 @@ swap (__fp16, __fp16);
  ** )
  ** ...
  */
-/*
-** F: { target arm_little_endian }
-** ...
-** str r2, \[sp, #4\]
-** bl  swap
-** ldrhr0, \[sp, #4\]  @ __fp16

Same question here?


I think it's fine to remove the entire block since the 'bl swap' is 
checked in the other block.



@Richard: Do you think there is any reason to not include this on 
releases/gcc-14?



Kind regards,
Torbjörn

[PATCH v3] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Tomasz Kamiński

From: XU Kailiang 

C++ formatting locale could have a custom time_put that performs
differently from the C locale, so do not use __timepunct directly;
instead, all of the above specifiers use _M_locale_fmt.

For %a/%A/%b/%h/%B, the code handling the exception is now moved
to the _M_check_ok function, that is invoked before handling of the
conversion specifier. For time_points the values of months/weekday
are computed, and thus are always ok(), this information is indicated
by new _M_time_point member of the _ChronoSpec.

The different behavior of j specifier for durations and time_points/calendar
types, is now handled using only _ChronoParts, and _M_time_only in _ChronoSpec
is no longer needed, thus it was removed.

PR libstdc++/117214

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
(_ChronoSpec::_M_time_point): Define.
(__formatter_chrono::_M_parse): Use __parts to determine
interpretation of j.
(__formatter_chrono::_M_check_ok): Define.
(__formatter_chrono::_M_format_to): Invoke _M_check_ok.
(__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
exception throwing to _M_check_ok.
(__formatter_chrono::_M_j): Use _M_needs to define interpretation.
(__formatter_duration::_S_spec_for): Set _M_time_point.
* testsuite/std/time/format/format.cc: Test for exception for !ok()
months/weekday.
* testsuite/std/time/format/pr117214_custom_timeput.cc: New
test.

Co-authored-by: Tomasz Kaminski 
Reviewed-by: Jonathan Wakely 
Signed-off-by: XU Kailiang 
Signed-off-by: Tomasz Kaminski 
---
Posting v3 as I have realized there is no test checking that we throw an
exception for the above specifiers and !ok() values, so I added one.

Changes in v3:
 - adds test for exception being thrown for !ok() months and weekdays
 - fixes typos in commit description
 - changes comment for 'j'.

Tested on x86_64-linux locally, cfarm is not reachable again.
Additionally tested `std/time/format/*` with
-target_board=unix\{,-D_GLIBCXX_USE_CXX11_ABI=0/-D_GLIBCXX_DEBUG,-D_GLIBCXX_DEBUG\}.
OK for trunk? (mostly additional tests).


 libstdc++-v3/include/bits/chrono_io.h | 61 ++-
 .../testsuite/std/time/format/format.cc   |  7 +++
 .../time/format/pr117214_custom_timeput.cc| 37 +++
 3 files changed, 90 insertions(+), 15 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 72cd569ccd6..75ee7e818b2 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -280,8 +280,8 @@ namespace __format
   // in the format-spec, e.g. "{:L%a}" is localized and locale-specific,
   // but "{:L}" is only localized and "{:%a}" is only locale-specific.
   unsigned _M_locale_specific : 1;
-  // Indicates that we are handling duration.
-  unsigned _M_time_only : 1;
+  // Indicates that we are handling time_point.
+  unsigned _M_time_point : 1;
   // Indicates that duration should be treated as floating point.
   unsigned _M_floating_point_rep : 1;
   // Indicate that duration uses user-defined representation.
@@ -693,8 +693,11 @@ namespace __format
  __allowed_mods = _Mod_O;
  break;
case 'j':
- __needed = __spec._M_time_only ? _HoursMinutesSeconds
-: _DayOfYear;
+ __needed = __parts & _DayOfYear;
+ // If we do not know day-of-year then we must have a duration,
+ // which is to be formatted as decimal number of days.
+ if (__needed == _None)
+   __needed = _HoursMinutesSeconds;
  break;
case 'm':
  __needed = _Month;
@@ -919,7 +922,13 @@ namespace __format
   {
switch (__conv)
  {
+ case 'a':
+ case 'A':
+ case 'b':
+ case 'B':
  case 'c':
+ case 'h':
+ case 'p':
  case 'r':
  case 'x':
  case 'X':
@@ -947,6 +956,32 @@ namespace __format
  return __out;
}
 
+  void
+  _M_check_ok(const _ChronoData<_CharT>& __t, _CharT __conv) const
+  {
+   // n.b. for time point all date parts are computed, so
+   // they are always ok.
+   if (_M_spec._M_time_point || _M_spec._M_debug)
+ return;
+
+   switch (__conv)
+   {
+   case 'a':
+   case 'A':
+ if (!__t._M_weekday.ok()) [[unlikely]]
+   __throw_format_error("format error: invalid weekday");
+ return;
+   case 'b':
+   case 'h':
+   case 'B':
+ if (!__t._M_month.ok()) [[unlikely]]
+   __throw_format_error("format error: invalid month");
+ return;
+   default:

Re: [PATCH v3 5/5] libstdc++: Implement mdspan and tests [PR107761].

2025-07-07 Thread Tomasz Kaminski

I used TEST_MDSPAN_LAYOUT_LIKE_H as header guard.

On Mon, Jul 7, 2025 at 10:58 AM Tomasz Kaminski  wrote:

>
>
> On Fri, Jul 4, 2025 at 10:37 AM Luc Grosheintz 
> wrote:
>
>> Implements the class mdspan as described in N4950, i.e. without P3029.
>> It also adds tests for mdspan. This commit completes the implementation
>> of P0009, i.e. the C++23 part.
>>
>> PR libstdc++/107761
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/std/mdspan (mdspan): New class.
>> * src/c++23/std.cc.in (mdspan): Add.
>> * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
>> * testsuite/23_containers/mdspan/mdspan.cc: New test.
>> * testsuite/23_containers/mdspan/layout_like.h: Add class
>> LayoutLike which models a user-defined layout.
>>
>> Signed-off-by: Luc Grosheintz 
>> ---
>>
> Only two small comments:
> - use of header guard instead of pragma
> - use of md.empty() instead of md.extents(0)
> I will do these changes locally.
>
>>  libstdc++-v3/include/std/mdspan   | 284 +
>>  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
>>  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
>>  .../23_containers/mdspan/layout_like.h|  80 +++
>>  .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
>>  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
>>  6 files changed, 1034 insertions(+), 1 deletion(-)
>>  create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
>>  create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
>>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
>>  create mode 100644
>> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
>>
>> diff --git a/libstdc++-v3/include/std/mdspan
>> b/libstdc++-v3/include/std/mdspan
>> index 7e970c2b905..f64804e2a42 100644
>> --- a/libstdc++-v3/include/std/mdspan
>> +++ b/libstdc++-v3/include/std/mdspan
>> @@ -1057,6 +1057,290 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>{ return __p + __i; }
>>  };
>>
>> +  namespace __mdspan
>> +  {
>> +template
>> +  constexpr bool
>> +  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm>
>> __indices)
>> +  {
>> +   static_assert(__exts.rank() == _Nm);
>> +   for (size_t __i = 0; __i < __exts.rank(); ++__i)
>> + if (__indices[__i] >= __exts.extent(__i))
>> +   return false;
>> +   return true;
>> +  }
>> +  }
>> +
>> +  template> +  typename _LayoutPolicy = layout_right,
>> +  typename _AccessorPolicy = default_accessor<_ElementType>>
>> +class mdspan
>> +{
>> +  static_assert(!is_array_v<_ElementType>,
>> +   "ElementType must not be an array type");
>> +  static_assert(!is_abstract_v<_ElementType>,
>> +   "ElementType must not be an abstract class type");
>> +  static_assert(__mdspan::__is_extents<_Extents>,
>> +   "Extents must be a specialization of std::extents");
>> +  static_assert(is_same_v<_ElementType,
>> + typename _AccessorPolicy::element_type>);
>> +
>> +public:
>> +  using extents_type = _Extents;
>> +  using layout_type = _LayoutPolicy;
>> +  using accessor_type = _AccessorPolicy;
>> +  using mapping_type = typename layout_type::template
>> mapping;
>> +  using element_type = _ElementType;
>> +  using value_type = remove_cv_t;
>> +  using index_type = typename extents_type::index_type;
>> +  using size_type = typename extents_type::size_type;
>> +  using rank_type = typename extents_type::rank_type;
>> +  using data_handle_type = typename accessor_type::data_handle_type;
>> +  using reference = typename accessor_type::reference;
>> +
>> +  static constexpr rank_type
>> +  rank() noexcept { return extents_type::rank(); }
>> +
>> +  static constexpr rank_type
>> +  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
>> +
>> +  static constexpr size_t
>> +  static_extent(rank_type __r) noexcept
>> +  { return extents_type::static_extent(__r); }
>> +
>> +  constexpr index_type
>> +  extent(rank_type __r) const noexcept { return
>> extents().extent(__r); }
>> +
>> +  constexpr
>> +  mdspan()
>> +  requires (rank_dynamic() > 0)
>> +  && is_default_constructible_v
>> + && is_default_constructible_v
>> + && is_default_constructible_v
>> +  : _M_accessor(), _M_mapping(), _M_handle()
>> +  { }
>> +
>> +  constexpr
>> +  mdspan(const mdspan& __other) = default;
>> +
>> +  constexpr
>> +  mdspan(mdspan&& __other) = default;
>> +
>> +  template<__mdspan::__valid_index_type... _OIndexTypes>
>> +   requires (sizeof...(_OIndexTypes) == rank()
>> +  || sizeof...(_OIndexTypes) == rank_dynamic())
>> +&& is_constructible_v
>> +&& is_default_constructible_v
>> +   constexpr expli

Re: [PATCH] Handle non default git configurations with mklog

2025-07-07 Thread Alexander Monakov



On Tue, 1 Jul 2025, Pierre-Emmanuel Patry wrote:

> Mklog parses the diff content from prepare-commit-msg hook but fails
> when git has been configured with some options (eg. mnemonicPrefix).
> Forcing the default values for the prefixes and the algorithm would
> set a distinct diff configuration supported by mklog and prevent most
> failures.
> 
> contrib/ChangeLog:
> 
>   * prepare-commit-msg: Force default git prefixes and algorithm usage.
> 
> Signed-off-by: Pierre-Emmanuel Patry 
> ---
>  contrib/prepare-commit-msg | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg
> index 1b878772dcc..fc4c4d80b16 100755
> --- a/contrib/prepare-commit-msg
> +++ b/contrib/prepare-commit-msg
> @@ -78,4 +78,8 @@ else
>  tee="cat"
>  fi
>  
> -git $cmd | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
> +# Provides a baseline for everyone, we only need to deal with the default
> +# values within mklog.
> +default_values="--diff-algorithm=default --default-prefix"

Is switching the algorithm actually necessary for solving the problem at hand?
I think it is possible for the default algorithm to produce diffs that lead to
poorly generated changelog entries, so forcing it here seems undesirable.

If just --default-prefix is enough, I think you can just add it to the below
line and commit/push.

> +
> +git $cmd $default_values | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"

Alexander

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Siddhesh Poyarekar


On 2025-07-07 09:14, Jakub Jelinek wrote:

So that ought to be &a->fam[0] right, and not &a->fam?  It means the same
for a FAM, so why not specify it as &a->fam[0] (or simply a->fam)? That will
be consistent with when fam is a pointer.


The only difference between &a->fam[0] and &a->fam is not the value (that is
the same), just the type in one case say int *, in the other int [0:] *.
At least in GIMPLE pointer conversions are useless, so what exact type of
the argument is doesn't matter that much, but it matters e.g. when you're
dereferencing it.


Yes, that's why I'm thinking that we could use that flexibility to match
the type passed to .ACCESS_WITH_SIZE with that in 
__builtin_dynamic_object_size.  There it's almost always a->fam or 
&a->fam[0] in practice, rarely ever &a->fam.


Even in the rare case of __builtin_dynamic_object_size being passed 
&a->fam, shouldn't a->fam = .ACCESS_WITH_SIZE(a->fam, ...) be
sufficient in its minimal function of preventing reordering?  There's no 
actual dereference in .ACCESS_WITH_SIZE (it's a nop in practice, just a 
reordering barrier until the __builtin_dynamic_object_size call is 
replaced), so maybe we could do this?



Now, for
int n; int (*p)[n] __attribute__((counted_by(n)));
f->p has the int [n] * type, so guess that is what you want to use.


So to look at it another way the "leap of imagination" I'm suggesting is 
to see int p[] as, essentially int (*p)[], which then ends up matching 
both cases that .ACCESS_WITH_SIZE could potentially support.


Sid

Re: [PATCH] testsuite: Restore dg-do run on pr116906 and pr78185 tests

2025-07-07 Thread Torbjorn SVENSSON





On 2025-07-03 15:02, Christophe Lyon wrote:

ping^2 ?


Looks fine to me, but as you know, my approval does not count.

Kind regards,
Torbjörn



On Wed, 18 Jun 2025 at 12:11, Christophe Lyon
 wrote:


ping?

On Mon, 26 May 2025 at 17:26, Christophe Lyon
 wrote:


On Mon, 26 May 2025 at 17:14, Christophe Lyon
 wrote:


Commit r15-7152-g57b706d141b87c removed
/* { dg-do run { target*-*-linux* *-*-gnu* *-*-uclinux* } } */

from these tests, turning them into 'compile' only tests, even when
they could be executed.

This patch adds
/* { dg-do run } */

which is OK since the tests are correctly skipped if needed thanks to
the following effective-targets (alarm and signal).

With this patch we have again two entries for these tests on linux targets:
* compile (test for excess errors)
* execution test


Gasp I forgot to add a ChangeLog entry, but it would be an obvious:
Add 'dg-do run' :-)



---
  gcc/testsuite/gcc.dg/pr116906-1.c | 1 +
  gcc/testsuite/gcc.dg/pr116906-2.c | 1 +
  gcc/testsuite/gcc.dg/pr78185.c| 1 +
  3 files changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/pr116906-1.c 
b/gcc/testsuite/gcc.dg/pr116906-1.c
index 7187507a60d..ee60ad67e93 100644
--- a/gcc/testsuite/gcc.dg/pr116906-1.c
+++ b/gcc/testsuite/gcc.dg/pr116906-1.c
@@ -1,3 +1,4 @@
+/* { dg-do run } */
  /* { dg-require-effective-target alarm } */
  /* { dg-require-effective-target signal } */
  /* { dg-options "-O2" } */
diff --git a/gcc/testsuite/gcc.dg/pr116906-2.c 
b/gcc/testsuite/gcc.dg/pr116906-2.c
index 41a352bf837..4172ec3644a 100644
--- a/gcc/testsuite/gcc.dg/pr116906-2.c
+++ b/gcc/testsuite/gcc.dg/pr116906-2.c
@@ -1,3 +1,4 @@
+/* { dg-do run } */
  /* { dg-require-effective-target alarm } */
  /* { dg-require-effective-target signal } */
  /* { dg-options "-O2 -fno-tree-ch" } */
diff --git a/gcc/testsuite/gcc.dg/pr78185.c b/gcc/testsuite/gcc.dg/pr78185.c
index ada8b1b9f90..4c3af4f2890 100644
--- a/gcc/testsuite/gcc.dg/pr78185.c
+++ b/gcc/testsuite/gcc.dg/pr78185.c
@@ -1,3 +1,4 @@
+/* { dg-do run } */
  /* { dg-require-effective-target alarm } */
  /* { dg-require-effective-target signal } */
  /* { dg-options "-O" } */
--
2.34.1

Re: [PATCH v3] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 14:21, Tomasz Kamiński  wrote:
>
> From: XU Kailiang 
>
> C++ formatting locale could have a custom time_put that performs
> differently from the C locale, so do not use __timepunct directly,
> instead all of above specifiers use _M_locale_fmt.
>
> For %a/%A/%b/%h/%B, the code handling the exception is now moved
> to the _M_check_ok function, that is invoked before handling of the
> conversion specifier. For time_points the values of months/weekday
> are computed, and thus are always ok(), this information is indicated
> by new _M_time_point member of the _ChronoSpec.
>
> The different behavior of j specifier for durations and time_points/calendar
> types, is now handled using only _ChronoParts, and _M_time_only in _ChronoSpec
> is no longer needed, thus it was removed.
>
> PR libstdc++/117214
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
> (_ChronoSpec::_M_time_point): Define.
> (__formatter_chrono::_M_parse): Use __parts to determine
> interpretation of j.
> (__formatter_chrono::_M_check_ok): Define.
> (__formatter_chrono::_M_format_to): Invoke _M_check_ok.
> (__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
> exception throwing to _M_check_ok.
> (__formatter_chrono::_M_j): Use _M_needs to define interpretation.
> (__formatter_duration::_S_spec_for): Set _M_time_point.
> * testsuite/std/time/format/format.cc: Test for exception for !ok()
> months/weekday.
> * testsuite/std/time/format/pr117214_custom_timeput.cc: New
> test.
>
> Co-authored-by: Tomasz Kaminski 
> Reviewed-by: Jonathan Wakely 
> Signed-off-by: XU Kailiang 
> Signed-off-by: Tomasz Kaminski 
> ---
> Posting v3 as I have realized there is no test checking that we throw an
> exception for the above specifiers and !ok() values, so I added one.
>
> Changes in v3:
>  - adds test for exception being thrown for !ok() months and weekdays
>  - fixes typos in commit description
>  - changes comment for 'j'.
>
> Tested on x86_64-linux locally, cfarm is not reachable again.

It's still just the VMs hosted in Japan that are unreachable.  Other
x86 hosts like cfarm186 - 188 are reachable.

> Additionally tested `std/time/format/*` with
> -target_board=unix\{,-D_GLIBCXX_USE_CXX11_ABI=0/-D_GLIBCXX_DEBUG,-D_GLIBCXX_DEBUG\}.
> OK for trunk? (mostly additional tests).

OK


>
>
>  libstdc++-v3/include/bits/chrono_io.h | 61 ++-
>  .../testsuite/std/time/format/format.cc   |  7 +++
>  .../time/format/pr117214_custom_timeput.cc| 37 +++
>  3 files changed, 90 insertions(+), 15 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc
>
> diff --git a/libstdc++-v3/include/bits/chrono_io.h 
> b/libstdc++-v3/include/bits/chrono_io.h
> index 72cd569ccd6..75ee7e818b2 100644
> --- a/libstdc++-v3/include/bits/chrono_io.h
> +++ b/libstdc++-v3/include/bits/chrono_io.h
> @@ -280,8 +280,8 @@ namespace __format
>// in the format-spec, e.g. "{:L%a}" is localized and locale-specific,
>// but "{:L}" is only localized and "{:%a}" is only locale-specific.
>unsigned _M_locale_specific : 1;
> -  // Indicates that we are handling duration.
> -  unsigned _M_time_only : 1;
> +  // Indicates that we are handling time_point.
> +  unsigned _M_time_point : 1;
>// Indicates that duration should be treated as floating point.
>unsigned _M_floating_point_rep : 1;
>// Indicate that duration uses user-defined representation.
> @@ -693,8 +693,11 @@ namespace __format
>   __allowed_mods = _Mod_O;
>   break;
> case 'j':
> - __needed = __spec._M_time_only ? _HoursMinutesSeconds
> -: _DayOfYear;
> + __needed = __parts & _DayOfYear;
> + // If we do not know day-of-year then we must have a 
> duration,
> + // which is to be formatted as decimal number of days.
> + if (__needed == _None)
> +   __needed = _HoursMinutesSeconds;
>   break;
> case 'm':
>   __needed = _Month;
> @@ -919,7 +922,13 @@ namespace __format
>{
> switch (__conv)
>   {
> + case 'a':
> + case 'A':
> + case 'b':
> + case 'B':
>   case 'c':
> + case 'h':
> + case 'p':
>   case 'r':
>   case 'x':
>   case 'X':
> @@ -947,6 +956,32 @@ namespace __format
>   return __out;
> }
>
> +  void
> +  _M_check_ok(const _ChronoData<_CharT>& __t, _CharT __conv) const
> +  {
> +   // n.b. for time point all date parts are computed, so
> +   // they are always ok.
> +   if (_M_spec._M_time_point || _M_spec._M_debug)
> + return;
> +
> +

Re: [PATCH v2] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 10:21, Tomasz Kamiński  wrote:
>
> From: XU Kailiang 
>
> C++ formatting locale could have a custom time_put that performs
> differently from the C locale, so do not use __timepunct directly,
> instead all of above specifiers use _M_locale_fmt.
>
> For %a/%A/%b/%h/%B, the code handling the exception is now moved
> to the _M_check_ok function, that is inovked before handling of the

"invoked"

> conversion specifier. For time_points the values of months/weekday
> are computed, and thus are always ok(), this information is indicated
> by new _M_time_point member of the _ChronoSpec.
>
> The different handling of j specifier for durations and time_points/
> calendar types, is now handled using only _ChronoParts, and _M_time_only
> in _ChronoSpec is no longer needed, thus it was removed.

I think this makes the handling for durations a bit easier to
understand, thanks.

>
> PR libstdc++/117214
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
> (_ChronoSpec::_M_time_point): Define.
> (__formatter_chrono::_M_parse): Use __parts to determine
> interpretation of j.
> (__formatter_chrono::_M_check_ok): Define.
> (__formatter_chrono::_M_format_to): Invoke _M_check_ok.
> (__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
> exception throwing to _M_check_ok.
> (__formatter_chrono::_M_j): Use _M_needs to define interpreation.

"interpretation"

> (__formatter_duration::_S_spec_for): Set _M_time_point.
> * testsuite/std/time/format/pr117214_custom_timeput.cc: New
> test.
>
> Co-authored-by: Tomasz Kaminski 
> Signed-off-by: XU Kailiang 
> Signed-off-by: Tomasz Kaminski 
> ---
> This patch adjusts the implementation as follows:
>  * we use _M_locale_fmt for all specifiers
>  * %h which is alias for %b is also covered
>
> Tested on x86_64-linux locally.
>
>  libstdc++-v3/include/bits/chrono_io.h | 58 ++-
>  .../time/format/pr117214_custom_timeput.cc| 37 
>  2 files changed, 81 insertions(+), 14 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc
>
> diff --git a/libstdc++-v3/include/bits/chrono_io.h 
> b/libstdc++-v3/include/bits/chrono_io.h
> index 72cd569ccd6..863b3550e4f 100644
> --- a/libstdc++-v3/include/bits/chrono_io.h
> +++ b/libstdc++-v3/include/bits/chrono_io.h
> @@ -280,8 +280,8 @@ namespace __format
>// in the format-spec, e.g. "{:L%a}" is localized and locale-specific,
>// but "{:L}" is only localized and "{:%a}" is only locale-specific.
>unsigned _M_locale_specific : 1;
> -  // Indicates that we are handling duration.
> -  unsigned _M_time_only : 1;
> +  // Indicates that we are handling time_point.
> +  unsigned _M_time_point : 1;
>// Indicates that duration should be treated as floating point.
>unsigned _M_floating_point_rep : 1;
>// Indicate that duration uses user-defined representation.
> @@ -693,8 +693,10 @@ namespace __format
>   __allowed_mods = _Mod_O;
>   break;
> case 'j':
> - __needed = __spec._M_time_only ? _HoursMinutesSeconds
> -: _DayOfYear;
> + __needed = __parts & _DayOfYear;
> + // 'j' is decimal number of days for durations
> + if (__needed == _None)
> +   __needed = _HoursMinutesSeconds;

Maybe it's because I haven't slept well, but I found the comment here
didn't make the logic clearer for me (why is __needed = _None a
duration? what does HMS have to do with days?).
Would this be better?

// If we do not know day-of-year then we must have a duration,
// which is to be formatted as decimal number of days.


>   break;
> case 'm':
>   __needed = _Month;
> @@ -919,7 +921,13 @@ namespace __format
>{
> switch (__conv)
>   {
> + case 'a':
> + case 'A':
> + case 'b':
> + case 'B':
>   case 'c':
> + case 'h':
> + case 'p':
>   case 'r':
>   case 'x':
>   case 'X':
> @@ -947,6 +955,32 @@ namespace __format
>   return __out;
> }
>
> +  void
> +  _M_check_ok(const _ChronoData<_CharT>& __t, _CharT __conv) const
> +  {
> +   // n.b. for time point all date parts are computed, so
> +   // they are alwas ok.

"always"

> +   if (_M_spec._M_time_point || _M_spec._M_debug)
> + return;
> +
> +   switch (__conv)
> +   {
> +   case 'a':
> +   case 'A':
> + if (!__t._M_weekday.ok()) [[unlikely]]
> +__throw_format_error("format error: invalid weekday");
> + return;
> +   case 'b':
> +   case 'h':
> +   case 'B':
> +

Re: [PATCH v2] libstdc++: Format __float128 as _Float128 only when long double is not 128 IEEE [PR120976]

2025-07-07 Thread Jonathan Wakely

OK thanks

On Mon, 7 Jul 2025 at 10:35, Tomasz Kamiński  wrote:
>
> For powerpc64 and sparc architectures that both have __float128 and 128bit 
> long double,
> the __float128 is same type as long double/__ieee128 and already formattable.
>
> The remaining specialization makes __float128 formattable on x86_64 via 
> _Float128,
> however __float128 is now not formattable on x86_32 (-m32) with 
> -mlong-double-128,
> where __float128 is distinct type from long double that is 128bit IEEE.
>
> PR libstdc++/120976
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (formatter<__float128, _CharT>): Define if
> _GLIBCXX_FORMAT_F128 == 2.
> ---
> Changes in v2:
>  - Refer to PR120976
>  - Fix typos in commit description.
>
> Rerunning tests on x86_64-linux as a sanity check.
>
>  libstdc++-v3/include/std/format | 11 +++
>  1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index 46bd5d5ee6a..5749aa1995a 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -2998,11 +2998,9 @@ namespace __format
>  };
>  #endif
>
> -#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 > 1
> -  // Reuse __formatter_fp::format<__format::__flt128_t, Out> for 
> __float128.
> -  // This formatter is not declared if _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT is 
> true,
> -  // as __float128 when present is same type as __ieee128, which may be same 
> as
> -  // long double.
> +#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 == 2
> +  // Use __formatter_fp::format<__format::__flt128_t, Out> for __float128,
> +  // when long double is not 128bit IEEE type.
>template<__format::__char _CharT>
>  struct formatter<__float128, _CharT>
>  {
> @@ -3020,9 +3018,6 @@ namespace __format
>
>  private:
>__format::__formatter_fp<_CharT> _M_f;
> -
> -  static_assert( !is_same_v<__float128, long double>,
> -"This specialization should not be used for long double" 
> );
>  };
>  #endif
>
> --
> 2.49.0
>

[PATCH] aarch64: Improve popcountti2 with SVE

2025-07-07 Thread Kyrylo Tkachov

Hi all,

The TImode popcount sequence can be slightly improved with SVE.
If we generate:
ldr q31, [x0]
ptrue p7.b, vl16
cnt z31.d, p7/m, z31.d
addp d31, v31.2d
fmov x0, d31
ret

instead of:
h128:
ldr q31, [x0]
cnt v31.16b, v31.16b
addv b31, v31.16b
fmov w0, s31
ret

we use the ADDP instruction for reduction, which is cheaper on all CPUs AFAIK,
as it is only a single 64-bit addition vs the tree of additions for ADDV.
For example, on a CPU like Grace we get a latency and throughput of 2,4 vs 4,1
for ADDV.
We do generate one more instruction due to the PTRUE being materialised, but 
that
is cheap itself and can be scheduled away from the critical path or even CSE'd
with other PTRUE constants.
As this sequence is larger code size-wise it is avoided for -Os.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?
Thanks,
Kyrill

Signed-off-by: Kyrylo Tkachov 

gcc/

* config/aarch64/aarch64.md (popcountti2): Add TARGET_SVE path.

gcc/testsuite/

* gcc.target/aarch64/popcnt9.c: Add +nosve to target pragma.
* gcc.target/aarch64/popcnt13.c: New test.


0001-aarch64-Improve-popcountti2-with-SVE.patch
Description: 0001-aarch64-Improve-popcountti2-with-SVE.patch

[PATCH v2] libstdc++: Format __float128 as _Float128 only when long double is not 128 IEEE [PR120976]

2025-07-07 Thread Tomasz Kamiński

For powerpc64 and sparc architectures that both have __float128 and 128bit long 
double,
the __float128 is same type as long double/__ieee128 and already formattable.

The remaining specialization makes __float128 formattable on x86_64 via 
_Float128,
however __float128 is now not formattable on x86_32 (-m32) with 
-mlong-double-128,
where __float128 is distinct type from long double that is 128bit IEEE.

PR libstdc++/120976

libstdc++-v3/ChangeLog:

* include/std/format (formatter<__float128, _CharT>): Define if
_GLIBCXX_FORMAT_F128 == 2.
---
Changes in v2:
 - Refer to PR120976 
 - Fix typos in commit description.

Rerunning tests on x86_64-linux as a sanity check.

 libstdc++-v3/include/std/format | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 46bd5d5ee6a..5749aa1995a 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -2998,11 +2998,9 @@ namespace __format
 };
 #endif
 
-#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 > 1
-  // Reuse __formatter_fp::format<__format::__flt128_t, Out> for __float128.
-  // This formatter is not declared if _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT is 
true,
-  // as __float128 when present is same type as __ieee128, which may be same as
-  // long double.
+#if defined(__SIZEOF_FLOAT128__) && _GLIBCXX_FORMAT_F128 == 2
+  // Use __formatter_fp::format<__format::__flt128_t, Out> for __float128,
+  // when long double is not 128bit IEEE type.
   template<__format::__char _CharT>
 struct formatter<__float128, _CharT>
 {
@@ -3020,9 +3018,6 @@ namespace __format
 
 private:
   __format::__formatter_fp<_CharT> _M_f;
-
-  static_assert( !is_same_v<__float128, long double>,
-"This specialization should not be used for long double" );
 };
 #endif
 
-- 
2.49.0

[PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Luc Grosheintz

libstdc++-v3/ChangeLog:

* include/bits/version.def (mdspan): Set to 202207 and remove
no_stdname.
* include/bits/version.h: Regenerate.
* testsuite/23_containers/mdspan/mdspan_ftm.cc: Test presence
of FTM.

Signed-off-by: Luc Grosheintz 
---
 libstdc++-v3/include/bits/version.def | 3 +--
 libstdc++-v3/include/bits/version.h   | 3 ++-
 libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6 ++
 3 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index f4ba501c403..ad909afd20e 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1007,9 +1007,8 @@ ftms = {
 
 ftms = {
   name = mdspan;
-  no_stdname = true; // FIXME: remove
   values = {
-v = 1; // FIXME: 202207
+v = 202207;
 cxxmin = 23;
   };
 };
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index dc8ac07be16..72f9231846e 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1126,8 +1126,9 @@
 
 #if !defined(__cpp_lib_mdspan)
 # if (__cplusplus >= 202100L)
-#  define __glibcxx_mdspan 1L
+#  define __glibcxx_mdspan 202207L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#   define __cpp_lib_mdspan 202207L
 #  endif
 # endif
 #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
new file mode 100644
index 000..fc528293e66
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
@@ -0,0 +1,6 @@
+// { dg-do compile { target c++23 } }
+#include 
+
+#ifndef __cpp_lib_mdspan
+#error "Missing FTM"
+#endif
-- 
2.49.0

Re: [PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 11:41 AM Luc Grosheintz 
wrote:

> libstdc++-v3/ChangeLog:
>
> * include/bits/version.def (mdspan): Set to 202207 and remove
> no_stdname.
> * include/bits/version.h: Regenerate.
> * testsuite/23_containers/mdspan/mdspan_ftm.cc: Test presence
> of FTM.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
Some minor suggestions below. Could you please also mention PR107761 in the
commit.
We have automation that will put that in bugzilla, so people tracking
issues can see it.
I would do that in general for any new commits implementing language
feature.
Let me know if you will not be able to find a corresponding bugzilla ticket.

>  libstdc++-v3/include/bits/version.def | 3 +--
>  libstdc++-v3/include/bits/version.h   | 3 ++-
>  libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6 ++
>  3 files changed, 9 insertions(+), 3 deletions(-)
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
>
> diff --git a/libstdc++-v3/include/bits/version.def
> b/libstdc++-v3/include/bits/version.def
> index f4ba501c403..ad909afd20e 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -1007,9 +1007,8 @@ ftms = {
>
>  ftms = {
>name = mdspan;
> -  no_stdname = true; // FIXME: remove
>values = {
> -v = 1; // FIXME: 202207
> +v = 202207;
>  cxxmin = 23;
>};
>  };
> diff --git a/libstdc++-v3/include/bits/version.h
> b/libstdc++-v3/include/bits/version.h
> index dc8ac07be16..72f9231846e 100644
> --- a/libstdc++-v3/include/bits/version.h
> +++ b/libstdc++-v3/include/bits/version.h
> @@ -1126,8 +1126,9 @@
>
>  #if !defined(__cpp_lib_mdspan)
>  # if (__cplusplus >= 202100L)
> -#  define __glibcxx_mdspan 1L
> +#  define __glibcxx_mdspan 202207L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
> +#   define __cpp_lib_mdspan 202207L
>  #  endif
>  # endif
>  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> new file mode 100644
> index 000..fc528293e66
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
>
I would name the file just ftm.

> @@ -0,0 +1,6 @@
> +// { dg-do compile { target c++23 } }
> +#include 
> +
> +#ifndef __cpp_lib_mdspan
> +#error "Missing FTM"
> +#endif
>
We usually use following to test the feature test macro:
#ifndef __cpp_lib_ranges_to_container
# error "Feature test macro for ranges_to_container is missing in "
#elif __cpp_lib_ranges_to_container < 202202L
# error "Feature test macro for ranges_to_container has wrong value in
"
#endif


> --
> 2.49.0
>
>

Re: [PATCH v3 0/5] Implement mdspan.

2025-07-07 Thread Jonathan Wakely

On Fri, 4 Jul 2025 at 09:32, Luc Grosheintz  wrote:
>
> This patch series replaces:
> https://gcc.gnu.org/pipermail/libstdc++/2025-June/062207.html
>
> Addresses the review comments and improves commit messages. Most notably
> the unnecessary patch to "strength" exception guarantees has been
> removed (and tests moved into the mdspan commit).
>
> The outstanding tasks for C++23 mdspan are:
>
>   - Update cxxapi-data.csv,
>   - Update C++23 status page.
>
> IIUC, the cxxapi-data.csv can be updated separately in bulk for all of
> C++23 mdspan and I can take care of it in a follow up patch. I'm not
> entirely sure when and by whom the status page is updated.

Usually by me when I realise I've forgotten to do it and we're about
to make a new release ;-)

[PATCH v1 2/3] RISC-V: Add test for vec_duplicate + vssub.vv combine case 0 with GR2VR cost 0, 2 and 15

2025-07-07 Thread pan2 . li

From: Pan Li 

Add asm dump check and run test for vec_duplicate + vssub.vv
combine to vssub.vx, with the GR2VR cost being 0, 2 and 15.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add asm check.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: Add test
helper macros.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test
data for run test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i8.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/vx_vf/vx-1-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx_binary.h   |  21 ++
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h  | 196 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i16.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i32.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i64.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i8.c |  17 ++
 18 files changed, 297 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i8.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
index 25652ec0e7b..83515eebe3c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c
@@ -19,3 +19,4 @@ TEST_BINARY_VX_SIGNED_0(T)
 /* { dg-final { scan-assembler-times {vmax.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vssub.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
index cbf4e289824..1488fe1fa17 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c
@@ -19,3 +19,4 @@ TEST_BINARY_VX_SIGNED_0(T)
 /* { dg-final { scan-assembler-times {vmax.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vssub.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
index e5519e6699f..342ea18d59f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c
@@ -19,3 +19,4 @@ TEST_BINARY_VX_SIGNED_0(T)
 /* { dg-final { scan-assembler-times {vmax.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */
 /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vssub.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c
index beaf1741efb..583f917bc7e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c
@@ -19,3 +19,4 @@ TEST_BINARY_VX_SIGNED_0(T)
 /* { dg-final { scan-assembler-times {

Re: [PATCH v3 1/5] libstdc++: Check prerequisites of layout_*::operator().

2025-07-07 Thread Jonathan Wakely

On Fri, 4 Jul 2025 at 09:32, Luc Grosheintz  wrote:
>
> Previously, the prerequisite that the arguments passed to operator() are
> a multi-dimensional index (of extents()) was not checked.
>
> Both mapping::operator() and mdspan::operator[] have the same
> prerequisite. Since, mdspan must check the prerequisite for user-defined
> layout mappings, the preference is to check in mdspan.
>
> Because out-of-bounds accesses are very common it's nevertheless useful
> to check the prerequisite in mapping::operator(). This is relevant for
> cases where the layout mappings are used without mdspan. This commit
> check the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required
> tests.
>
> More discussion in the email chain starting at:
>
>   https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan: Check prerequisites of
> layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs.
> * testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc:
> Add tests for prerequisites.


OK for trunk.


>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan   |  5 
>  .../mdspan/layouts/debug/out_of_bounds_neg.cc | 30 +++
>  2 files changed, 35 insertions(+)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index c72a64094b7..cf20553aaa5 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _IndexType __mult = 1;
> auto __update = [&, __pos = 0u](_IndexType __idx) mutable
>   {
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx, __exts.extent(__pos)));
> __res += __idx * __mult;
> __mult *= __exts.extent(__pos);
> ++__pos;
> @@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
>   {
> --__pos;
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos],
> +  __exts.extent(__pos)));
> __res += __ind_arr[__pos] * __mult;
> __mult *= __exts.extent(__pos);
>   };
> @@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   {
> auto __update = [&, __pos = 0u](_IndexType __idx) mutable
>   {
> +   _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
> +  __m.extents().extent(__pos)));
> __res += __idx * __m.stride(__pos++);
>   };
> (__update(__indices), ...);
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
>  
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> new file mode 100644
> index 000..fb8ff01e8aa
> --- /dev/null
> +++ 
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> @@ -0,0 +1,30 @@
> +// { dg-do compile { target c++23 } }
> +// { dg-require-debug-mode "" }
> +#include
> +
> +template
> +  constexpr bool
> +  test_out_of_bounds_1d()
> +  {
> +auto m = typename Layout::mapping>{};
> +(void) m(0); // { dg-error "expansion of" }
> +return true;
> +  }
> +static_assert(test_out_of_bounds_1d()); // { dg-error 
> "expansion of" }
> +static_assert(test_out_of_bounds_1d()); // { dg-error 
> "expansion of" }
> +static_assert(test_out_of_bounds_1d()); // { dg-error 
> "expansion of" }
> +
> +template
> +  constexpr bool
> +  test_out_of_bounds_3d()
> +  {
> +auto m = typename Layout::mapping>{};
> +(void) m(2, 5, 5); // { dg-error "expansion of" }
> +return true;
> +  }
> +static_assert(test_out_of_bounds_3d()); // { dg-error 
> "expansion of" }
> +static_assert(test_out_of_bounds_3d()); // { dg-error 
> "expansion of" }
> +static_assert(test_out_of_bounds_3d()); // { dg-error 
> "expansion of" }
> +
> +// { dg-prune-output "non-constant condition for static assertion" }
> +// { dg-prune-output "__glibcxx_assert" }
> --
> 2.49.0
>

Re: [PATCH v3 2/5] libstdc++: Check prerequisite of extents::extents.

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 10:13, Tomasz Kaminski  wrote:
>
>
>
> On Mon, Jul 7, 2025 at 11:09 AM Jonathan Wakely  wrote:
>>
>> On Fri, 4 Jul 2025 at 09:30, Luc Grosheintz  wrote:
>> >
>> > Previously the prerequisite of the extents ctors that
>> >
>> > static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
>> >
>> > was not checked. This commit adds the __glibcxx_assert and tests it.
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> > * include/std/mdspan (extents): Check prerequisite of the ctor that
>> > static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
>> > * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
>> > Test the implemented prerequisite.
>> >
>> > Signed-off-by: Luc Grosheintz 
>> > ---
>> >  libstdc++-v3/include/std/mdspan   | 13 +++
>> >  .../mdspan/extents/class_mandates_neg.cc  |  2 ++
>> >  .../mdspan/extents/extents_mismatch_neg.cc| 35 +++
>> >  3 files changed, 50 insertions(+)
>> >  create mode 100644 
>> > libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>> >
>> > diff --git a/libstdc++-v3/include/std/mdspan 
>> > b/libstdc++-v3/include/std/mdspan
>> > index cf20553aaa5..1d6cdc93d80 100644
>> > --- a/libstdc++-v3/include/std/mdspan
>> > +++ b/libstdc++-v3/include/std/mdspan
>> > @@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> > return __se;
>> > }
>> >
>> > +   template
>> > + constexpr bool
>> > + _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept
>>
>> Was this intended to be a static member function?
>
> Yes, it should be. I will also adjust that locally.

Thanks, OK for trunk with that change.


>>
>>
>> > + {
>> > +   if constexpr (_OtherRank == _S_rank)
>> > + for (size_t __i = 0; __i < _S_rank; ++__i)
>> > +   if (_Extents[__i] != dynamic_extent
>> > +   && !cmp_equal(_Extents[__i], 
>> > _S_int_cast(__get_extent(__i
>> > + return false;
>> > +   return true;
>> > + }
>> > +
>> > template
>> >   constexpr void
>> >   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
>> >   {
>> > +   
>> > __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));
>> > for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
>> >   {
>> > size_t __di = __i;
>> > diff --git 
>> > a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
>> >  
>> > b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
>> > index f9c1c019666..67d18feda96 100644
>> > --- 
>> > a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
>> > +++ 
>> > b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
>> > @@ -7,6 +7,8 @@ std::extents e1; // { dg-error 
>> > "from here" }
>> >  std::extents e2; // { dg-error "from here" }
>> >  std::extents e3; // { dg-error "from here" }
>> >  std::extents e4;   // { dg-error "from here" }
>> > +
>> >  // { dg-prune-output "dynamic or representable as IndexType" }
>> >  // { dg-prune-output "signed or unsigned integer" }
>> >  // { dg-prune-output "invalid use of incomplete type" }
>> > +// { dg-prune-output "non-constant condition for static assertion" }
>> > diff --git 
>> > a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>> >  
>> > b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>> > new file mode 100644
>> > index 000..b35e5310d41
>> > --- /dev/null
>> > +++ 
>> > b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>> > @@ -0,0 +1,35 @@
>> > +// { dg-do compile { target c++23 } }
>> > +#include
>> > +
>> > +#include 
>> > +
>> > +constexpr size_t dyn = std::dynamic_extent;
>> > +
>> > +constexpr bool
>> > +test_dyn2sta_extents_mismatch_00()
>> > +{
>> > +  auto e0 = std::extents{1};
>> > +  [[maybe_unused]] auto e1 = std::extents{e0};// { 
>> > dg-error "expansion of" }
>> > +  return true;
>> > +}
>> > +static_assert(test_dyn2sta_extents_mismatch_00());// { 
>> > dg-error "expansion of" }
>> > +
>> > +constexpr bool
>> > +test_dyn2sta_extents_mismatch_01()
>> > +{
>> > +  [[maybe_unused]] auto e = std::extents{2, 2}; // { 
>> > dg-error "expansion of" }
>> > +  return true;
>> > +}
>> > +static_assert(test_dyn2sta_extents_mismatch_01());   // { 
>> > dg-error "expansion of" }
>> > +
>> > +constexpr bool
>> > +test_dyn2sta_extents_mismatch_02()
>> > +{
>> > +  std::array exts{2, 2};
>> > +  [[maybe_unused]] auto e = std::extents{exts}; // { 
>> > dg-error "expansion of" }
>> > +  return true;
>> > +}
>> > +static_assert(test_dyn2sta_extents_mismatch_02());   // { 
>> > dg-error "expansion of" }
>> > +
>> > +// { dg-prune-output "non-constant con

Re: [PATCH v2] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 10:49, Tomasz Kaminski  wrote:
>
>
>
> On Mon, Jul 7, 2025 at 11:44 AM Jonathan Wakely  wrote:
>>
>> On Mon, 7 Jul 2025 at 10:21, Tomasz Kamiński  wrote:
>> >
>> > From: XU Kailiang 
>> >
>> > C++ formatting locale could have a custom time_put that performs
>> > differently from the C locale, so do not use __timepunct directly,
>> > instead all of above specifiers use _M_locale_fmt.
>> >
>> > For %a/%A/%b/%h/%B, the code handling the exception is now moved
>> > to the _M_check_ok function, that is inovked before handling of the
>>
>> "invoked"
>>
>> > conversion specifier. For time_points the values of months/weekday
>> > are computed, and thus are always ok(), this information is indicated
>> > by new _M_time_point member of the _ChronoSpec.
>> >
>> > The different handling of j specifier for durations and time_points/
>> > calendar types, is now handled using only _ChronoParts, and _M_time_only
>> > _ChronoSpec is no longer needed, thus it was removed.
>>
>> I think this makes the handling for durations a bit easier to
>> understand, thanks.
>>
>> >
>> > PR libstdc++/117214
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> > * include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
>> > (_ChronoSpec::_M_time_point): Define.
>> > (__formatter_chrono::_M_parse): Use __parts to determine
>> > interpretation of j.
>> > (__formatter_chrono::_M_check_ok): Define.
>> > (__formatter_chrono::_M_format_to): Invoke _M_check_ok.
>> > (__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
>> > exception throwing to _M_check_ok.
>> > (__formatter_chrono::_M_j): Use _M_needs to define interpreation.
>>
>> "interpretation"
>>
>> > (__formatter_duration::_S_spec_for): Set _M_time_point.
>> > * testsuite/std/time/format/pr117214_custom_timeput.cc: New
>> > test.
>> >
>> > Co-authored-by: Tomasz Kaminski 
>> > Signed-off-by: XU Kailiang 
>> > Signed-off-by: Tomasz Kaminski 
>> > ---
>> > This patch adjusts the implementation as follows:
>> >  * we use _M_locale_fmt for all specifiers
>> >  * %h which is alias for %b is also covered
>> >
>> > Tested on x86_64-linux locally.
>> >
>> >  libstdc++-v3/include/bits/chrono_io.h | 58 ++-
>> >  .../time/format/pr117214_custom_timeput.cc| 37 
>> >  2 files changed, 81 insertions(+), 14 deletions(-)
>> >  create mode 100644 
>> > libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc
>> >
>> > diff --git a/libstdc++-v3/include/bits/chrono_io.h 
>> > b/libstdc++-v3/include/bits/chrono_io.h
>> > index 72cd569ccd6..863b3550e4f 100644
>> > --- a/libstdc++-v3/include/bits/chrono_io.h
>> > +++ b/libstdc++-v3/include/bits/chrono_io.h
>> > @@ -280,8 +280,8 @@ namespace __format
>> >// in the format-spec, e.g. "{:L%a}" is localized and 
>> > locale-specific,
>> >// but "{:L}" is only localized and "{:%a}" is only locale-specific.
>> >unsigned _M_locale_specific : 1;
>> > -  // Indicates that we are handling duration.
>> > -  unsigned _M_time_only : 1;
>> > +  // Indicates that we are handling time_point.
>> > +  unsigned _M_time_point : 1;
>> >// Indicates that duration should be treated as floating point.
>> >unsigned _M_floating_point_rep : 1;
>> >// Indicate that duration uses user-defined representation.
>> > @@ -693,8 +693,10 @@ namespace __format
>> >   __allowed_mods = _Mod_O;
>> >   break;
>> > case 'j':
>> > - __needed = __spec._M_time_only ? _HoursMinutesSeconds
>> > -: _DayOfYear;
>> > + __needed = __parts & _DayOfYear;
>> > + // 'j' is decimal number of days for durations
>> > + if (__needed == _None)
>> > +   __needed = _HoursMinutesSeconds;
>>
>> Maybe it's because I haven't slept well, but I found the comment here
>> didn't make the logic clearer for me (why is __needed = _None a
>> duration? what does HMS have to do with days?).
>> Would this be better?
>>
>> // If we do not know day-of-year then we must have a 
>> duration,
>> // which is to be formatted as decimal number of days.
>
> Yes, that makes sense. I will put that wording there.
> Given that the rest of comments are only typos, are changes otherwise OK for 
> trunk?
> Or are you still reviewing it?

No, I reviewed the rest and it's OK for trunk - thanks (and thanks to
Kailiang for the original patch).


>>
>>
>>
>> >   break;
>> > case 'm':
>> >   __needed = _Month;
>> > @@ -919,7 +921,13 @@ namespace __format
>> >{
>> > switch (__conv)
>> >   {
>> > + case 'a':
>> > + case 'A':
>> > + case 'b':
>> > + case 'B':
>> >   case 'c':
>> > + case 'h':
>> > +

Re: [PATCH v3 4/5] libstdc++: Implement mdspan::size.

2025-07-07 Thread Jonathan Wakely

On Fri, 4 Jul 2025 at 09:34, Luc Grosheintz  wrote:
>
> The current code uses __mdspan::__fwd_prod(__exts, __rank) to express
> computing the size of an extent. This commit adds a function __mdspan::
> __size(__exts) to express the idea more directly.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (__mdspan::__size): New function.


OK thanks.

> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index 1d6cdc93d80..7e970c2b905 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -398,6 +398,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>__rev_prod(const _Extents& __exts, size_t __r) noexcept
>{ return __exts_prod(__exts, __r + 1, __exts.rank()); }
>
> +template
> +  constexpr typename _Extents::index_type
> +  __size(const _Extents& __exts) noexcept
> +  { return __fwd_prod(__exts, __exts.rank()); }
> +
>  template
>auto __build_dextents_type(integer_sequence)
> -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
> @@ -591,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>constexpr index_type
>required_span_size() const noexcept
> -  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
> +  { return __mdspan::__size(_M_extents); }
>
>template<__mdspan::__valid_index_type... _Indices>
> requires (sizeof...(_Indices) == extents_type::rank())
> @@ -730,7 +735,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>constexpr index_type
>required_span_size() const noexcept
> -  { return __mdspan::__fwd_prod(_M_extents, extents_type::rank()); }
> +  { return __mdspan::__size(_M_extents); }
>
>template<__mdspan::__valid_index_type... _Indices>
> requires (sizeof...(_Indices) == extents_type::rank())
> @@ -986,8 +991,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{
> if constexpr (!is_always_exhaustive())
>   {
> -   constexpr auto __rank = extents_type::rank();
> -   auto __size = __mdspan::__fwd_prod(_M_extents, __rank);
> +   auto __size = __mdspan::__size(_M_extents);
> if(__size > 0)
>   return __size == required_span_size();
>   }
> --
> 2.49.0
>

Re: [PATCH v3 5/5] libstdc++: Implement mdspan and tests [PR107761].

2025-07-07 Thread Jonathan Wakely

On Fri, 4 Jul 2025 at 09:37, Luc Grosheintz  wrote:
>
> Implements the class mdspan as described in N4950, i.e. without P3029.
> It also adds tests for mdspan. This commit completes the implementation
> of P0009, i.e. the C++23 part.
>
> PR libstdc++/107761
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (mdspan): New class.
> * src/c++23/std.cc.in (mdspan): Add.
> * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
> * testsuite/23_containers/mdspan/mdspan.cc: New test.
> * testsuite/23_containers/mdspan/layout_like.h: Add class
> LayoutLike which models a user-defined layout.
>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan   | 284 +
>  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
>  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
>  .../23_containers/mdspan/layout_like.h|  80 +++
>  .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
>  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
>  6 files changed, 1034 insertions(+), 1 deletion(-)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index 7e970c2b905..f64804e2a42 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -1057,6 +1057,290 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __p + __i; }
>  };
>
> +  namespace __mdspan
> +  {
> +template
> +  constexpr bool
> +  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> 
> __indices)
> +  {
> +   static_assert(__exts.rank() == _Nm);
> +   for (size_t __i = 0; __i < __exts.rank(); ++__i)
> + if (__indices[__i] >= __exts.extent(__i))
> +   return false;
> +   return true;
> +  }
> +  }
> +
> +  template +  typename _LayoutPolicy = layout_right,
> +  typename _AccessorPolicy = default_accessor<_ElementType>>
> +class mdspan
> +{
> +  static_assert(!is_array_v<_ElementType>,
> +   "ElementType must not be an array type");
> +  static_assert(!is_abstract_v<_ElementType>,
> +   "ElementType must not be an abstract class type");
> +  static_assert(__mdspan::__is_extents<_Extents>,
> +   "Extents must be a specialization of std::extents");
> +  static_assert(is_same_v<_ElementType,
> + typename _AccessorPolicy::element_type>);
> +
> +public:
> +  using extents_type = _Extents;
> +  using layout_type = _LayoutPolicy;
> +  using accessor_type = _AccessorPolicy;
> +  using mapping_type = typename layout_type::template 
> mapping;
> +  using element_type = _ElementType;
> +  using value_type = remove_cv_t;
> +  using index_type = typename extents_type::index_type;
> +  using size_type = typename extents_type::size_type;
> +  using rank_type = typename extents_type::rank_type;
> +  using data_handle_type = typename accessor_type::data_handle_type;
> +  using reference = typename accessor_type::reference;
> +
> +  static constexpr rank_type
> +  rank() noexcept { return extents_type::rank(); }
> +
> +  static constexpr rank_type
> +  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
> +
> +  static constexpr size_t
> +  static_extent(rank_type __r) noexcept
> +  { return extents_type::static_extent(__r); }
> +
> +  constexpr index_type
> +  extent(rank_type __r) const noexcept { return extents().extent(__r); }
> +
> +  constexpr
> +  mdspan()
> +  requires (rank_dynamic() > 0)
> +  && is_default_constructible_v
> + && is_default_constructible_v
> + && is_default_constructible_v
> +  : _M_accessor(), _M_mapping(), _M_handle()
> +  { }
> +
> +  constexpr
> +  mdspan(const mdspan& __other) = default;
> +
> +  constexpr
> +  mdspan(mdspan&& __other) = default;
> +
> +  template<__mdspan::__valid_index_type... _OIndexTypes>
> +   requires (sizeof...(_OIndexTypes) == rank()
> +  || sizeof...(_OIndexTypes) == rank_dynamic())
> +&& is_constructible_v
> +&& is_default_constructible_v
> +   constexpr explicit
> +   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
> +   : _M_accessor(),
> + _M_mapping(_Extents(static_cast(std::move(__exts))...)),
> + _M_handle(std::move(__handle))
> +   { }
> +
> +  template<__mdspan::__valid_index_type _OIndexType,
> +  size_t _Nm>
> +   requires (_Nm == rank() || _Nm == rank_dynamic())
> +

Re: [PATCH v3 3/5] libstdc++: Restructure mdspan tests to reuse IntLike.

2025-07-07 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 09:59, Tomasz Kaminski  wrote:
>
>
> On Fri, Jul 4, 2025 at 10:37 AM Luc Grosheintz  
> wrote:
>>
>> The class IntLike is used for testing extents with user-defined classes
>> that convert to int. This commit places the class into a separate header
>> file. This allows it to be reused across different parts of the mdspan
>> related testsuite.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * testsuite/23_containers/mdspan/extents/custom_integer.cc:
>> Delete IntLike and include "int_like.h".
>> * testsuite/23_containers/mdspan/extents/int_like.h: Add
>> IntLike.
>>
>> Signed-off-by: Luc Grosheintz 
>> ---
>
>  LGTM, outside of using header guards. I can change that locally.

Yes please. OK for trunk with the guards.


>>
>>  .../mdspan/extents/custom_integer.cc  | 27 +-
>>  .../23_containers/mdspan/extents/int_like.h   | 28 +++
>>  2 files changed, 29 insertions(+), 26 deletions(-)
>>  create mode 100644 
>> libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
>>
>> diff --git 
>> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc 
>> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
>> index 2907ad12ae7..404755bd5ac 100644
>> --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
>> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/custom_integer.cc
>> @@ -2,38 +2,13 @@
>>  #include 
>>
>>  #include 
>> +#include "int_like.h"
>>
>>  // Test construction from a custom integer-like object, that has
>>  // no copy/move ctor or copy/move assignment operator.
>>
>>  constexpr size_t dyn = std::dynamic_extent;
>>
>> -class IntLike
>> -{
>> -public:
>> -  explicit
>> -  IntLike(int i)
>> -  : _M_i(i)
>> -  { }
>> -
>> -  IntLike() = delete;
>> -  IntLike(const IntLike&) = delete;
>> -  IntLike(IntLike&&) = delete;
>> -
>> -  const IntLike&
>> -  operator=(const IntLike&) = delete;
>> -
>> -  const IntLike&
>> -  operator=(IntLike&&) = delete;
>> -
>> -  constexpr
>> -  operator int() const noexcept
>> -  { return _M_i; }
>> -
>> -private:
>> -  int _M_i;
>> -};
>> -
>>  static_assert(std::is_convertible_v);
>>  static_assert(std::is_nothrow_constructible_v);
>>
>> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h 
>> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
>> new file mode 100644
>> index 000..7785d2ffe95
>> --- /dev/null
>> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
>> @@ -0,0 +1,28 @@
>> +#pragma once
>
> Please use header guards instead of pragma once.
>>
>> +
>> +class IntLike
>> +{
>> +public:
>> +  explicit
>> +  IntLike(int i)
>> +  : _M_i(i)
>> +  { }
>> +
>> +  IntLike() = delete;
>> +  IntLike(const IntLike&) = delete;
>> +  IntLike(IntLike&&) = delete;
>> +
>> +  const IntLike&
>> +  operator=(const IntLike&) = delete;
>> +
>> +  const IntLike&
>> +  operator=(IntLike&&) = delete;
>> +
>> +  constexpr
>> +  operator int() const noexcept
>> +  { return _M_i; }
>> +
>> +private:
>> +  int _M_i;
>> +};
>> +
>> --
>> 2.49.0
>>

Re: [PATCH v2] libstdc++: Format chrono %a/%A/%b/%h/%B/%p using locale's time_put [PR117214]

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 11:44 AM Jonathan Wakely  wrote:

> On Mon, 7 Jul 2025 at 10:21, Tomasz Kamiński  wrote:
> >
> > From: XU Kailiang 
> >
> > C++ formatting locale could have a custom time_put that performs
> > differently from the C locale, so do not use __timepunct directly,
> > instead all of above specifiers use _M_locale_fmt.
> >
> > For %a/%A/%b/%h/%B, the code handling the exception is now moved
> > to the _M_check_ok function, that is inovked before handling of the
>
> "invoked"
>
> > conversion specifier. For time_points the values of months/weekday
> > are computed, and thus are always ok(), this information is indicated
> > by new _M_time_point member of the _ChronoSpec.
> >
> > The different handling of j specifier for durations and time_points/
> > calendar types, is now handled using only _ChronoParts, and _M_time_only
> > _ChronoSpec is no longer needed, thus it was removed.
>
> I think this makes the handling for durations a bit easier to
> understand, thanks.
>
> >
> > PR libstdc++/117214
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/chrono_io.h (_ChronoSpec::_M_time_only): Remove.
> > (_ChronoSpec::_M_time_point): Define.
> > (__formatter_chrono::_M_parse): Use __parts to determine
> > interpretation of j.
> > (__formatter_chrono::_M_check_ok): Define.
> > (__formatter_chrono::_M_format_to): Invoke _M_check_ok.
> > (__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Move
> > exception throwing to _M_check_ok.
> > (__formatter_chrono::_M_j): Use _M_needs to define interpreation.
>
> "interpretation"
>
> > (__formatter_duration::_S_spec_for): Set _M_time_point.
> > * testsuite/std/time/format/pr117214_custom_timeput.cc: New
> > test.
> >
> > Co-authored-by: Tomasz Kaminski 
> > Signed-off-by: XU Kailiang 
> > Signed-off-by: Tomasz Kaminski 
> > ---
> > This patch adjusts the implementation as follows:
> >  * we use _M_locale_fmt for all specifiers
> >  * %h which is alias for %b is also covered
> >
> > Tested on x86_64-linux locally.
> >
> >  libstdc++-v3/include/bits/chrono_io.h | 58 ++-
> >  .../time/format/pr117214_custom_timeput.cc| 37 
> >  2 files changed, 81 insertions(+), 14 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/std/time/format/pr117214_custom_timeput.cc
> >
> > diff --git a/libstdc++-v3/include/bits/chrono_io.h
> b/libstdc++-v3/include/bits/chrono_io.h
> > index 72cd569ccd6..863b3550e4f 100644
> > --- a/libstdc++-v3/include/bits/chrono_io.h
> > +++ b/libstdc++-v3/include/bits/chrono_io.h
> > @@ -280,8 +280,8 @@ namespace __format
> >// in the format-spec, e.g. "{:L%a}" is localized and
> locale-specific,
> >// but "{:L}" is only localized and "{:%a}" is only
> locale-specific.
> >unsigned _M_locale_specific : 1;
> > -  // Indicates that we are handling duration.
> > -  unsigned _M_time_only : 1;
> > +  // Indicates that we are handling time_point.
> > +  unsigned _M_time_point : 1;
> >// Indicates that duration should be treated as floating point.
> >unsigned _M_floating_point_rep : 1;
> >// Indicate that duration uses user-defined representation.
> > @@ -693,8 +693,10 @@ namespace __format
> >   __allowed_mods = _Mod_O;
> >   break;
> > case 'j':
> > - __needed = __spec._M_time_only ? _HoursMinutesSeconds
> > -: _DayOfYear;
> > + __needed = __parts & _DayOfYear;
> > + // 'j' is decimal number of days for durations
> > + if (__needed == _None)
> > +   __needed = _HoursMinutesSeconds;
>
> Maybe it's because I haven't slept well, but I found the comment here
> didn't make the logic clearer for me (why is __needed = _None a
> duration? what does HMS have to do with days?).
> Would this be better?
>
> // If we do not know day-of-year then we must have a
> duration,
> // which is to be formatted as decimal number of days.
>
Yes, that makes sense. I will put that wording there.
Given that the rest of comments are only typos, are changes otherwise OK
for trunk?
Or are you still reviewing it?

>
>
> >   break;
> > case 'm':
> >   __needed = _Month;
> > @@ -919,7 +921,13 @@ namespace __format
> >{
> > switch (__conv)
> >   {
> > + case 'a':
> > + case 'A':
> > + case 'b':
> > + case 'B':
> >   case 'c':
> > + case 'h':
> > + case 'p':
> >   case 'r':
> >   case 'x':
> >   case 'X':
> > @@ -947,6 +955,32 @@ namespace __format
> >   return __out;
> > }
> >
> > +  void
> > +  _M_check_ok(const _ChronoData<_CharT>& __t, _CharT __conv) const
> > +  {
> > +   // n.b

[PATCH][committed]: Update maintainers file

2025-07-07 Thread Tamar Christina

Update MAINTAINERS file to include myself in AArch64 port.

committed to master.

Thanks,
Tamar

ChangeLog:

* MAINTAINERS: Add myself to AArch64 port.

---
diff --git a/MAINTAINERS b/MAINTAINERS
index 
f2ad65007b6d4b2ce68fa07d580ad93286739529..2cd2ec650b6f7c4b7a6aa42f295a728079d7e183
 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -57,6 +57,7 @@ docs, and the testsuite related to that.
 aarch64 ldp/stp Alex Coplan 
 aarch64 portRichard Earnshaw
 aarch64 portRichard Sandiford   
+aarch64 portTamar Christina 
 aarch64 portKyrylo Tkachov  
 alpha port  Richard Henderson   
 amdgcn port Julian Brown


-- 
diff --git a/MAINTAINERS b/MAINTAINERS
index f2ad65007b6d4b2ce68fa07d580ad93286739529..2cd2ec650b6f7c4b7a6aa42f295a728079d7e183 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -57,6 +57,7 @@ docs, and the testsuite related to that.
 aarch64 ldp/stp Alex Coplan 
 aarch64 portRichard Earnshaw
 aarch64 portRichard Sandiford   
+aarch64 portTamar Christina 
 aarch64 portKyrylo Tkachov  
 alpha port  Richard Henderson   
 amdgcn port Julian Brown

Re: [PATCH v3 1/5] libstdc++: Check prerequisites of layout_*::operator().

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 3:29 PM Luc Grosheintz 
wrote:

>
> On 7/4/25 10:29, Luc Grosheintz wrote:
> > Previously, the prerequisite that the arguments passed to operator() are
> > a multi-dimensional index (of extents()) was not checked.
> >
> > Both mapping::operator() and mdspan::operator[] have the same
> > prerequisite. Since, mdspan must check the prerequisite for user-defined
> > layout mappings, the preference is to check in mdspan.
> >
> > Because out-of-bounds accesses are very common it's nevertheless useful
> > to check the prerequisite in mapping::operator(). This is relevant for
> > cases where the layout mappings are used without mdspan. This commit
> > check the prerequisites via _GLIBCXX_DEBUG_ASSERTs and adds the required
>
> Could you please locally fix this typo in the commit message?
>
> s/check/checks/
>
Done.

>
>
> > tests.
> >
> > More discussion in the email chain starting at:
> >
> >https://gcc.gnu.org/pipermail/libstdc++/2025-July/062265.html
> >
> > libstdc++-v3/ChangeLog:
> >
> >   * include/std/mdspan: Check prerequisites of
> >   layout_*::operator() with _GLIBCXX_DEBUG_ASSERTs.
> >   *
> testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc:
> >   Add tests for prerequisites.
> >
> > Signed-off-by: Luc Grosheintz 
> > ---
> >   libstdc++-v3/include/std/mdspan   |  5 
> >   .../mdspan/layouts/debug/out_of_bounds_neg.cc | 30 +++
> >   2 files changed, 35 insertions(+)
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> >
> > diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> > index c72a64094b7..cf20553aaa5 100644
> > --- a/libstdc++-v3/include/std/mdspan
> > +++ b/libstdc++-v3/include/std/mdspan
> > @@ -441,6 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   _IndexType __mult = 1;
> >   auto __update = [&, __pos = 0u](_IndexType __idx) mutable
> > {
> > + _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
> __exts.extent(__pos)));
> >   __res += __idx * __mult;
> >   __mult *= __exts.extent(__pos);
> >   ++__pos;
> > @@ -651,6 +652,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   auto __update = [&, __pos = __exts.rank()](_IndexType) mutable
> > {
> >   --__pos;
> > + _GLIBCXX_DEBUG_ASSERT(cmp_less(__ind_arr[__pos],
> > +__exts.extent(__pos)));
> >   __res += __ind_arr[__pos] * __mult;
> >   __mult *= __exts.extent(__pos);
> > };
> > @@ -822,6 +825,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > {
> >   auto __update = [&, __pos = 0u](_IndexType __idx) mutable
> > {
> > + _GLIBCXX_DEBUG_ASSERT(cmp_less(__idx,
> > +
> __m.extents().extent(__pos)));
> >   __res += __idx * __m.stride(__pos++);
> > };
> >   (__update(__indices), ...);
> > diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> > new file mode 100644
> > index 000..fb8ff01e8aa
> > --- /dev/null
> > +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
> > @@ -0,0 +1,30 @@
> > +// { dg-do compile { target c++23 } }
> > +// { dg-require-debug-mode "" }
> > +#include
> > +
> > +template
> > +  constexpr bool
> > +  test_out_of_bounds_1d()
> > +  {
> > +auto m = typename Layout::mapping>{};
> > +(void) m(0); // { dg-error "expansion of" }
> > +return true;
> > +  }
> > +static_assert(test_out_of_bounds_1d()); // { dg-error
> "expansion of" }
> > +static_assert(test_out_of_bounds_1d()); // {
> dg-error "expansion of" }
> > +static_assert(test_out_of_bounds_1d()); // {
> dg-error "expansion of" }
> > +
> > +template
> > +  constexpr bool
> > +  test_out_of_bounds_3d()
> > +  {
> > +auto m = typename Layout::mapping>{};
> > +(void) m(2, 5, 5); // { dg-error "expansion of" }
> > +return true;
> > +  }
> > +static_assert(test_out_of_bounds_3d()); // { dg-error
> "expansion of" }
> > +static_assert(test_out_of_bounds_3d()); // {
> dg-error "expansion of" }
> > +static_assert(test_out_of_bounds_3d()); // {
> dg-error "expansion of" }
> > +
> > +// { dg-prune-output "non-constant condition for static assertion" }
> > +// { dg-prune-output "__glibcxx_assert" }
>
>

[PATCH] tree-optimization/120358 - bogus PTA with structure access

2025-07-07 Thread Richard Biener

When we compute the constraint for something like
MEM[(const struct QStringView &)&tok2 + 32] we go and compute
what (const struct QStringView &)&tok2 + 32 points to and then
add subvariables to its dereference that possibly fall in the
range of the access according to the original ref's size.  In
doing that we disregarded that the subvariable the starting
address points to might not be aligned to it and thus the
access might start at any point within that variable.  The following
conservatively adjusts the pruning of adjacent sub-variables to
honor this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

PR tree-optimization/120358
* tree-ssa-structalias.cc (get_constraint_for_1): Adjust
pruning of sub-variables according to the imprecise
known start offset.
---
 gcc/tree-ssa-structalias.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index deca44ae0bf..0215243d5be 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -3690,7 +3690,10 @@ get_constraint_for_1 (tree t, vec *results, bool 
address_p,
size = -1;
  for (; curr; curr = vi_next (curr))
{
- if (curr->offset - vi->offset < size)
+ /* The start of the access might happen anywhere
+within vi, so conservatively assume it was
+at its end.  */
+ if (curr->offset - (vi->offset + vi->size - 1) < size)
{
  cs.var = curr->id;
  results->safe_push (cs);
-- 
2.43.0

Re: [PATCH] aarch64: Improve popcountti2 with SVE

2025-07-07 Thread Kyrylo Tkachov



> On 7 Jul 2025, at 13:27, Richard Sandiford  wrote:
> 
> Tamar Christina  writes:
>>> -Original Message-
>>> From: Kyrylo Tkachov 
>>> Sent: Monday, July 7, 2025 10:38 AM
>>> To: GCC Patches 
>>> Cc: Richard Sandiford ; Richard Earnshaw
>>> ; Alex Coplan ; Andrew
>>> Pinski 
>>> Subject: [PATCH] aarch64: Improve popcountti2 with SVE
>>> 
>>> Hi all,
>>> 
>>> The TImode popcount sequence can be slightly improved with SVE.
>>> If we generate:
>>> ldr q31, [x0]
>>> ptrue p7.b, vl16
>>> cnt z31.d, p7/m, z31.d
>>> addp d31, v31.2d
>>> fmov x0, d31
>>> ret
>>> 
>>> instead of:
>>> h128:
>>> ldr q31, [x0]
>>> cnt v31.16b, v31.16b
>>> addv b31, v31.16b
>>> fmov w0, s31
>>> ret
>>> 
>>> we use the ADDP instruction for reduction, which is cheaper on all CPUs 
>>> AFAIK,
>>> as it is only a single 64-bit addition vs the tree of additions for ADDV.
>>> For example, on a CPU like Grace we get a latency and throughput of 2,4 vs 
>>> 4,1
>>> for ADDV.
>>> We do generate one more instruction due to the PTRUE being materialised, but
>>> that
>>> is cheap itself and can be scheduled away from the critical path or even 
>>> CSE'd
>>> with other PTRUE constants.
>>> As this sequence is larger code size-wise it is avoided for -Os.
>>> 
>>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>> 
>>> Ok for trunk?
>> 
>> We don't seem to take -Os into consideration for the general vector version 
>> when
>> using SVE. Should we? or should the size check be dropped here?  Seems better
>> if we're consistent.
> 
> The difference is that for 64-bit and smaller popcounts, SVE CNT provides
> the result directly, whereas Advanced SIMD requires CNT+ADDV.  So for smaller
> sizes, it's effectively PTRUE+CNT vs CNT+ADDV, with the SVE version having
> the advantage of a hoistable and shareable constant.
> 
> For 128-bit popcounts we need CNT+an ADD either way, and the SVE CNT has the
> added disadvantage of requiring tied registers to avoid a false dependency
> (either directly from the RA, or via MOVPRFX).  So keeping the -Os check
> seems better to me FWIW.
> 
> Richard
> 
>> 
>> OK with or without that change.
>> 

Thanks, I added the -Os check as I expected this distinction to matter in more 
contexts as this is a scalar expansion and so the user may be more serious 
about code size requirements vs vector code.
Though I admit it’s a bit handwavy. Richard’s rationale is more technical.
I’ll keep the check when committing.

Thanks,
Kyrill

>> Thanks,
>> Tamar
>> 
>>> Thanks,
>>> Kyrill
>>> 
>>> Signed-off-by: Kyrylo Tkachov 
>>> 
>>> gcc/
>>> 
>>> * config/aarch64/aarch64.md (popcountti2): Add TARGET_SVE path.
>>> 
>>> gcc/testsuite/
>>> 
>>> * gcc.target/aarch64/popcnt9.c: Add +nosve to target pragma.
>>> * gcc.target/aarch64/popcnt13.c: New test.

[PATCH v1 0/3] RISC-V: Combine vec_duplicate + vssub.vv to vssub.vx on GR2VR cost

2025-07-07 Thread pan2 . li

From: Pan Li 

This patch series introduces the combine of vec_dup + vssub.vv into
vssub.vx, depending on the cost value of GR2VR.  The late-combine pass
performs the combine if the cost of GR2VR is zero, and rejects it if the
cost is non-zero (e.g. 1, 2 or 15 in the tests).  There are two cases for
the combine:

Case 0:
 |   ...
 |   vmv.v.x
 | L1:
 |   vssub.vv
 |   J L1
 |   ...

Case 1:
 |   ...
 | L1:
 |   vmv.v.x
 |   vssub.vv
 |   J L1
 |   ...

Both will be combined to below if the cost of GR2VR is zero.
 |   ...
 | L1:
 |   vssub.vx
 |   J L1
 |   ...

The below test suites are passed for this patch series.
* The rv64gcv fully regression test.

Pan Li (3):
  RISC-V: Combine vec_duplicate + vssub.vv to vssub.vx on GR2VR cost
  RISC-V: Add test for vec_duplicate + vssub.vv combine case 0 with GR2VR cost 
0, 2 and 15
  RISC-V: Add test for vec_duplicate + vssub.vv combine case 1 with GR2VR cost 
0, 1 and 2

 gcc/config/riscv/riscv-v.cc   |   1 +
 gcc/config/riscv/riscv.cc |   1 +
 gcc/config/riscv/vector-iterators.md  |   3 +-
 .../riscv/rvv/autovec/vx_vf/vx-1-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-1-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-2-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i16.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i32.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i64.c|   1 +
 .../riscv/rvv/autovec/vx_vf/vx-3-i8.c |   1 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-4-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-5-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i16.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i32.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i64.c|   2 +
 .../riscv/rvv/autovec/vx_vf/vx-6-i8.c |   2 +
 .../riscv/rvv/autovec/vx_vf/vx_binary.h   |  21 ++
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h  | 196 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i16.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i32.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i64.c|  17 ++
 .../rvv/autovec/vx_vf/vx_vssub-run-1-i8.c |  17 ++
 33 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vssub-run-1-i8.c

-- 
2.43.0

[PATCH v1 1/3] RISC-V: Combine vec_duplicate + vssub.vv to vssub.vx on GR2VR cost

2025-07-07 Thread pan2 . li

From: Pan Li 

This patch combines vec_duplicate + vssub.vv into vssub.vx, as shown in
the example code below.  The related pattern depends on the cost of
vec_duplicate from GR2VR: the late-combine pass performs the combination
if the cost of GR2VR is zero, and rejects it if the GR2VR cost is
greater than zero.

Assume we have example code like the one below, with a GR2VR cost of 0.

  #define DEF_SAT_S_ADD(T, UT, MIN, MAX) \
  T  \
  test_##T##_sat_add (T x, T y)  \
  {  \
T sum = (UT)x + (UT)y;   \
return (x ^ y) < 0   \
  ? sum  \
  : (sum ^ x) >= 0   \
? sum\
: x < 0 ? MIN : MAX; \
  }

  DEF_SAT_S_ADD(int32_t, uint32_t, INT32_MIN, INT32_MAX)
  DEF_VX_BINARY_CASE_2_WRAP(T, SAT_S_ADD_FUNC(T), sat_add)

Before this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ vsetvli a5,zero,e32,m1,ta,ma
  13   │ vmv.v.x v2,a2
  14   │ sllia3,a3,32
  15   │ srlia3,a3,32
  16   │ .L3:
  17   │ vsetvli a5,a3,e32,m1,ta,ma
  18   │ vle32.v v1,0(a1)
  19   │ sllia4,a5,2
  20   │ sub a3,a3,a5
  21   │ add a1,a1,a4
  22   │ vssub.vv v1,v1,v2
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a3,zero,.L3

After this patch:
  10   │ test_vx_binary_or_int32_t_case_0:
  11   │ beq a3,zero,.L8
  12   │ sllia3,a3,32
  13   │ srlia3,a3,32
  14   │ .L3:
  15   │ vsetvli a5,a3,e32,m1,ta,ma
  16   │ vle32.v v1,0(a1)
  17   │ sllia4,a5,2
  18   │ sub a3,a3,a5
  19   │ add a1,a1,a4
  20   │ vssub.vx v1,v1,a2
  21   │ vse32.v v1,0(a0)
  22   │ add a0,a0,a4
  23   │ bne a3,zero,.L3

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_vx_binary_vec_vec_dup): Add
new case SS_MINUS.
* config/riscv/riscv.cc (riscv_rtx_costs): Ditto.
* config/riscv/vector-iterators.md: Add new op ss_minus.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc  | 1 +
 gcc/config/riscv/riscv.cc| 1 +
 gcc/config/riscv/vector-iterators.md | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a5ab8dd4e2f..dad23d5c670 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5586,6 +5586,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx 
op_2,
 case US_PLUS:
 case US_MINUS:
 case SS_PLUS:
+case SS_MINUS:
   icode = code_for_pred_scalar (code, mode);
   break;
 default:
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ecdb61e1899..1ad756fa2f8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3998,6 +3998,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
case US_PLUS:
case US_MINUS:
case SS_PLUS:
+   case SS_MINUS:
  *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
  break;
default:
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index fd0959c1a4b..e60e3a8399c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4042,7 +4042,8 @@ (define_code_iterator any_int_binop [plus minus and ior 
xor ashift ashiftrt lshi
 ])
 
 (define_code_iterator any_int_binop_no_shift_v_vdup [
-  plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus 
us_minus ss_plus
+  plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus
+  us_minus ss_plus ss_minus
 ])
 
 (define_code_iterator any_int_binop_no_shift_vdup_v [
-- 
2.43.0

[PATCH v1 3/3] RISC-V: Add test for vec_duplicate + vssub.vv combine case 1 with GR2VR cost 0, 1 and 2

2025-07-07 Thread pan2 . li

From: Pan Li 

Add asm dump check tests for the vec_duplicate + vssub.vv combine to
vssub.vx, with GR2VR costs of 0, 1 and 2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Add asm check.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c  | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c  | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c | 2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c  | 2 ++
 12 files changed, 24 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
index 92f1b7b1596..d79a9f21af6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c
@@ -19,6 +19,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, 
VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
 DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, 
VX_BINARY_FUNC_BODY_X8)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -32,3 +33,4 @@ DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BOD
 /* { dg-final { scan-assembler {vmax.vx} } } */
 /* { dg-final { scan-assembler {vmin.vx} } } */
 /* { dg-final { scan-assembler {vsadd.vx} } } */
+/* { dg-final { scan-assembler {vssub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
index 31594cec359..940f5965438 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c
@@ -19,6 +19,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, 
VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4)
 DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, 
VX_BINARY_FUNC_BODY_X4)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -32,3 +33,4 @@ DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BOD
 /* { dg-final { scan-assembler {vmax.vx} } } */
 /* { dg-final { scan-assembler {vmin.vx} } } */
 /* { dg-final { scan-assembler {vsadd.vx} } } */
+/* { dg-final { scan-assembler {vssub.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
index 02e03ec4d3f..22a64f6c5c0 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c
@@ -19,6 +19,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, 
VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY)
 DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, 
VX_BINARY_FUNC_BODY)
 
 /* { dg-final { scan-assembler {vadd.vx} } } */
 /* { dg-final { scan-assembler {vsub.vx} } } */
@@ -32,3 +33,4 @@ DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, 
VX_BINARY_FUNC_BOD
 /

Re: [PATCH v3] x86: Improve vector_loop/unrolled_loop for memset/memcpy

2025-07-07 Thread Hongtao Liu

On Tue, Jun 24, 2025 at 2:11 PM H.J. Lu  wrote:
>
> On Mon, Jun 23, 2025 at 2:24 PM H.J. Lu  wrote:
> >
> > On Wed, Jun 18, 2025 at 3:17 PM H.J. Lu  wrote:
> > >
> > > 1. Don't generate the loop if the loop count is 1.
> > > 2. For memset with vector on small size, use vector if small size supports
> > > vector, otherwise use the scalar value.
> > > 3. Duplicate the promoted scalar value for vector.
> > > 4. Always expand vector-version of memset for vector_loop.
> > > 5. Use misaligned prologue if alignment isn't needed.  When misaligned
> > > prologue is used, check if destination is actually aligned and update
> > > destination alignment if aligned.
> > >
> > > The included tests show that codegen of vector_loop/unrolled_loop for
> > > memset/memcpy are significantly improved.  For
> > >
> > > ---
> > > void
> > > foo (void *p1, size_t len)
> > > {
> > >   __builtin_memset (p1, 0, len);
> > > }
> > > ---
> > >
> > > with
> > >
> > > -O2 -minline-all-stringops 
> > > -mmemset-strategy=vector_loop:256:noalign,libcall:-1:noalign -march=x86-64
> > >
> > > we used to generate
> > >
> > > foo:
> > > .LFB0:
> > > .cfi_startproc
> > > movq%rdi, %rax
> > > pxor%xmm0, %xmm0
> > > cmpq$64, %rsi
> > > jnb .L18
> > > .L2:
> > > andl$63, %esi
> > > je  .L1
> > > xorl%edx, %edx
> > > testb   $1, %sil
> > > je  .L5
> > > movl$1, %edx
> > > movb$0, (%rax)
> > > cmpq%rsi, %rdx
> > > jnb .L19
> > > .L5:
> > > movb$0, (%rax,%rdx)
> > > movb$0, 1(%rax,%rdx)
> > > addq$2, %rdx
> > > cmpq%rsi, %rdx
> > > jb  .L5
Lili found that the 527.cam4_r regression (PR120943) is caused by the
larger number of instructions executed when the movb instruction is used
(it takes more iterations) instead of the original movq.
The patch optimizes this with vector moves and resolves the issue.

> > > .L1:
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L18:
> > > movq%rsi, %rdx
> > > xorl%eax, %eax
> > > andq$-64, %rdx
> > > .L3:
> > > movups  %xmm0, (%rdi,%rax)
> > > movups  %xmm0, 16(%rdi,%rax)
> > > movups  %xmm0, 32(%rdi,%rax)
> > > movups  %xmm0, 48(%rdi,%rax)
> > > addq$64, %rax
> > > cmpq%rdx, %rax
> > > jb  .L3
> > > addq%rdi, %rax
> > > jmp .L2
> > > .L19:
> > > ret
> > > .cfi_endproc
> > >
> > > with very poor prologue/epilogue.  With this patch, we now generate:
> > >
> > > foo:
> > > .LFB0:
> > > .cfi_startproc
> > > pxor%xmm0, %xmm0
> > > cmpq$64, %rsi
> > > jnb .L2
> > > testb   $32, %sil
> > > jne .L19
> > > testb   $16, %sil
> > > jne .L20
> > > testb   $8, %sil
> > > jne .L21
> > > testb   $4, %sil
> > > jne .L22
> > > testq   %rsi, %rsi
> > > jne .L23
> > > .L1:
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L2:
> > > movups  %xmm0, -64(%rdi,%rsi)
> > > movups  %xmm0, -48(%rdi,%rsi)
> > > movups  %xmm0, -32(%rdi,%rsi)
> > > movups  %xmm0, -16(%rdi,%rsi)
> > > subq$1, %rsi
> > > cmpq$64, %rsi
> > > jb  .L1
> > > andq$-64, %rsi
> > > xorl%eax, %eax
> > > .L9:
> > > movups  %xmm0, (%rdi,%rax)
> > > movups  %xmm0, 16(%rdi,%rax)
> > > movups  %xmm0, 32(%rdi,%rax)
> > > movups  %xmm0, 48(%rdi,%rax)
> > > addq$64, %rax
> > > cmpq%rsi, %rax
> > > jb  .L9
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L23:
> > > movb$0, (%rdi)
> > > testb   $2, %sil
> > > je  .L1
> > > xorl%eax, %eax
> > > movw%ax, -2(%rdi,%rsi)
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L19:
> > > movups  %xmm0, (%rdi)
> > > movups  %xmm0, 16(%rdi)
> > > movups  %xmm0, -32(%rdi,%rsi)
> > > movups  %xmm0, -16(%rdi,%rsi)
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L20:
> > > movups  %xmm0, (%rdi)
> > > movups  %xmm0, -16(%rdi,%rsi)
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L21:
> > > movq$0, (%rdi)
> > > movq$0, -8(%rdi,%rsi)
> > > ret
> > > .p2align 4,,10
> > > .p2align 3
> > > .L22:
> > > movl$0, (%rdi)
> > > movl$0, -4(%rdi,%rsi)
> > > ret
> > > .cfi_endproc
> >
> >
> > Here is the v2 patch with the memset improvements:
> >
> > 1. Always duplicate the promoted scalar value for vector_loop if not 0
> > nor -1.
> > 2. Update setmem_epilogue_gen_val to use th

Re: [PATCH v3 2/5] libstdc++: Check prerequisite of extents::extents.

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 11:09 AM Jonathan Wakely  wrote:

> On Fri, 4 Jul 2025 at 09:30, Luc Grosheintz 
> wrote:
> >
> > Previously the prerequisite of the extents ctors that
> >
> > static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
> >
> > > was not checked. This commit adds the __glibcxx_assert and tests for it.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/mdspan (extents): Check prerequisite of the ctor
> that
> > static_extent(i) == dynamic_extent || extent(i) ==
> other.extent(i).
> > * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
> > Test the implemented prerequisite.
> >
> > Signed-off-by: Luc Grosheintz 
> > ---
> >  libstdc++-v3/include/std/mdspan   | 13 +++
> >  .../mdspan/extents/class_mandates_neg.cc  |  2 ++
> >  .../mdspan/extents/extents_mismatch_neg.cc| 35 +++
> >  3 files changed, 50 insertions(+)
> >  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> >
> > diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> > index cf20553aaa5..1d6cdc93d80 100644
> > --- a/libstdc++-v3/include/std/mdspan
> > +++ b/libstdc++-v3/include/std/mdspan
> > @@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > return __se;
> > }
> >
> > +   template
> > + constexpr bool
> > + _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept
>
> Was this intended to be a static member function?
>
Yes, it should be. I will also adjust that locally.

>
> > + {
> > +   if constexpr (_OtherRank == _S_rank)
> > + for (size_t __i = 0; __i < _S_rank; ++__i)
> > +   if (_Extents[__i] != dynamic_extent
> > +   && !cmp_equal(_Extents[__i],
> _S_int_cast(__get_extent(__i
> > + return false;
> > +   return true;
> > + }
> > +
> > template
> >   constexpr void
> >   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
> >   {
> > +
>  __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));
> > for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
> >   {
> > size_t __di = __i;
> > diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> > index f9c1c019666..67d18feda96 100644
> > ---
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> > +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> > @@ -7,6 +7,8 @@ std::extents e1; // { dg-error
> "from here" }
> >  std::extents e2; // { dg-error "from here" }
> >  std::extents e3; // { dg-error "from here" }
> >  std::extents e4;   // { dg-error "from here" }
> > +
> >  // { dg-prune-output "dynamic or representable as IndexType" }
> >  // { dg-prune-output "signed or unsigned integer" }
> >  // { dg-prune-output "invalid use of incomplete type" }
> > +// { dg-prune-output "non-constant condition for static assertion" }
> > diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> > new file mode 100644
> > index 000..b35e5310d41
> > --- /dev/null
> > +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> > @@ -0,0 +1,35 @@
> > +// { dg-do compile { target c++23 } }
> > +#include
> > +
> > +#include 
> > +
> > +constexpr size_t dyn = std::dynamic_extent;
> > +
> > +constexpr bool
> > +test_dyn2sta_extents_mismatch_00()
> > +{
> > +  auto e0 = std::extents{1};
> > +  [[maybe_unused]] auto e1 = std::extents{e0};// {
> dg-error "expansion of" }
> > +  return true;
> > +}
> > +static_assert(test_dyn2sta_extents_mismatch_00());// {
> dg-error "expansion of" }
> > +
> > +constexpr bool
> > +test_dyn2sta_extents_mismatch_01()
> > +{
> > +  [[maybe_unused]] auto e = std::extents{2, 2}; // {
> dg-error "expansion of" }
> > +  return true;
> > +}
> > +static_assert(test_dyn2sta_extents_mismatch_01());   // {
> dg-error "expansion of" }
> > +
> > +constexpr bool
> > +test_dyn2sta_extents_mismatch_02()
> > +{
> > +  std::array exts{2, 2};
> > +  [[maybe_unused]] auto e = std::extents{exts}; // {
> dg-error "expansion of" }
> > +  return true;
> > +}
> > +static_assert(test_dyn2sta_extents_mismatch_02());   // {
> dg-error "expansion of" }
> > +
> > +// { dg-prune-output "non-constant condition for static assertion" }
> > +// { dg-prune-output "__glibcxx_assert" }
> > --
> > 2.49.0
> >
>
>

Re: [PATCH v3 2/5] libstdc++: Check prerequisite of extents::extents.

2025-07-07 Thread Luc Grosheintz





On 7/7/25 11:13, Tomasz Kaminski wrote:

On Mon, Jul 7, 2025 at 11:09 AM Jonathan Wakely  wrote:


On Fri, 4 Jul 2025 at 09:30, Luc Grosheintz 
wrote:


Previously the prerequisite of the extents ctors that

 static_extent(i) == dynamic_extent || extent(i) == other.extent(i).

was not checked. This commit adds the __glibcxx_assert and tests for it.

libstdc++-v3/ChangeLog:

 * include/std/mdspan (extents): Check prerequisite of the ctor

that

 static_extent(i) == dynamic_extent || extent(i) ==

other.extent(i).

 * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
 Test the implemented prerequisite.

Signed-off-by: Luc Grosheintz 
---
  libstdc++-v3/include/std/mdspan   | 13 +++
  .../mdspan/extents/class_mandates_neg.cc  |  2 ++
  .../mdspan/extents/extents_mismatch_neg.cc| 35 +++
  3 files changed, 50 insertions(+)
  create mode 100644

libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc


diff --git a/libstdc++-v3/include/std/mdspan

b/libstdc++-v3/include/std/mdspan

index cf20553aaa5..1d6cdc93d80 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 return __se;
 }

+   template
+ constexpr bool
+ _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept


Was this intended to be a static member function?


Yes, it should be. I will also adjust that locally.


Indeed it should have been. Thanks!






+ {
+   if constexpr (_OtherRank == _S_rank)
+ for (size_t __i = 0; __i < _S_rank; ++__i)
+   if (_Extents[__i] != dynamic_extent
+   && !cmp_equal(_Extents[__i],

_S_int_cast(__get_extent(__i

+ return false;
+   return true;
+ }
+
 template
   constexpr void
   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
   {
+

  __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));

 for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
   {
 size_t __di = __i;
diff --git

a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc

index f9c1c019666..67d18feda96 100644
---

a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc

+++

b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc

@@ -7,6 +7,8 @@ std::extents e1; // { dg-error

"from here" }

  std::extents e2; // { dg-error "from here" }
  std::extents e3; // { dg-error "from here" }
  std::extents e4;   // { dg-error "from here" }
+
  // { dg-prune-output "dynamic or representable as IndexType" }
  // { dg-prune-output "signed or unsigned integer" }
  // { dg-prune-output "invalid use of incomplete type" }
+// { dg-prune-output "non-constant condition for static assertion" }
diff --git

a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc

new file mode 100644
index 000..b35e5310d41
--- /dev/null
+++

b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc

@@ -0,0 +1,35 @@
+// { dg-do compile { target c++23 } }
+#include
+
+#include 
+
+constexpr size_t dyn = std::dynamic_extent;
+
+constexpr bool
+test_dyn2sta_extents_mismatch_00()
+{
+  auto e0 = std::extents{1};
+  [[maybe_unused]] auto e1 = std::extents{e0};// {

dg-error "expansion of" }

+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_00());// {

dg-error "expansion of" }

+
+constexpr bool
+test_dyn2sta_extents_mismatch_01()
+{
+  [[maybe_unused]] auto e = std::extents{2, 2}; // {

dg-error "expansion of" }

+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_01());   // {

dg-error "expansion of" }

+
+constexpr bool
+test_dyn2sta_extents_mismatch_02()
+{
+  std::array exts{2, 2};
+  [[maybe_unused]] auto e = std::extents{exts}; // {

dg-error "expansion of" }

+  return true;
+}
+static_assert(test_dyn2sta_extents_mismatch_02());   // {

dg-error "expansion of" }

+
+// { dg-prune-output "non-constant condition for static assertion" }
+// { dg-prune-output "__glibcxx_assert" }
--
2.49.0

Re: [PATCH v3 5/5] libstdc++: Implement mdspan and tests [PR107761].

2025-07-07 Thread Tomasz Kaminski

On Fri, Jul 4, 2025 at 10:37 AM Luc Grosheintz 
wrote:

> Implements the class mdspan as described in N4950, i.e. without P3029.
> It also adds tests for mdspan. This commit completes the implementation
> of P0009, i.e. the C++23 part .
>
> PR libstdc++/107761
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (mdspan): New class.
> * src/c++23/std.cc.in (mdspan): Add.
> * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
> * testsuite/23_containers/mdspan/mdspan.cc: New test.
> * testsuite/23_containers/mdspan/layout_like.h: Add class
> LayoutLike which models a user-defined layout.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
Only two small comments:
- use of header guard instead of pragma
- use of md.empty() instead of md.extents(0)
I will do these changes locally.

>  libstdc++-v3/include/std/mdspan   | 284 +
>  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
>  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
>  .../23_containers/mdspan/layout_like.h|  80 +++
>  .../testsuite/23_containers/mdspan/mdspan.cc  | 603 ++
>  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
>  6 files changed, 1034 insertions(+), 1 deletion(-)
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index 7e970c2b905..f64804e2a42 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -1057,6 +1057,290 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __p + __i; }
>  };
>
> +  namespace __mdspan
> +  {
> +template
> +  constexpr bool
> +  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm>
> __indices)
> +  {
> +   static_assert(__exts.rank() == _Nm);
> +   for (size_t __i = 0; __i < __exts.rank(); ++__i)
> + if (__indices[__i] >= __exts.extent(__i))
> +   return false;
> +   return true;
> +  }
> +  }
> +
> +  template +  typename _LayoutPolicy = layout_right,
> +  typename _AccessorPolicy = default_accessor<_ElementType>>
> +class mdspan
> +{
> +  static_assert(!is_array_v<_ElementType>,
> +   "ElementType must not be an array type");
> +  static_assert(!is_abstract_v<_ElementType>,
> +   "ElementType must not be an abstract class type");
> +  static_assert(__mdspan::__is_extents<_Extents>,
> +   "Extents must be a specialization of std::extents");
> +  static_assert(is_same_v<_ElementType,
> + typename _AccessorPolicy::element_type>);
> +
> +public:
> +  using extents_type = _Extents;
> +  using layout_type = _LayoutPolicy;
> +  using accessor_type = _AccessorPolicy;
> +  using mapping_type = typename layout_type::template
> mapping;
> +  using element_type = _ElementType;
> +  using value_type = remove_cv_t;
> +  using index_type = typename extents_type::index_type;
> +  using size_type = typename extents_type::size_type;
> +  using rank_type = typename extents_type::rank_type;
> +  using data_handle_type = typename accessor_type::data_handle_type;
> +  using reference = typename accessor_type::reference;
> +
> +  static constexpr rank_type
> +  rank() noexcept { return extents_type::rank(); }
> +
> +  static constexpr rank_type
> +  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
> +
> +  static constexpr size_t
> +  static_extent(rank_type __r) noexcept
> +  { return extents_type::static_extent(__r); }
> +
> +  constexpr index_type
> +  extent(rank_type __r) const noexcept { return
> extents().extent(__r); }
> +
> +  constexpr
> +  mdspan()
> +  requires (rank_dynamic() > 0)
> +  && is_default_constructible_v
> + && is_default_constructible_v
> + && is_default_constructible_v
> +  : _M_accessor(), _M_mapping(), _M_handle()
> +  { }
> +
> +  constexpr
> +  mdspan(const mdspan& __other) = default;
> +
> +  constexpr
> +  mdspan(mdspan&& __other) = default;
> +
> +  template<__mdspan::__valid_index_type... _OIndexTypes>
> +   requires (sizeof...(_OIndexTypes) == rank()
> +  || sizeof...(_OIndexTypes) == rank_dynamic())
> +&& is_constructible_v
> +&& is_default_constructible_v
> +   constexpr explicit
> +   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
> +   : _M_accessor(),
> +
>  _M_mapping(_Extents(static_cast(std::move(__exts))...)),
> + _M_handle(std::move(__handle))
> +   { }
> +
> +  template<__md

[Ada] Reapply tweaks to delay statements in ACATS 3&4 testsuites

2025-07-07 Thread Eric Botcazou

They had originally been applied to the ACATS 2 testsuite and I forgot to 
reapply them to the ACATS 4 testsuite altogether.

Tested on x86-64/Linux, applied on the mainline.


2025-07-07  Eric Botcazou  

* ada/acats-3/tests/c9/c94001c.ada: Tweak delay statements.
* ada/acats-4/tests/c9/c94001c.ada: Likewise.
* ada/acats-4/tests/c9/c94006a.ada: Likewise.
* ada/acats-4/tests/c9/c94008c.ada: Likewise.
* ada/acats-4/tests/c9/c951002.a: Likewise.
* ada/acats-4/tests/c9/c954a01.a: Likewise.
* ada/acats-4/tests/c9/c940005.a: Tweak duration constant.
* ada/acats-4/tests/c9/c940007.a: Likewise.
* ada/acats-4/tests/c9/c96001a.ada: Likewise.

-- 
Eric Botcazou
diff --git a/gcc/testsuite/ada/acats-3/tests/c9/c94001c.ada b/gcc/testsuite/ada/acats-3/tests/c9/c94001c.ada
index 0cc14f495cd..df38f99d735 100644
--- a/gcc/testsuite/ada/acats-3/tests/c9/c94001c.ada
+++ b/gcc/testsuite/ada/acats-3/tests/c9/c94001c.ada
@@ -211,7 +211,7 @@ BEGIN
 
  BEGIN -- (E)
   WHILE NOT(OUT_TSK'TERMINATED) AND DELAY_COUNT < 60 LOOP
-   DELAY 1.0;
+   DELAY 1.0 * Impdef.One_Long_Second;
DELAY_COUNT := DELAY_COUNT + 1;
   END LOOP;
   IF DELAY_COUNT = 60 THEN
@@ -254,7 +254,7 @@ BEGIN
 
  BEGIN
   WHILE NOT(OUT_TSK'TERMINATED) AND DELAY_COUNT < 60 LOOP
-   DELAY 1.0;
+   DELAY 1.0 * Impdef.One_Long_Second;
DELAY_COUNT := DELAY_COUNT + 1;
   END LOOP;
   IF DELAY_COUNT = 60 THEN
diff --git a/gcc/testsuite/ada/acats-4/tests/c9/c940005.a b/gcc/testsuite/ada/acats-4/tests/c9/c940005.a
index adb58b18ca4..47a97bf2de6 100644
--- a/gcc/testsuite/ada/acats-4/tests/c9/c940005.a
+++ b/gcc/testsuite/ada/acats-4/tests/c9/c940005.a
@@ -85,7 +85,7 @@ begin
   -- In reality one would expect a time of 5 to 10 seconds.  In
   -- the interests of speeding up the test suite a shorter time
   -- is used
-  Pulse_Time_Delta : constant duration := ImpDef.Switch_To_New_Task;
+  Pulse_Time_Delta : constant duration := ImpDef.Long_Switch_To_New_Task;
 
   -- control over stopping tasks
   protected Control is
diff --git a/gcc/testsuite/ada/acats-4/tests/c9/c940007.a b/gcc/testsuite/ada/acats-4/tests/c9/c940007.a
index c678463633a..41e80f4e25e 100644
--- a/gcc/testsuite/ada/acats-4/tests/c9/c940007.a
+++ b/gcc/testsuite/ada/acats-4/tests/c9/c940007.a
@@ -90,7 +90,7 @@ begin
   -- In reality one would expect a time of 5 to 10 seconds.  In
   -- the interests of speeding up the test suite a shorter time
   -- is used
-  Pulse_Time_Delta : constant duration := ImpDef.Switch_To_New_Task;
+  Pulse_Time_Delta : constant duration := ImpDef.Long_Switch_To_New_Task;
 
 
   -- control over stopping tasks
diff --git a/gcc/testsuite/ada/acats-4/tests/c9/c94001c.ada b/gcc/testsuite/ada/acats-4/tests/c9/c94001c.ada
index 0cc14f495cd..df38f99d735 100644
--- a/gcc/testsuite/ada/acats-4/tests/c9/c94001c.ada
+++ b/gcc/testsuite/ada/acats-4/tests/c9/c94001c.ada
@@ -211,7 +211,7 @@ BEGIN
 
  BEGIN -- (E)
   WHILE NOT(OUT_TSK'TERMINATED) AND DELAY_COUNT < 60 LOOP
-   DELAY 1.0;
+   DELAY 1.0 * Impdef.One_Long_Second;
DELAY_COUNT := DELAY_COUNT + 1;
   END LOOP;
   IF DELAY_COUNT = 60 THEN
@@ -254,7 +254,7 @@ BEGIN
 
  BEGIN
   WHILE NOT(OUT_TSK'TERMINATED) AND DELAY_COUNT < 60 LOOP
-   DELAY 1.0;
+   DELAY 1.0 * Impdef.One_Long_Second;
DELAY_COUNT := DELAY_COUNT + 1;
   END LOOP;
   IF DELAY_COUNT = 60 THEN
diff --git a/gcc/testsuite/ada/acats-4/tests/c9/c94006a.ada b/gcc/testsuite/ada/acats-4/tests/c9/c94006a.ada
index 6b9c85f490e..cac5fc6e09c 100644
--- a/gcc/testsuite/ada/acats-4/tests/c9/c94006a.ada
+++ b/gcc/testsuite/ada/acats-4/tests/c9/c94006a.ada
@@ -28,6 +28,7 @@
 -- TBN  9/17/86
 -- PWN 01/31/95  REMOVED PRAGMA PRIORITY FOR ADA 9X.
 
+with Impdef;
 WITH REPORT; USE REPORT;
 WITH SYSTEM; USE SYSTEM;
 PROCEDURE C94006A IS
@@ -41,7 +42,7 @@ PROCEDURE C94006A IS
   SELECT
ACCEPT E;
   OR
-   DELAY 30.0;
+   DELAY 30.0 * Impdef.One_Long_Second;
   END SELECT;
  END TT;
 
diff --git a/gcc/testsuite/ada/acats-4/tests/c9/c94008c.ada b/gcc/testsuite/ada/acats-4/tests/c9/c94008c.ada
index 6d10e25f024..fb2eee97f69 100644
--- a/gcc/testsuite/ada/acats-4/tests/c9/c94008c.ada
+++ b/gcc/testsuite/ada/acats-4/tests/c9/c94008c.ada
@@ -33,6 +33,7 @@
 -- JBG 8/29/86 ELIMINATED SHARED VARIABLES; ADDED GENERIC UNIT
 -- PWN 11/30/94 REMOVED PRAGMA PRIORITY INSTANCES FOR ADA 9X.
 
+with Impdef;
 WITH REPORT; USE REPORT;
 WITH SYSTEM; USE SYSTEM;
 PROCEDURE C94008C IS
@@ -198,10 +199,10 @@ BEGIN -- C94008C
 OR WHEN ENTER_TERMINATE => TERMINATE;
 END SELECT;
 
-DELAY 10.0;
+

Re: [PATCH 2/2] add masked-epilogue tuning

2025-07-07 Thread Hongtao Liu

On Fri, Jul 4, 2025 at 5:45 PM Richard Biener  wrote:
>
> The following adds a x86 tuning to enable the use of AVX512 masked
> epilogues in cases we heuristically determine it to be not detrimental
> by high chance.  Basically problematic cases are when there are
> data streams that are both stored and loaded from and an outer loop
> could end up executing only the inner loop masked epilogue and with
> unlucky data stream advancement from the outer loop end up needing
> to forward from masked stores to masked loads.  This isn't very
> well handled, esp. for the case where unmasked operations would
> not need to forward at all - that is, when forwarding completely
> from the masked out portion of the store (like the AVX upper half
> to the AVX lower half of a load).  There's also the case where
> the number of iterations is known at compile time, only with
> cost comparing we'd consider a non-masked epilog - as we are not
> doing that we have to add heuristics to avoid masking when a
> single vector epilog iteration would cover all scalar iterations
> left (this is exercised by gcc.target/i386/pr110310.c).
>
> SPEC CPU 2017 shows 3% text size savings over not using masked
> epilogues with performance impact in the noise.  Masking all vector
> epilogues gets that to 4% text size savings with some major
> runtime regressions in 503.bwaves_r and 527.cam4_r
> (measured on a Zen4 system), we're leaving a 5% improvement
> for 549.fotonik3d_r unrealized with the implemented heuristic.
It looks interesting.
I'll try with avx256_masked_epilogues to see if there's something unusual.
>
> With the heuristics we turn 22513 vector epilogues + up to 12305 scalar
> epilogues into 12305 masked vector epilogues of which 574 are for
> AVX vector sizes, 79 for SSE vector sizes and the rest for AVX512.
> When masking all epilogues we get 14567 of them from
> 29467 vector + up to 14567 scalar epilogues, so the heuristics disable
> an additional 20% of masked epilogues.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
> * config/i386/x86-tune.def (X86_TUNE_AVX512_MASKED_EPILOGUES):
> New tunable, default on for m_ZNVER4 and m_ZNVER5.
> * config/i386/i386.cc (ix86_vector_costs::finish_cost): With
> X86_TUNE_AVX512_MASKED_EPILOGUES and when the main loop
> had a vectorization factor > 2 use a masked epilogue when
> possible and when not obviously problematic.
>
> * gcc.target/i386/vect-mask-epilogue-1.c: New testcase.
> * gcc.target/i386/vect-mask-epilogue-2.c: Likewise.
> * gcc.target/i386/vect-epilogues-3.c: Adjust.
> ---
>  gcc/config/i386/i386.cc   | 59 +++
>  gcc/config/i386/x86-tune.def  |  5 ++
>  .../gcc.target/i386/vect-epilogues-3.c|  2 +-
>  .../gcc.target/i386/vect-mask-epilogue-1.c| 11 
>  .../gcc.target/i386/vect-mask-epilogue-2.c| 14 +
>  5 files changed, 90 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/vect-mask-epilogue-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b64175d6c93..8e796ea4033 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -26295,6 +26295,65 @@ ix86_vector_costs::finish_cost (const vector_costs 
> *scalar_costs)
>&& LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
>  m_suggested_epilogue_mode = V8QImode;
>
> +  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
> + a masked epilogue if that doesn't seem detrimental.  */
> +  if (loop_vinfo
> +  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
> +  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
> +  && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
> +  && !OPTION_SET_P (param_vect_partial_vector_usage))
> +{
> +  bool avoid = false;
> +  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
> +   {
> + unsigned int peel_niter
> +   = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
> + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
> +   peel_niter += 1;
> + /* When we know the number of scalar iterations of the epilogue,
> +avoid masking when a single vector epilog iteration handles
> +it in full.  */
> + if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
> +% LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant 
> ()))
> +   avoid = true;
> +   }
> +  if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
> +   for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
> + {
> +   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
> + ;
> +   else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
> + ;
> +

Re: [PATCH v3 2/5] libstdc++: Check prerequisite of extents::extents.

2025-07-07 Thread Tomasz Kaminski

On Fri, Jul 4, 2025 at 10:30 AM Luc Grosheintz 
wrote:

> Previously the prerequisite of the extents ctors that
>
> static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
>
> was not checked. This commit adds the __glibcxx_assert and test them.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (extents): Check prerequisite of the ctor that
> static_extent(i) == dynamic_extent || extent(i) == other.extent(i).
> * testsuite/23_containers/mdspan/extents/class_mandates_neg.cc:
> Test the implemented prerequisite.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM.

>  libstdc++-v3/include/std/mdspan   | 13 +++
>  .../mdspan/extents/class_mandates_neg.cc  |  2 ++
>  .../mdspan/extents/extents_mismatch_neg.cc| 35 +++
>  3 files changed, 50 insertions(+)
>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index cf20553aaa5..1d6cdc93d80 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -110,10 +110,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> return __se;
> }
>
> +   template
> + constexpr bool
> + _S_is_compatible_extents(_GetOtherExtent __get_extent) noexcept
> + {
> +   if constexpr (_OtherRank == _S_rank)
> + for (size_t __i = 0; __i < _S_rank; ++__i)
> +   if (_Extents[__i] != dynamic_extent
> +   && !cmp_equal(_Extents[__i],
> _S_int_cast(__get_extent(__i))))
> + return false;
> +   return true;
> + }
> +
> template
>   constexpr void
>   _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
>   {
> +
>  __glibcxx_assert(_S_is_compatible_extents<_OtherRank>(__get_extent));
> for (size_t __i = 0; __i < _S_rank_dynamic; ++__i)
>   {
> size_t __di = __i;
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> index f9c1c019666..67d18feda96 100644
> ---
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
> @@ -7,6 +7,8 @@ std::extents e1; // { dg-error
> "from here" }
>  std::extents e2; // { dg-error "from here" }
>  std::extents e3; // { dg-error "from here" }
>  std::extents e4;   // { dg-error "from here" }
> +
>  // { dg-prune-output "dynamic or representable as IndexType" }
>  // { dg-prune-output "signed or unsigned integer" }
>  // { dg-prune-output "invalid use of incomplete type" }
> +// { dg-prune-output "non-constant condition for static assertion" }
> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> new file mode 100644
> index 000..b35e5310d41
> --- /dev/null
> +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
> @@ -0,0 +1,35 @@
> +// { dg-do compile { target c++23 } }
> +#include
> +
> +#include 
> +
> +constexpr size_t dyn = std::dynamic_extent;
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_00()
> +{
> +  auto e0 = std::extents{1};
> +  [[maybe_unused]] auto e1 = std::extents{e0};// {
> dg-error "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_00());// {
> dg-error "expansion of" }
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_01()
> +{
> +  [[maybe_unused]] auto e = std::extents{2, 2}; // {
> dg-error "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_01());   // {
> dg-error "expansion of" }
> +
> +constexpr bool
> +test_dyn2sta_extents_mismatch_02()
> +{
> +  std::array exts{2, 2};
> +  [[maybe_unused]] auto e = std::extents{exts}; // {
> dg-error "expansion of" }
> +  return true;
> +}
> +static_assert(test_dyn2sta_extents_mismatch_02());   // {
> dg-error "expansion of" }
> +
> +// { dg-prune-output "non-constant condition for static assertion" }
> +// { dg-prune-output "__glibcxx_assert" }
> --
> 2.49.0
>
>

[PATCH] tree-optimization/120817 - bogus DSE of .MASK_STORE

2025-07-07 Thread Richard Biener

DSE used ao_ref_init_from_ptr_and_size for .MASK_STORE but
alias-analysis will use the specified size to disambiguate
against smaller objects.  For .MASK_STORE we instead have to
make the access size unspecified but we can still constrain
the access extent based on the maximum size possible.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

No testcase, I'd appreciate a runtime one.

PR tree-optimization/120817
* tree-ssa-dse.cc (initialize_ao_ref_for_dse): Use
ao_ref_init_from_ptr_and_range with unknown size for
.MASK_STORE and .MASK_LEN_STORE.
---
 gcc/tree-ssa-dse.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 5ac4280ee36..51a572316cd 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -181,10 +181,10 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, 
bool may_def_ok = false)
   can provide a may-def variant.  */
if (may_def_ok)
  {
-   ao_ref_init_from_ptr_and_size (
- write, gimple_call_arg (stmt, 0),
- TYPE_SIZE_UNIT (
-   TREE_TYPE (gimple_call_arg (stmt, stored_value_index))));
+   ao_ref_init_from_ptr_and_range (
+ write, gimple_call_arg (stmt, 0), true, 0, -1,
+ tree_to_poly_int64 (TYPE_SIZE (
+   TREE_TYPE (gimple_call_arg (stmt, stored_value_index)))));
return true;
  }
break;
-- 
2.43.0

Re: [PATCH v1] libstdc++: Set FMT for complete C++23 mdspan.

2025-07-07 Thread Tomasz Kaminski

Ah, I just missed the other e-mail, and just found it.
Yes, in that case it makes sense to include it in the patch series.
You may also update the last two commits.

On Mon, Jul 7, 2025 at 1:27 PM Luc Grosheintz 
wrote:

>
>
> On 7/7/25 13:24, Tomasz Kaminski wrote:
> > On Mon, Jul 7, 2025 at 12:34 PM Luc Grosheintz  >
> > wrote:
> >
> >>
> >>
> >> On 7/7/25 11:45, Tomasz Kaminski wrote:
> >>> On Mon, Jul 7, 2025 at 11:41 AM Luc Grosheintz <
> luc.groshei...@gmail.com
> >>>
> >>> wrote:
> >>>
>  libstdc++-v3/ChangeLog:
> 
>    * include/bits/version.def (mdspan): Set to 202207 and
> remove
>    no_stdname.
>    * include/bits/version.h: Regenerate.
>    * testsuite/23_containers/mdspan/mdspan_ftm.cc: Test
> presence
>    of FTM.
> 
>  Signed-off-by: Luc Grosheintz 
>  ---
> 
> >>> Some minor suggestions below. Could you please also mention PR107761 in
> >> the
> >>> commit.
> >>> We have automation that will put that in bugzilla, so people tracking
> >>> issues can see it.
> >>> I would do that in general for any new commits implementing language
> >>> feature.
> >>> Let me know if you will not be able to find a corresponding bugzilla
> >> ticket.
> >>
> >> Makes sense, and I've been horribly inconsistent about this (I
> completely
> >> forgot during all of the layout patches).
> >>
> >> I'll add the updated commit to v4 of the mdspan patch series.
> >>
> > I will be merging patch series for mdspan with changes made locally.
> > Please just send v2 of this patch.
>
> Even though there's a major complaint related to ADL issues in
> swap?
>
> https://gcc.gnu.org/pipermail/libstdc++/2025-July/062380.html
>
> >
> >>
> >>>
> libstdc++-v3/include/bits/version.def | 3 +--
> libstdc++-v3/include/bits/version.h   | 3 ++-
> libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc | 6
> ++
> 3 files changed, 9 insertions(+), 3 deletions(-)
> create mode 100644
>  libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> 
>  diff --git a/libstdc++-v3/include/bits/version.def
>  b/libstdc++-v3/include/bits/version.def
>  index f4ba501c403..ad909afd20e 100644
>  --- a/libstdc++-v3/include/bits/version.def
>  +++ b/libstdc++-v3/include/bits/version.def
>  @@ -1007,9 +1007,8 @@ ftms = {
> 
> ftms = {
>   name = mdspan;
>  -  no_stdname = true; // FIXME: remove
>   values = {
>  -v = 1; // FIXME: 202207
>  +v = 202207;
> cxxmin = 23;
>   };
> };
>  diff --git a/libstdc++-v3/include/bits/version.h
>  b/libstdc++-v3/include/bits/version.h
>  index dc8ac07be16..72f9231846e 100644
>  --- a/libstdc++-v3/include/bits/version.h
>  +++ b/libstdc++-v3/include/bits/version.h
>  @@ -1126,8 +1126,9 @@
> 
> #if !defined(__cpp_lib_mdspan)
> # if (__cplusplus >= 202100L)
>  -#  define __glibcxx_mdspan 1L
>  +#  define __glibcxx_mdspan 202207L
> #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
>  +#   define __cpp_lib_mdspan 202207L
> #  endif
> # endif
> #endif /* !defined(__cpp_lib_mdspan) &&
> >> defined(__glibcxx_want_mdspan) */
>  diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
>  b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
>  new file mode 100644
>  index 000..fc528293e66
>  --- /dev/null
>  +++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan_ftm.cc
> 
> >>> I would name the file just ftm.
> >>>
>  @@ -0,0 +1,6 @@
>  +// { dg-do compile { target c++23 } }
>  +#include 
>  +
>  +#ifndef __cpp_lib_mdspan
>  +#error "Missing FTM"
>  +#endif
> 
> >>> We usually use following to test the feature test macro:
> >>> #ifndef __cpp_lib_ranges_to_container
> >>> # error "Feature test macro for ranges_to_container is missing in
> >> "
> >>> #elif __cpp_lib_ranges_to_container < 202202L
> >>> # error "Feature test macro for ranges_to_container has wrong value in
> >>> "
> >>> #endif
> >>>
> >>>
>  --
>  2.49.0
> 
> 
> >>>
> >>
> >>
> >
>
>

Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations

2025-07-07 Thread Richard Sandiford

Kyrylo Tkachov  writes:
> Hi all,
>
> To handle DImode BCAX operations we want to do them on the SIMD side only if
> the incoming arguments don't require a cross-bank move.
> This means we need to split back the combination to separate GP BIC+EOR
> instructions if the operands are expected to be in GP regs through reload.
> The split happens pre-reload if we already know that the destination will be
> a GP reg. Otherwise if reload decides to use the "=r,r" alternative we ensure
> operand 0 is early-clobber.
> This scheme is similar to how we handle the BSL operations elsewhere in
> aarch64-simd.md.
>
> Thus, for the functions:
> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, 
> c); }
> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX 
> (a, b, c); }
>
> we now generate the desired:
> bcax_d_gp:
> bic x1, x1, x2
> eor x0, x1, x0
> ret
>
> bcax_d:
> bcax v0.16b, v0.16b, v1.16b, v2.16b
> ret
>
> When the inputs are in SIMD regs we use BCAX and when they are in GP regs we
> don't force them to SIMD with extra moves.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for trunk?
> Thanks,
> Kyrill
>
> Signed-off-by: Kyrylo Tkachov 
>
> gcc/
>
>   * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>   define_insn_and_split.
>
> gcc/testsuite/
>
>   * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
>
> From 95268cff1261a7724190dd291f9fcb5a7c817917 Mon Sep 17 00:00:00 2001
> From: Kyrylo Tkachov 
> Date: Thu, 3 Jul 2025 09:45:02 -0700
> Subject: [PATCH 3/7] aarch64: Handle DImode BCAX operations
>
> To handle DImode BCAX operations we want to do them on the SIMD side only if
> the incoming arguments don't require a cross-bank move.
> This means we need to split back the combination to separate GP BIC+EOR
> instructions if the operands are expected to be in GP regs through reload.
> The split happens pre-reload if we already know that the destination will be
> a GP reg.  Otherwise if reload decides to use the "=r,r" alternative we 
> ensure
> operand 0 is early-clobber.
> This scheme is similar to how we handle the BSL operations elsewhere in
> aarch64-simd.md.
>
> Thus, for the functions:
> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, 
> c); }
> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX 
> (a, b, c); }
>
> we now generate the desired:
> bcax_d_gp:
> bic x1, x1, x2
> eor x0, x1, x0
> ret
>
> bcax_d:
> bcax v0.16b, v0.16b, v1.16b, v2.16b
> ret
>
> When the inputs are in SIMD regs we use BCAX and when they are in GP regs we
> don't force them to SIMD with extra moves.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Signed-off-by: Kyrylo Tkachov 
>
> gcc/
>
>   * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>   define_insn_and_split.
>
> gcc/testsuite/
>
>   * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
> ---
>  gcc/config/aarch64/aarch64-simd.md| 29 +++
>  .../gcc.target/aarch64/simd/bcax_d.c  |  6 +++-
>  2 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 4493e55603d..be6a16b4be8 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -9252,6 +9252,35 @@
>[(set_attr "type" "crypto_sha3")]
>  )
>  
> +(define_insn_and_split "*bcaxqdi4"
> +  [(set (match_operand:DI 0 "register_operand" "=w,&r")
> + (xor:DI
> +   (and:DI
> + (not:DI (match_operand:DI 3 "register_operand" "w,r"))
> + (match_operand:DI 2 "register_operand" "w,r"))
> +   (match_operand:DI 1 "register_operand" "w,r")))]

I think the constraint on operand 1 should be "w,r0", so that we allow
operand 1 to be the same as operand 0.  Without that, and with split1
disabled/sidelined, we would end up with an extra move for:

  uint64_t f(uint64_t x0, uint64_t x1, uint64_t x2) {
return x0 ^ (x1 & ~x2);
  }

(The only reason split1 avoids the extra move is that combine combines
the hard register copy into the *bcaxqdi4, which is a bit dubious from
an RA perspective.)

It would be good to use the new alternative syntax for multi-alternative
instructions.

OK with those changes, thanks.

Richard

> +  "TARGET_SHA3"
> +  "@
> +   bcax\t%0.16b, %1.16b, %2.16b, %3.16b
> +   #"
> +  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
> +  [(set (match_dup 4)
> + (and:DI (not:DI (match_dup 3))
> + (match_dup 2)))
> +   (set (match_dup 0)
> + (xor:DI (match_dup 4)
> + (match_dup 1)))]
> +  {
> +if (reload_completed)
> +  operands[4] = operands[0];
> +else if (can_create_pseudo_p ())
> +  operands[4] = gen_reg_rtx (DImode);
> +else
> +  FAIL;
> +  }
> +  [(set_attr "type" "crypto_sha3,multiple")]
> +)
> +
>  ;; SM3
>  
>  (define_insn "aarch64_sm

RE: [PATCH] tree-optimization/120817 - bogus DSE of .MASK_STORE

2025-07-07 Thread Tamar Christina

> -Original Message-
> From: Richard Biener 
> Sent: Monday, July 7, 2025 12:30 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Tamar Christina 
> Subject: [PATCH] tree-optimization/120817 - bogus DSE of .MASK_STORE
> 
> DSE used ao_ref_init_from_ptr_and_size for .MASK_STORE but
> alias-analysis will use the specified size to disambiguate
> against smaller objects.  For .MASK_STORE we instead have to
> make the access size unspecified but we can still constrain
> the access extent based on the maximum size possible.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> No testcase, I'd appreciate a runtime one.

Tested locally:

/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-mcpu=neoverse-n2 -O1 -ftree-loop-vectorize" { 
target aarch64*-*-* } */

#include "tree-vect.h"

typedef struct {
int _M_current;
} __normal_iterator;

typedef struct {
char _M_elems[5];
} array_5;

__normal_iterator __trans_tmp_1 = {-5};

__attribute__((noipa))
array_5 copySourceIntoTarget() {
array_5 target;
char* target_it = target._M_elems;

while (__trans_tmp_1._M_current != 0) {
*target_it = 1;
__trans_tmp_1._M_current++;
target_it++;
}

return target;
}

int main ()
{

  check_vect ();

  array_5 res = copySourceIntoTarget();

#pragma GCC novector
  for (int i = 0; i < 5; i++)
if (res._M_elems[i] != 1)
  __builtin_abort ();
}

--

> 
>   PR tree-optimization/120817
>   * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Use
>   ao_ref_init_from_ptr_and_range with unknown size for
>   .MASK_STORE and .MASK_LEN_STORE.
> ---
>  gcc/tree-ssa-dse.cc | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
> index 5ac4280ee36..51a572316cd 100644
> --- a/gcc/tree-ssa-dse.cc
> +++ b/gcc/tree-ssa-dse.cc
> @@ -181,10 +181,10 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write,
> bool may_def_ok = false)
>  can provide a may-def variant.  */
>   if (may_def_ok)
> {
> - ao_ref_init_from_ptr_and_size (
> -   write, gimple_call_arg (stmt, 0),
> -   TYPE_SIZE_UNIT (
> - TREE_TYPE (gimple_call_arg (stmt, stored_value_index))));
> + ao_ref_init_from_ptr_and_range (
> +   write, gimple_call_arg (stmt, 0), true, 0, -1,
> +   tree_to_poly_int64 (TYPE_SIZE (
> + TREE_TYPE (gimple_call_arg (stmt, stored_value_index)))));
>   return true;
> }
>   break;
> --
> 2.43.0

Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations

2025-07-07 Thread Richard Sandiford

Richard Sandiford  writes:
> Kyrylo Tkachov  writes:
>> Hi all,
>>
>> To handle DImode BCAX operations we want to do them on the SIMD side only if
>> the incoming arguments don't require a cross-bank move.
>> This means we need to split back the combination to separate GP BIC+EOR
>> instructions if the operands are expected to be in GP regs through reload.
>> The split happens pre-reload if we already know that the destination will be
>> a GP reg. Otherwise if reload descides to use the "=r,r" alternative we 
>> ensure
>> operand 0 is early-clobber.
>> This scheme is similar to how we handle the BSL operations elsewhere in
>> aarch64-simd.md.
>>
>> Thus, for the functions:
>> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, 
>> c); }
>> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX 
>> (a, b, c); }
>>
>> we now generate the desired:
>> bcax_d_gp:
>> bic x1, x1, x2
>> eor x0, x1, x0
>> ret
>>
>> bcax_d:
>> bcax v0.16b, v0.16b, v1.16b, v2.16b
>> ret
>>
>> When the inputs are in SIMD regs we use BCAX and when they are in GP regs we
>> don't force them to SIMD with extra moves.
>>
>> Bootstrapped and tested on aarch64-none-linux-gnu.
>> Ok for trunk?
>> Thanks,
>> Kyrill
>>
>> Signed-off-by: Kyrylo Tkachov 
>>
>> gcc/
>>
>>  * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>>  define_insn_and_split.
>>
>> gcc/testsuite/
>>
>>  * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
>>
>> From 95268cff1261a7724190dd291f9fcb5a7c817917 Mon Sep 17 00:00:00 2001
>> From: Kyrylo Tkachov 
>> Date: Thu, 3 Jul 2025 09:45:02 -0700
>> Subject: [PATCH 3/7] aarch64: Handle DImode BCAX operations
>>
>> To handle DImode BCAX operations we want to do them on the SIMD side only if
>> the incoming arguments don't require a cross-bank move.
>> This means we need to split back the combination to separate GP BIC+EOR
>> instructions if the operands are expected to be in GP regs through reload.
>> The split happens pre-reload if we already know that the destination will be
>> a GP reg.  Otherwise if reload descides to use the "=r,r" alternative we 
>> ensure
>> operand 0 is early-clobber.
>> This scheme is similar to how we handle the BSL operations elsewhere in
>> aarch64-simd.md.
>>
>> Thus, for the functions:
>> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, 
>> c); }
>> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX 
>> (a, b, c); }
>>
>> we now generate the desired:
>> bcax_d_gp:
>> bic x1, x1, x2
>> eor x0, x1, x0
>> ret
>>
>> bcax_d:
>> bcax v0.16b, v0.16b, v1.16b, v2.16b
>> ret
>>
>> When the inputs are in SIMD regs we use BCAX and when they are in GP regs we
>> don't force them to SIMD with extra moves.
>>
>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>
>> Signed-off-by: Kyrylo Tkachov 
>>
>> gcc/
>>
>>  * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>>  define_insn_and_split.
>>
>> gcc/testsuite/
>>
>>  * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
>> ---
>>  gcc/config/aarch64/aarch64-simd.md| 29 +++
>>  .../gcc.target/aarch64/simd/bcax_d.c  |  6 +++-
>>  2 files changed, 34 insertions(+), 1 deletion(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64-simd.md 
>> b/gcc/config/aarch64/aarch64-simd.md
>> index 4493e55603d..be6a16b4be8 100644
>> --- a/gcc/config/aarch64/aarch64-simd.md
>> +++ b/gcc/config/aarch64/aarch64-simd.md
>> @@ -9252,6 +9252,35 @@
>>[(set_attr "type" "crypto_sha3")]
>>  )
>>  
>> +(define_insn_and_split "*bcaxqdi4"
>> +  [(set (match_operand:DI 0 "register_operand" "=w,&r")
>> +(xor:DI
>> +  (and:DI
>> +(not:DI (match_operand:DI 3 "register_operand" "w,r"))
>> +(match_operand:DI 2 "register_operand" "w,r"))
>> +  (match_operand:DI 1 "register_operand" "w,r")))]
>
> I think the constraint on operand 1 should be "w,r0", so that we allow
> operand 1 to be the same as operand 0.  Without that, and with split1
> disabled/sidelined, we would end up with an extra move for:
>
>   uint64_t f(uint64_t x0, uint64_t x1, uint64_t x2) {
> return x0 ^ (x1 & ~x2);
>   }
>
> (The only reason split1 avoids the extra move is that combine combines
> the hard register copy into the *bcaxqdi4, which is a bit dubious from
> an RA perspective.)

Sigh.  Wrong way round, of course: it's operands 2 and 3 that can be "w,r0".

Richard

[PATCH] libstdc++: Format %a/%A/%b/%h/%B/%p without using locale::classic [PR110739]

2025-07-07 Thread Tomasz Kamiński

With changes r16-2063-g8ad5968a8dcb47 the _M_a_A, _M_b_B and _M_p functions
are called only if the locale is equal to the locale::classic(), for which
the behavior is known. This patch changes their implementation, so instead of
referring to __timepunct facet members, they use a hardcoded list of English
weekday and month names. Only one list is needed, as in the case of
locale::classic() the abbreviated name corresponds to the first three letters
of the full name.

For _M_p, _M_r we use a new _S_fill_ampm helper, that fills the provided buffer
with "AM"/"PM" depending on the hours value.

In _M_S we no longer guard querying of the numpunct facet with a check that
requires potentially equally expensive construction of locale::classic. We also
mark the localized path as unlikely.

The _M_locale method is no longer used in __formatter_chrono, and thus was
moved to __formatter_duration.

PR libstdc++/110739

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__formatter_chrono::_S_weekdays)
(__formatter_chrono::_S_months, __formatter_chrono::_S_fill_ampm):
Define.
(__formatter_chrono::_M_format_to): Do not pass context parameter
to functions listed below.
(__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Implement
using hardcoded list of names, and remove format context parameter.
(__formatter_chrono::_M_p, __formatter_chrono::_M_r): Implement
using _S_fill_ampm.
(__formatter_chrono::_M_c): Removed format context parameter.
(__formatter_chrono::_M_subsecs): Call __ctx.locale() directly,
instead of _M_locale and do not compare with locale::classic().
Add [[unlikely]] attributes.
(__formatter_chrono::_M_locale): Move to __formatter_duration.
(__formatter_duration::_M_locale): Moved from __formatter_chrono.
---
Changing the approach, as the _M_a_A, _M_b_B and _M_p functions are no
longer called for locales other than classic().

Testing on x86_64-linux. Tests for std/time/format* already passed.
OK for trunk, when all tests pass.

 libstdc++-v3/include/bits/chrono_io.h | 154 +++---
 1 file changed, 87 insertions(+), 67 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 75ee7e818b2..fe3c9126749 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -881,16 +881,32 @@ namespace __format
   _S_empty_fs()
   { return _Runtime_format_string<_CharT>(_S_empty_spec); }
 
-  // Return the formatting locale.
-  template
-   std::locale
-   _M_locale(_FormatContext& __fc) const
-   {
- if (!_M_spec._M_localized)
-   return std::locale::classic();
- else
-   return __fc.locale();
-   }
+  static constexpr const _CharT* _S_weekdays[]
+  {
+   _GLIBCXX_WIDEN("Sunday"),
+   _GLIBCXX_WIDEN("Monday"),
+   _GLIBCXX_WIDEN("Tuesday"),
+   _GLIBCXX_WIDEN("Wednesday"),
+   _GLIBCXX_WIDEN("Thursday"),
+   _GLIBCXX_WIDEN("Friday"),
+   _GLIBCXX_WIDEN("Saturday"),
+  };
+
+  static constexpr const _CharT* _S_months[]
+  {
+   _GLIBCXX_WIDEN("January"),
+   _GLIBCXX_WIDEN("February"),
+   _GLIBCXX_WIDEN("March"),
+   _GLIBCXX_WIDEN("April"),
+   _GLIBCXX_WIDEN("May"),
+   _GLIBCXX_WIDEN("June"),
+   _GLIBCXX_WIDEN("July"),
+   _GLIBCXX_WIDEN("August"),
+   _GLIBCXX_WIDEN("September"),
+   _GLIBCXX_WIDEN("October"),
+   _GLIBCXX_WIDEN("November"),
+   _GLIBCXX_WIDEN("December"),
+  };
 
 private:
   template
@@ -1051,15 +1067,15 @@ namespace __format
  break;
case 'a':
case 'A':
- __out = _M_a_A(__t._M_weekday, std::move(__out), __fc, __c == 
'A');
+ __out = _M_a_A(__t._M_weekday, std::move(__out), __c == 'A');
  break;
case 'b':
case 'h':
case 'B':
- __out = _M_b_B(__t._M_month, std::move(__out), __fc, __c == 
'B');
+ __out = _M_b_B(__t._M_month, std::move(__out), __c == 'B');
  break;
case 'c':
- __out = _M_c(__t, std::move(__out), __fc);
+ __out = _M_c(__t, std::move(__out));
  break;
case 'C':
case 'y':
@@ -1095,7 +1111,7 @@ namespace __format
  __out = _M_M(__t._M_minutes, __print_sign());
  break;
case 'p':
- __out = _M_p(__t._M_hours, std::move(__out), __fc);
+ __out = _M_p(__t._M_hours, std::move(__out));
  break;
case 'q':
  __out = _M_q(__t._M_unit_suffix, std::move(__out));
@@ -1104,7 +1120,7 @@ namespace __format
  __out = _M_Q(__t, __print_sign(), __fc);
  break;
case 'r':
-

[PATCH v2] Handle non default git prefix configurations

2025-07-07 Thread Pierre-Emmanuel Patry

Mklog parses the diff content from prepare-commit-msg hook but fails
when git has been configured with mnemonicPrefix. Forcing the default
values for the prefixes would set a distinct diff configuration supported
by mklog and prevent most failures.

contrib/ChangeLog:

* prepare-commit-msg: Force default git prefixes.

Signed-off-by: Pierre-Emmanuel Patry 
---
 contrib/prepare-commit-msg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg
index 1b878772dcc..75d102559c7 100755
--- a/contrib/prepare-commit-msg
+++ b/contrib/prepare-commit-msg
@@ -78,4 +78,4 @@ else
 tee="cat"
 fi
 
-git $cmd | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
+git $cmd --default-prefix | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
-- 
2.47.2

Re: [PATCH] c-family: Check backend for argument alignment on stack

2025-07-07 Thread Jason Merrill


On 7/1/25 5:36 PM, H.J. Lu wrote:

On Tue, Jul 1, 2025 at 9:37 PM Jason Merrill  wrote:


On 6/30/25 7:03 PM, H.J. Lu wrote:

On Mon, Jun 30, 2025 at 10:36 PM Jason Merrill  wrote:


On 6/28/25 7:00 AM, H.J. Lu wrote:

Since a backend may ignore user type alignment for arguments passed on
stack, check backend for argument alignment on stack when evaluating
__alignof.


I assume that's reflected in DECL_ALIGN, so could we just add PARM_DECL to


No.  targetm.calls.function_arg_boundary may have special handling for it.


Why wouldn't we adjust DECL_ALIGN of the PARM_DECL to reflect the actual
alignment of the argument?  Are you saying it could be different from
one call to another?


Function argument alignment is different from other places in memory if
the main variant type alignment is different:


Yes, I understand that function parameter alignment can be different 
from other objects of that type.


But since we have a PARM_DECL to represent that particular function 
parameter, it seems natural to represent that difference in the 
DECL_ALIGN of the PARM_DECL.  If you don't, its DECL_ALIGN is wrong.


Jason

Re: [PATCH] Handle non default git configurations with mklog

2025-07-07 Thread Pierre-Emmanuel Patry


On 7/7/25 3:41 PM, Alexander Monakov wrote:

Is switching the algorithm actually necessary for solving the problem at hand?
I think it is possible for the default algorithm to produce diffs that lead to
poorly generated changelog entries, so forcing it here seems undesirable.


I was not aware of such effect on changelog entries. I'll apply these 
modifications.



If just --default-prefix is enough, I think you can just add it to the below
line and commit/push.


I do not have write access to the repository.

--
Patry Pierre-Emmanuel
Compiler Engineer - Embecosm


OpenPGP_signature.asc
Description: OpenPGP digital signature

Re: [PATCH] libstdc++: Format %a/%A/%b/%h/%B/%p without using locale::classic [PR110739]

2025-07-07 Thread Tomasz Kaminski

On Mon, Jul 7, 2025 at 5:06 PM Tomasz Kamiński  wrote:

> With changes r16-2063-g8ad5968a8dcb47 the _M_a_A, _M_b_B and _M_p functions
> are called only if the locale is equal to the locale::classic(), for which
> the behavior is know. This patch changes they implementation, so instead of
> reffering to __timepunct facet members, they use hardcoded list of English
> weekday, months names. Only one list is needed, as in case of
> locale::classic()
> abbreviated name corresponds to first tree letters of the full name.
>
> For _M_p, _M_r we use a new _M_fill_ampm helper, that fills provided buffer
> with "AM"/"PM" depending on the hours value.
>
> In _M_S we no longer guard querying of numpuct facet, with check that
> requires
> potentially equally expensive construction of locale::classic. We also mark
> localized path as unlikely.
>
> The _M_locale method is no longer used in __formatter_chrono, and thus was
> moved to __formatter_duration.
>
> PR libstdc++/110739
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/chrono_io.h (__formatter_chrono::_S_weekdays)
> (__formatter_chrono::_S_months, __formatter_chrono::_S_fill_ampm):
> Define.
> (__formatter_chrono::_M_format_to): Do not pass context parameter
> to functions listed below.
> (__formatter_chrono::_M_a_A, __formatter_chrono::_M_b_B): Implement
> using harcoded list of names, and remove format context parameter.
> (__formatter_chrono::_M_p, __formatter_chrono::_M_r): Implement
> using _S_fill_ampm.
> (__formatter_chrono::_M_c): Removed format context parameter.
> (__formatter_chrono::_M_subsecs): Call __ctx.locale() directly,
> instead of _M_locale and do not compare with locale::classic().
> Add [[unlikely]] attributes.
> (__formatter_chrono::_M_locale): Move to __formatter_duration.
> (__formatter_duration::_M_locale): Moved from __formatter_chrono.
> ---
> Changing the approach, as we _M_a_A, _M_b_B and _M_p functions are no
> longer called for locale other than classic().
>
> Testing on x86_64-linux. Test for std/time/format* already passed.
> OK for trunk, when all test passes.
>
x86_64-linux test passed locally.

>
>  libstdc++-v3/include/bits/chrono_io.h | 154 +++---
>  1 file changed, 87 insertions(+), 67 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/chrono_io.h
> b/libstdc++-v3/include/bits/chrono_io.h
> index 75ee7e818b2..fe3c9126749 100644
> --- a/libstdc++-v3/include/bits/chrono_io.h
> +++ b/libstdc++-v3/include/bits/chrono_io.h
> @@ -881,16 +881,32 @@ namespace __format
>_S_empty_fs()
>{ return _Runtime_format_string<_CharT>(_S_empty_spec); }
>
> -  // Return the formatting locale.
> -  template
> -   std::locale
> -   _M_locale(_FormatContext& __fc) const
> -   {
> - if (!_M_spec._M_localized)
> -   return std::locale::classic();
> - else
> -   return __fc.locale();
> -   }
> +  static constexpr const _CharT* _S_weekdays[]
> +  {
> +   _GLIBCXX_WIDEN("Sunday"),
> +   _GLIBCXX_WIDEN("Monday"),
> +   _GLIBCXX_WIDEN("Tuesday"),
> +   _GLIBCXX_WIDEN("Wednesday"),
> +   _GLIBCXX_WIDEN("Thursday"),
> +   _GLIBCXX_WIDEN("Friday"),
> +   _GLIBCXX_WIDEN("Saturday"),
> +  };
> +
> +  static constexpr const _CharT* _S_months[]
> +  {
> +   _GLIBCXX_WIDEN("January"),
> +   _GLIBCXX_WIDEN("February"),
> +   _GLIBCXX_WIDEN("March"),
> +   _GLIBCXX_WIDEN("April"),
> +   _GLIBCXX_WIDEN("May"),
> +   _GLIBCXX_WIDEN("June"),
> +   _GLIBCXX_WIDEN("July"),
> +   _GLIBCXX_WIDEN("August"),
> +   _GLIBCXX_WIDEN("September"),
> +   _GLIBCXX_WIDEN("October"),
> +   _GLIBCXX_WIDEN("November"),
> +   _GLIBCXX_WIDEN("December"),
> +  };
>
>  private:
>template
> @@ -1051,15 +1067,15 @@ namespace __format
>   break;
> case 'a':
> case 'A':
> - __out = _M_a_A(__t._M_weekday, std::move(__out), __fc,
> __c == 'A');
> + __out = _M_a_A(__t._M_weekday, std::move(__out), __c ==
> 'A');
>   break;
> case 'b':
> case 'h':
> case 'B':
> - __out = _M_b_B(__t._M_month, std::move(__out), __fc, __c
> == 'B');
> + __out = _M_b_B(__t._M_month, std::move(__out), __c ==
> 'B');
>   break;
> case 'c':
> - __out = _M_c(__t, std::move(__out), __fc);
> + __out = _M_c(__t, std::move(__out));
>   break;
> case 'C':
> case 'y':
> @@ -1095,7 +1111,7 @@ namespace __format
>   __out = _M_M(__t._M_minutes, __print_sign());
>   break;
> case 'p':
> - __out = _M_p(__t._M_hours, std::move(__out), __fc);
> +

Re: [PATCH 4/7] aarch64: Use EOR3 for DImode values

2025-07-07 Thread Remi Machet

On 7/7/25 06:18, Kyrylo Tkachov wrote:
> External email: Use caution opening links or attachments
>
>
> Hi all,
>
> Similar to BCAX, we can use EOR3 for DImode, but we have to be careful
> not to force GP<->SIMD moves unnecessarily, so add a splitter for that case.
>
> So for input:
> uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, 
> c); }
> uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 
> (a, b, c); }
>
> We generate the desired:
> eor3_d_gp:
> eor x1, x1, x2
> eor x0, x1, x0
> ret
>
> eor3_d:
> eor3 v0.16b, v0.16b, v1.16b, v2.16b
> ret
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for trunk?
> Thanks,
> Kyrill
>
> Signed-off-by: Kyrylo Tkachov 
>
> gcc/
>
>  * config/aarch64/aarch64-simd.md (*eor3qdi4): New
>  define_insn_and_split.
>
> gcc/testsuite/
>
>  * gcc.target/aarch64/simd/eor3_d.c: Add tests for DImode operands.

Hi Kyrill,

I assume compact syntax is a no-go because of the different modifiers on 
operand 0 ('=' and '&')?

Also, shouldn't the second variant use '=&r' for operand 0 instead of '&r'?

Remi

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Siddhesh Poyarekar

On 2025-07-07 09:33, Siddhesh Poyarekar wrote:
The only difference between &a->fam[0] and &a->fam is not the value 
(that is

the same), just the type in one case say int *, in the other int [0:] *.
At least in GIMPLE pointer conversions are useless, so what exact type of
the argument is doesn't matter that much, but it matters e.g. when you're
dereferencing it.

Yes, that's why I'm thinking that we could use that flexibility to match
the type passed to .ACCESS_WITH_SIZE with that in
__builtin_dynamic_object_size.  There it's almost always a->fam or
&a->fam[0] in practice, rarely ever &a->fam.

Even in the rare case of __builtin_dynamic_object_size being passed
&a->fam, shouldn't a->fam = .ACCESS_WITH_SIZE(a->fam, ...) be
sufficient in its minimal function of preventing reordering?  There's no
actual dereference in .ACCESS_WITH_SIZE (it's a nop in practice, just a 
reordering barrier until the __builtin_dynamic_object_size call is 
replaced), so maybe we could do this?

I apologize, I based my understanding of .ACCESS_WITH_SIZE on my
faulty memory from discussions last year and not on what actually got
implemented.  Upon looking closer, this looks wrong to me for pointers:

  tree first_param = is_fam ? array_to_pointer_conversion (loc, ref)
 : build_unary_op (loc, ADDR_EXPR, ref, false);

  /* The result type of the call is a pointer to the original type
 of the ref.  */
  tree result_type = c_build_pointer_type (TREE_TYPE (ref));

We shouldn't be building an ADDR_EXPR for pointers, that's not 
equivalent to what's happening for FAM.  For the FAM case, the 
array_to_pointer conversion will make a cast (TYPE *)a->fam.  So for 
analogous treatment for pointers, it should be passed on directly, no 
need to build the ADDR_EXPR.

Also, I don't see why the result_type needs to be deduced separately 
instead of simply passing on TREE_TYPE (first_param), since 
array_to_pointer_conversion should set the type of the pointer to be the 
same as the element type.

Thanks,
Sid

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Siddhesh Poyarekar


On 2025-07-07 13:07, Qing Zhao wrote:

If the current bug is urgent to be fixed. and you are not comfortable with the 
simple Patch Sid provided, then I am okay to back it out now and then push it 
back with the fix to this current bug at a later time after everyone is 
comfortable with the current implementation.



Please back out the patchset for now and repost the series with the fix 
for 120929.  I agree with Richi and Jakub that my dereference hack is 
not a good idea.


Thanks,
Sid

Fix profile scaling in tree-inline.cc:initialize_cfun

2025-07-07 Thread Jan Hubicka

Hi,
initialize_cfun calls
 profile_count::adjust_for_ipa_scaling (&num, &den);
but then the result is never used.  This patch fixes it.  Overall scaling
of entry/exit block is a bit sloppy in tree-inline.  I will see if I can clean it up.

Bootstrapped/regtested x86_64-linux, committed.

* tree-inline.cc (initialize_cfun): Use num and den for scaling.
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index 7e0ac698e5e..e8fe035b180 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2888,11 +2888,9 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, 
profile_count count)
   profile_count::adjust_for_ipa_scaling (&num, &den);
 
   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
-ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
-   ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
+ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (num, den);
   EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
-EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
-   ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
+EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (num, den);
   if (src_cfun->eh)
 init_eh_for_function ();

Re: [PATCH, v2] Fortran: fix minor issues with coarrays (extended)

2025-07-07 Thread Steve Kargl

On Mon, Jul 07, 2025 at 08:53:16PM +0200, Harald Anlauf wrote:
> Andre,
> 
> I still don't get it, and the present version made it worse for me...
> 
> So let's see what I was thinking.  There are the following types of
> functions:
> 
> (0) impure, non-elemental functions, which likely have side-effects
> 
> (1) pure functions (in the f95 sense), i.e. pure non-elemental
> 
> (2) pure elemental functions (in the f95 sense)
> 
> (3) impure elemental functions (>= f2008)

It's now a bit more complicated.  There are SIMPLE procedures,
but gfortran does not support that prefix yet.

> Note that I understand "pure elemental" being different from
> "pure and elemental" as used in the comment: the first version
> really means both pure and elemental, the second could be read
> as either pure or elemental or pure elemental.  A native speaker
> may correct me if I am wrong...
> 
> Back to gfortran: we have in decl.cc::gfc_match_prefix
> 
>   /* If IMPURE it not seen but the procedure is ELEMENTAL, mark it as PURE.
> */
>   if (!seen_impure && current_attr.elemental && !current_attr.pure)
> {
>   if (!gfc_add_pure (&current_attr, NULL))
>   goto error;
> }
> 
> This explains the possible attributes we should see.

The standardese is

  F2023, 15.9 Elemental procedures
  ...

  An elemental subprogram has the prefix-spec ELEMENTAL.  An
  elemental subprogram is a pure subprogram unless it has the
  prefix-spec IMPURE.


I think the rest of your observations/questions are spot-on.

-- 
steve

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Siddhesh Poyarekar


On 2025-07-07 17:47, Jakub Jelinek wrote:

Even 6 arguments is IMHO too much.
/* Expand the IFN_ACCESS_WITH_SIZE function:
ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE,
  TYPE_OF_SIZE, ACCESS_MODE)
which returns the REF_TO_OBJ same as the 1st argument;

1st argument REF_TO_OBJ: The reference to the object;
2nd argument REF_TO_SIZE: The reference to the size of the object,
3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE 
represents
  0: the number of bytes.
  1: the number of the elements of the object type;
4th argument TYPE_OF_SIZE: A constant 0 with its TYPE being the same as the 
TYPE
 of the object referenced by REF_TO_SIZE


Wouldn't this always be TREE_TYPE(TREE_TYPE(REF_TO_SIZE))?


5th argument ACCESS_MODE:
 -1: Unknown access semantics
  0: none
  1: read_only
  2: write_only
  3: read_write
6th argument: A constant 0 with the pointer TYPE to the original flexible
  array type.


Likewise, wouldn't this always be TREE_TYPE(TREE_TYPE(REF_TO_OBJ))?  For 
a FAM, the frontend does array_to_pointer, so with the INDIRECT_REF at 
the end of build_access_with_size_for_counted_by gone, I reckon you 
should be able to get the type of the array element.  Likewise if it was 
a pointer and not a FAM.


TYPE_SIZE_UNIT may not work for them like you said, but there ought to 
be a usable expression that we can reach from the type, no?


Thanks,
Sid

[FYI] [vxworks] add aarch64 to vxworks-dummy.h set

2025-07-07 Thread Alexandre Oliva



It's not strictly necessary, because nothing defined therein is
referenced by anything in gcc/config/aarch64, but it was an oversight
to not have it there.

Tested with aarch64-vx7r2.  I'm checking it in.


for  gcc/ChangeLog

* config.gcc (vxworks-dummy.h): Add to aarch64-*-* as well.
---
 gcc/config.gcc |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a6f6efec4e103..5953ace0afff0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5894,7 +5894,7 @@ esac
 # distinguish VxWorks variants such as VxWorks 7 or 64).
 
 case ${target} in
-arm*-*-* | i[34567]86-*-* | mips*-*-* | powerpc*-*-* | sh*-*-* \
+aarch64*-*-* | arm*-*-* | i[34567]86-*-* | mips*-*-* | powerpc*-*-* | sh*-*-* \
 | sparc*-*-* | x86_64-*-*)
tm_file="vxworks-dummy.h ${tm_file}"
;;


-- 
Alexandre Oliva, happy hacker            https://blog.lx.oliva.nom.br/
Free Software Activist FSFLA co-founder GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity.
Excluding neuro-others for not behaving ""normal"" is *not* inclusive!

Re: [AutoFDO] Fix get_original_name to strip only names that are generated after auto-profile

2025-07-07 Thread Jan Hubicka

Hi,
there are two bugs in get_original_name.  First, the for loop walking the
list of known suffixes uses sizeof (suffixes).  It eventually walks to
an empty suffix.  The second problem is that strncmp may accept suffixes that
are longer, i.e. mix up .isra with .israabc.  This is probably not a
big deal, but the first bug makes get_original_name effectively strip
all suffixes, even important ones, on my setup.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

* auto-profile.cc (get_original_name): Fix loop walking the
suffixes.

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index a970eb8972f..8419aa5b4a9 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -622,9 +638,11 @@ get_original_name (const char *name, bool alloc = true)
 }
   /* Suffixes of clones that compiler generates after auto-profile.  */
   const char *suffixes[] = {"isra", "constprop", "lto_priv", "part", "cold"};
-  for (unsigned i = 0; i < sizeof (suffixes); ++i)
+  for (unsigned i = 0; i < sizeof (suffixes) / sizeof (const char *); ++i)
 {
-  if (strncmp (next_dot + 1, suffixes[i], strlen (suffixes[i])) == 0)
+  int len = strlen (suffixes[i]);
+  if (len == last_dot - next_dot - 1
+ && strncmp (next_dot + 1, suffixes[i], strlen (suffixes[i])) == 0)
{
  *next_dot = 0;
  return get_original_name (ret, false);

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Qing Zhao

Hi, 
Sorry for the late reply. And thanks a lot for all the help so far. 

For the documentation of the .ACCESS_WITH_SIZE for pointers, please see the 
following in c/c-typeck.cc :


/* Given a COMPONENT_REF REF with the location LOC, the corresponding
   COUNTED_BY_REF, and the COUNTED_BY_TYPE, generate the corresponding
   call to the internal function .ACCESS_WITH_SIZE.

   Generate an INDIRECT_REF to a call to the internal function
   .ACCESS_WITH_SIZE.

   REF

   to:

   (*.ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, (TYPE_OF_SIZE)0, -1,
(TYPE_OF_ARRAY *)0))

   NOTE: The return type of this function is the POINTER type pointing
   to the original flexible array type or the original pointer type.
   Then the type of the INDIRECT_REF is the original flexible array type
   or the original pointer type.

   The 4th argument of the call is a constant 0 with the TYPE of the
   object pointed by COUNTED_BY_REF.

   The 6th argument of the call is a constant 0 of the same TYPE as
   the return type of the call.

  */
static tree
build_access_with_size_for_counted_by (location_t loc, tree ref,
   tree counted_by_ref,
   tree counted_by_type)

Qing

> On Jul 7, 2025, at 02:05, Richard Biener  wrote:
> 
> On Sat, Jul 5, 2025 at 2:10 PM Siddhesh Poyarekar  wrote:
>> 
>> On 2025-07-05 07:23, Richard Biener wrote:
>>>> OK, should I revert right away or can we wait till Qing returns on Monday?
>>> 
>>> Monday is OK with me.
>>> 
>> 
>> Thanks, so I thought about this some more and I think when I said in
>> bugzilla:
>> 
>> "In fact, maybe the .ACCESS_WITH_SIZE handling in objsz probably needs
>> improvement to express it better, but that's an orthogonal matter."
>> 
>> I had the right intuition but I was completely wrong about it being an
>> orthogonal matter.  That *is* the issue and it only becomes relevant
>> when the member being described is a pointer and not a FAM.  e.g. for
>> the following:
>> 
>> ```
>> struct A
>> {
>>   int count;
>> #ifndef PTR
>>   char c[] __attribute ((__counted_by__ (count)));
>> #else
>>   char *c __attribute ((__counted_by__ (count)));
>> #endif
>> } a;
>> 
>> unsigned long
>> foo (struct A *a)
>> {
>>   return __builtin_dynamic_object_size (a->c, 1);
>> }
>> ```
>> 
>> the .ACCESS_WITH_SIZE abstraction records the size using &a->c:
>> 
>>   _2 = &a->c;
>>   _3 = &a->count;
>>   _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>>   D.2964 = __builtin_dynamic_object_size (_1, 1);
>> 
>> this doesn't make a difference when c is an array since the & operator
>> is a nop.  However when the same is applied to the pointer a->c, it
>> becomes an additional dereference, which changes the semantic meaning:
>> 
>>   _2 = &a->c;
>>   _3 = &a->count;
>>   _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>>   _4 = *_1;
>>   D.2964 = __builtin_dynamic_object_size (_4, 1);
>> 
>> Since the intent of the .ACCESS_WITH_SIZE was to associate the storage
>> of count with c to prevent reordering, maybe the semantically correct
>> solution here is that when c is a pointer, the frontend emits:
>> 
>>   _2 = a->c;
>>   _3 = &a->count;
>>   _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>>   D.2964 = __builtin_dynamic_object_size (_, 1);
>> 
>> so a->c instead of &a->c.
> 
> Yes.  That's what I'd have expected happens?  I thought .ACCESS_WITH_SIZE
> annotates the pointer, it doesn't perform an access itself - correct?  Where
> is .ACCESS_WITH_SIZE documented?  I can't find it documented in the
> internals manual, internal-fn.def has
> 
> /* A function to associate the access size and access mode information
>   with the corresponding reference to an object.  It only reads from the
>   2nd argument.  */
> DEF_INTERNAL_FN (ACCESS_WITH_SIZE, ECF_PURE | ECF_LEAF | ECF_NOTHROW, NULL)
> 
> that suggests .ACCESS_WITH_SIZE performs a read on the size.  It doesn't
> say what the function returns at all.
> 
> Is the above only happening
> when using __builtin_dynamic_object_size (_1, 1) or also when performing
> an actual access like
> 
> return a->c[i];
> 
> ?
> 
>> In fact, maybe taking the address of a->c
>> doesn't make sense in general and .ACCESS_WITH_SIZE should always be the
>> above even for FAM?  Does that sound correct?
>> 
>> Sid

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-07 Thread Jakub Jelinek

On Mon, Jul 07, 2025 at 09:18:53PM +0000, Qing Zhao wrote:
> From OLD:
> 
>  _2 = &a->c;
>  _3 = &a->count;
>  _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>  _4 = *_1;
>  D.2964 = __builtin_dynamic_object_size (_4, 1);
> 
> To NEW:
> 
>  _2 = a->c;
>  _3 = &a->count;
>  _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B, 0);
>  D.2964 = __builtin_dynamic_object_size (_, 1);
> 
> 
> NOTE, in the above, in addition to pass “a->c” instead of “&a->c” as the 
> first parameter,  I also
> added one more argument for .ACCESS_WITH_SIZE:
> 
> +   the 7th argument of the call is 1 when for FAM, 0 for pointers.
> 
> To distinguish whether this .ACCESS_WITH_SIZE is for FAM or for pointers. 
> And this argument will be used in tree-object-size.cc 
>  to get the element_type of the associated FAM 
> or pointer array.

Even 6 arguments is IMHO too much.
/* Expand the IFN_ACCESS_WITH_SIZE function:
   ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE,
 TYPE_OF_SIZE, ACCESS_MODE)
   which returns the REF_TO_OBJ same as the 1st argument;

   1st argument REF_TO_OBJ: The reference to the object;
   2nd argument REF_TO_SIZE: The reference to the size of the object,
   3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE represents
 0: the number of bytes.
 1: the number of the elements of the object type;
   4th argument TYPE_OF_SIZE: A constant 0 with its TYPE being the same as the
 TYPE of the object referenced by REF_TO_SIZE
   5th argument ACCESS_MODE:
-1: Unknown access semantics
 0: none
 1: read_only
 2: write_only
 3: read_write
   6th argument: A constant 0 with the pointer TYPE to the original flexible
 array type.

I agree with argument 1 and 2 and agree we need 2 INTEGER_CST arguments with
the 2 pointer types.  Nobody says those 2 arguments have to be 0 though,
they can be some other INTEGER_CST, similarly how MEM_REF's second argument
is INTEGER_CST with type meaning something and value something different.
Perhaps one can be that -1/0/1/2/3 and another one a bitmask for the
remaining flags, or one can be say 0/1/2/3/4 ored with 0/8 ored with 0/16.

Though, it is unclear to me how the "the number of the elements of the
object type" actually works.  If the FAM has constant sized elements
or pointer points to constant sized element, I agree you can just grab the
size from TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gimple_call_arg (call, 5)))).
But what if the FAM has a variable length type or it is pointer to VLA?
Trying to use TYPE_SIZE_UNIT will not really work well in that case, while
perhaps during gimplification it will be gimplified and exist, later
optimizations will not see it being used and can optimize it away.
If all you care is to get the size from that, why don't you just pass
the size as the argument?  So instead of that 0: the number of bytes
1: the number of the elements of the object type + the former 6th
argument just pass one argument, 1 if it is the "the number of bytes"
case and some other number, the size of the element.  So in all cases
the size in bytes is effectively *(type_of_size *)ref_to_size * eltsz
This argument would be INTEGER_CST whenever it is not VLA or the VLA size.

Jakub

[PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-07 Thread Robert Dubner

I have elsewhere described my frustration in trying, during development,
to set more stringent error-finding and warning-generating compilation
options.  But they seem to require the use of CXXFLAGS, which in at least
the case of trying to set -std=c++14, causes libcody's compilation to
fail, since it wants, specifically, c++11.

So, I dove in and figured out how to implement CXXFLAGS_FOR_COBOL.  This
new flag applies only to compilations of c++ programs in the gcc/cobol
source code tree.

These changes were tested with a bootstrap build of
--enable-languages=c,c++,fortran,cobol.

Is this okay for trunk?

Thanks.

Bob Dubner


From ff295ac4ba9728fdd8d97fa73754cd17a21d0f18 Mon Sep 17 00:00:00 2001
From: Robert Dubner 
Date: Mon, 7 Jul 2025 16:34:09 -0400
Subject: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

It's useful during development to be able to specify compilation options that
apply only to the COBOL front end.  These changes provide the ability to
set persistent flags at configure time:

CXXFLAGS_FOR_COBOL=xxx ../configure 

while providing the ability to subsequently override them at build time

make CXXFLAGS_FOR_COBOL=yyy

ChangeLog:

* configure.ac: Create output variable @CXXFLAGS_FOR_COBOL@.
* Makefile.def: Include CXXFLAGS_FOR_COBOL in flags_to_pass.
* Makefile.tpl: Include CXXFLAGS_FOR_COBOL in HOST_EXPORTS;
incorporate @CXXFLAGS_FOR_COBOL@.
* configure: Regenerated.
* Makefile.in: Regenerated.

gcc/cobol/ChangeLog:

* Make-lang.in: Use CXXFLAGS_FOR_COBOL in gcc/cobol compilations.
---
 Makefile.def   |  1 +
 Makefile.in|  3 +++
 Makefile.tpl   |  2 ++
 configure  |  2 ++
 configure.ac   |  1 +
 gcc/cobol/Make-lang.in | 20 
 6 files changed, 29 insertions(+)

diff --git a/Makefile.def b/Makefile.def
index fa60f6ea0b9..bd17c89b875 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -304,6 +304,7 @@ flags_to_pass = { flag= BOOT_CFLAGS ; };
 flags_to_pass = { flag= BOOT_LDFLAGS ; };
 flags_to_pass = { flag= CFLAGS ; };
 flags_to_pass = { flag= CXXFLAGS ; };
+flags_to_pass = { flag= CXXFLAGS_FOR_COBOL ; };
 flags_to_pass = { flag= LDFLAGS ; };
 flags_to_pass = { flag= LIBCFLAGS ; };
 flags_to_pass = { flag= LIBCXXFLAGS ; };
diff --git a/Makefile.in b/Makefile.in
index 12d4395d8e2..1ab2efbc612 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -203,6 +203,7 @@ HOST_EXPORTS = \
CONFIG_SHELL="$(SHELL)"; export CONFIG_SHELL; \
CXX="$(CXX)"; export CXX; \
CXXFLAGS="$(CXXFLAGS)"; export CXXFLAGS; \
+   CXXFLAGS_FOR_COBOL="$(CXXFLAGS_FOR_COBOL)"; export CXXFLAGS_FOR_COBOL; \
GFORTRAN="$(GFORTRAN)"; export GFORTRAN; \
GOC="$(GOC)"; export GOC; \
GDC="$(GDC)"; export GDC; \
@@ -451,6 +452,7 @@ CFLAGS = @CFLAGS@
 LDFLAGS = @LDFLAGS@
 LIBCFLAGS = $(CFLAGS)
 CXXFLAGS = @CXXFLAGS@
+CXXFLAGS_FOR_COBOL = @CXXFLAGS_FOR_COBOL@
 LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates
 GOCFLAGS = $(CFLAGS)
 GDCFLAGS = @GDCFLAGS@
@@ -882,6 +884,7 @@ BASE_FLAGS_TO_PASS = \
"BOOT_LDFLAGS=$(BOOT_LDFLAGS)" \
"CFLAGS=$(CFLAGS)" \
"CXXFLAGS=$(CXXFLAGS)" \
+   "CXXFLAGS_FOR_COBOL=$(CXXFLAGS_FOR_COBOL)" \
"LDFLAGS=$(LDFLAGS)" \
"LIBCFLAGS=$(LIBCFLAGS)" \
"LIBCXXFLAGS=$(LIBCXXFLAGS)" \
diff --git a/Makefile.tpl b/Makefile.tpl
index ddcca558913..047845b7692 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -206,6 +206,7 @@ HOST_EXPORTS = \
CONFIG_SHELL="$(SHELL)"; export CONFIG_SHELL; \
CXX="$(CXX)"; export CXX; \
CXXFLAGS="$(CXXFLAGS)"; export CXXFLAGS; \
+   CXXFLAGS_FOR_COBOL="$(CXXFLAGS_FOR_COBOL)"; export CXXFLAGS_FOR_COBOL; \
GFORTRAN="$(GFORTRAN)"; export GFORTRAN; \
GOC="$(GOC)"; export GOC; \
GDC="$(GDC)"; export GDC; \
@@ -454,6 +455,7 @@ CFLAGS = @CFLAGS@
 LDFLAGS = @LDFLAGS@
 LIBCFLAGS = $(CFLAGS)
 CXXFLAGS = @CXXFLAGS@
+CXXFLAGS_FOR_COBOL = @CXXFLAGS_FOR_COBOL@
 LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates
 GOCFLAGS = $(CFLAGS)
 GDCFLAGS = @GDCFLAGS@
diff --git a/configure b/configure
index ccec3f21cd8..c8e6d729f1b 100755
--- a/configure
+++ b/configure
@@ -624,6 +624,7 @@ RANLIB_PLUGIN_OPTION
 AR_PLUGIN_OPTION
 PKG_CONFIG_PATH
 GDCFLAGS
+CXXFLAGS_FOR_COBOL
 READELF
 OTOOL
 OBJDUMP
@@ -14218,6 +14219,7 @@ fi
 
 
 
+
 GDCFLAGS=${GDCFLAGS-${CFLAGS}}
 
 
diff --git a/configure.ac b/configure.ac
index 55d10265958..6153b1d7957 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4010,6 +4010,7 @@ AC_SUBST(CC)
 AC_SUBST(CXX)
 AC_SUBST(CFLAGS)
 AC_SUBST(CXXFLAGS)
+AC_SUBST(CXXFLAGS_FOR_COBOL)
 AC_SUBST(GDC)
 AC_SUBST(GDCFLAGS)
 GDCFLAGS=${GDCFLAGS-${CFLAGS}}
diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in
index 18eb3b0f1e5..1e60cea060e 100644
--- a/gcc/cobol/Make-lang.in
+++ b/gcc/cobol/Make-lang.in
@@ -384,3 +384,23 @@ cobol.stagefeedback: stagefeedback-start
 selftest-cobol:
 
 lang_checks += check-cobol
+
+#
+# Front-end spec

Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-07 Thread Rainer Orth

Hi Robert,

> I have elsewhere described my frustration in trying, during development,
> to set more stringent error-finding and warning-generating compilation
> options.  But they seem to require the use of CXXFLAGS, which in at least
> the case of trying to set -std=c++14, causes libcody's compilation to
> fail, since it wants, specifically c++11.
>
> So, I dove in and figured out how to implement CXXFLAGS_FOR_COBOL.  This
> new flag applies only to compilations of c++ programs in the gcc/cobol
> source code tree.
>
> These changes were tested with a bootstrap build of
> --enable-languages=c,c++,fortran,cobol.
>
> Is this okay for trunk?

I think there's a far easier way which doesn't require any patch.  Have
a look at toplevel configure.ac:

# When bootstrapping with GCC, build stage 1 in C++14 mode to ensure that a
# C++14 compiler can still start the bootstrap.  Otherwise, if building GCC,
# require C++14 (or higher).
if test "$enable_bootstrap:$GXX" = "yes:yes"; then
  CXX="$CXX -std=c++14"
elif test "$have_compiler" = yes; then
  AX_CXX_COMPILE_STDCXX(14)

  if test "${build}" != "${host}"; then 
AX_CXX_COMPILE_STDCXX(14, [], [], [_FOR_BUILD])
  fi
fi

You should be able to achieve what you want by building with CXX='g++
-std=c++14', just as one can build a 32-bit gcc with a 64-bit host
compiler using CXX='g++ -m32'.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

Re: [PATCH] libstdc++: Make VERIFY a variadic macro

2025-07-07 Thread Tomasz Kaminski

On Wed, Jul 2, 2025 at 12:31 PM Jonathan Wakely  wrote:

> This defines the testsuite assertion macro VERIFY so that it allows
> un-parenthesized expressions containing commas. This matches how assert
> is defined in C++26, following the approval of P2264R7.
>
> The primary motivation is to allow expressions that the preprocessor
> splits into multiple arguments, e.g.
> VERIFY( vec == std::vector{1,2,3,4} );
>
> To achieve this, VERIFY is redefined as a variadic macro and then the
> arguments are grouped together again through the use of __VA_ARGS__.
>
> The implementation is complex due to the following points:
>
> - The arguments __VA_ARGS__ are contextually-converted to bool, so that
>   scoped enums and types that are not contextually convertible to bool
>   cannot be used with VERIFY.
> - bool(__VA_ARGS__) is used so that multiple arguments (i.e. those which
>   are separated by top-level commas) are ill-formed. Nested commas are
>   allowed, but likely mistakes such as VERIFY( cond, "some string" ) are
>   ill-formed.
> - The bool(__VA_ARGS__) expression needs to be unevaluated, so that we
>   don't evaluate __VA_ARGS__ more than once. The simplest way to do that
>   would be just sizeof bool(__VA_ARGS__), without parentheses to avoid a
>   vexing parse for VERIFY(bool(i)). However that wouldn't work for e.g.
>   VERIFY( []{ return true; }() ), because lambda expressions are not
>   allowed in unevaluated contexts until C++20. So we use another
>   conditional expression with bool(__VA_ARGS__) as the unevaluated
>   operand.
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/util/testsuite_hooks.h (VERIFY): Define as variadic
> macro.
> * testsuite/ext/verify_neg.cc: New test.
> ---
>
> Tested powerpc64le-linux.
>
LGTM. I have followed the discussion on the LWG reflector on it.

>
>  libstdc++-v3/testsuite/ext/verify_neg.cc  | 28 +++
>  libstdc++-v3/testsuite/util/testsuite_hooks.h | 17 +--
>  2 files changed, 35 insertions(+), 10 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/ext/verify_neg.cc
>
> diff --git a/libstdc++-v3/testsuite/ext/verify_neg.cc
> b/libstdc++-v3/testsuite/ext/verify_neg.cc
> new file mode 100644
> index 000000000000..ce033741beeb
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/ext/verify_neg.cc
> @@ -0,0 +1,28 @@
> +// { dg-do compile { target c++11 } }
> +
> +#include <testsuite_hooks.h>
> +
> +struct X { explicit operator void*() const { return nullptr; } };
> +
> +void
> +test_VERIFY(int i)
> +{
> +  // This should not be parsed as a function type bool(bool(i)):
> +  VERIFY( bool(i) );
> +
> +  // This should not produce warnings about lambda in unevaluated context:
> +  VERIFY( []{ return 1; }() );
> +
> +  // Only one expression allowed:
> +  VERIFY(1, 2); // { dg-error "in expansion of macro" }
> +  // { dg-error "compound expression in functional cast" "" { target
> *-*-* } 0 }
> +
> +  // A scoped enum is not contextually convertible to bool:
> +  enum class E { E0 };
> +  VERIFY( E::E0 ); // { dg-error "could not convert" }
> +
> +  // explicit conversion to void* is not contextually convertible to bool:
> +  X x;
> +  VERIFY( x ); // { dg-error "in expansion of macro" }
> +  // { dg-error "invalid cast .* to type 'bool'" "" { target *-*-* } 0 }
> +}
> diff --git a/libstdc++-v3/testsuite/util/testsuite_hooks.h
> b/libstdc++-v3/testsuite/util/testsuite_hooks.h
> index faa01ba6abd8..bf34fd121c1b 100644
> --- a/libstdc++-v3/testsuite/util/testsuite_hooks.h
> +++ b/libstdc++-v3/testsuite/util/testsuite_hooks.h
> @@ -58,16 +58,13 @@
>  # define _VERIFY_PRINT(S, F, L, P, C) __builtin_printf(S, F, L, P, C)
>  #endif
>
> -#define VERIFY(fn)  \
> -  do\
> -  { \
> -if (! (fn))
>   \
> -  {
>   \
> -   _VERIFY_PRINT("%s:%d: %s: Assertion '%s' failed.\n",\
> - __FILE__, __LINE__, __PRETTY_FUNCTION__, #fn);\
> -   __builtin_abort();  \
> -  }
>   \
> -  } while (false)
> +#define VERIFY(...)\
> +   ((void)((__VA_ARGS__)   \
> +? (void)(true ? true : bool(__VA_ARGS__))  \
> +: (_VERIFY_PRINT("%s:%d: %s: Assertion '%s' failed.\n",\
> + __FILE__, __LINE__, __PRETTY_FUNCTION__,  \
> + #__VA_ARGS__),\
> +   __builtin_abort())))
>
>  #ifdef _GLIBCXX_HAVE_UNISTD_H
> # include <unistd.h>
> --
> 2.50.0
>
>

Re: [PATCH][RFC] phiopt: Optimize A < 0 ? ARG1 OP 2^n-1 : ARG1

2025-07-07 Thread Raphael Zinsly

On Mon, Jun 30, 2025 at 11:29 AM Jeff Law  wrote:
> So I think the overall direction is:
>
> 1. Try to do the translation in match.pd
> 2. Target the conditional move form
> 3. Adjust expanders and/or target bits as necessary to avoid regressions

That works for me too, I'll start working on it.


Thank you all for the comments,
-- 
Raphael Moreira Zinsly

1 2 >

1 - 100 of 157 matches

Mail list logo