[Bug jit/112603] Allow setting the personality function

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112603

--- Comment #2 from Jonathan Wakely  ---
Patches should be sent to the mailing list, not here. Please see
https://gcc.gnu.org/contribute.html

[Bug jit/112603] Allow setting the personality function

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112603

Sam James  changed:

   What|Removed |Added

 CC||sjames at gcc dot gnu.org

--- Comment #3 from Sam James  ---
I think his workflow is to file a bug for it, attach the patch, then send it to
the ML.

I'm not sure why though -- IMO there's not much value in filing a bug for it if
you're not going to describe much inside of it, but I guess it could be useful
as a personal way of tracking sent vs. merged patches...?

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #8 from Sam James  ---
Bringing MLRegallocEvictAdvisor.cpp.o into a good build then relinking
lib/libLLVMCodeGen.so.17 is enough to break it.

Some selected diffs:
```
│  llvm::NextPowerOf2(unsigned long):
│  /home/sam/git/llvm-project/llvm/include/llvm/Support/MathExtras.h:356
│   inc%rdi
│  unsigned int const& std::max(unsigned int const&, unsigned int
const&):
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algobase.h:262
│ +/tmp/build/include/c++/14.0.0/bits/stl_algobase.h:262
│   cmp%eax,%edi
│   cmovb  %eax,%edi
│  llvm::DenseMap, llvm::detail::DenseMapPair >::grow
(unsigned int):
│  /home/sam/git/llvm-project/llvm/include/llvm/ADT/DenseMap.h:892
│   mov%edi,0x18(%rbx)
│  allocateBuckets():
│  /home/sam/git/llvm-project/llvm/include/llvm/ADT/DenseMap.h:899
│ @@ -321,9 +321,9 @@
│  /home/sam/git/llvm-project/llvm/include/llvm/ADT/DenseMap.h:658
(discriminator 3)
│   cmp%r10d,%edx
│   je 135 ,
llvm::detail::DenseMapPair >::grow(unsigned int)+0x135>
│   mov0xc(%rsp),%r15d
│   jmp199 ,
llvm::detail::DenseMapPair >::grow(unsigned int)+0x199>
│  initEmpty():
│  /home/sam/git/llvm-project/llvm/include/llvm/ADT/DenseMap.h:658
(discriminator 3)
│ - call   288 <.LC133>
│ + call   288 <.LC133+0x8>
```

```
│  std::unique_ptr >::operator[](unsigned long)
const:
│ -/tmp/build-reverted/include/c++/14.0.0/bits/unique_ptr.h:726 (discriminator
4)
│ +/tmp/build/include/c++/14.0.0/bits/unique_ptr.h:726 (discriminator 4)
│   lea0x0(%rip),%rcx# 16f7 <(anonymous
namespace)::MLEvictAdvisor::extractFeatures(llvm::SmallVectorImpl const&, llvm::SmallVectorImpl&, unsigned long, long,
 long, float, llvm::SmallVectorImpl&) const [clone
.isra.0]+0x16f7>
│   R_X86_64_PC32   .LC78-0x4
│   lea0x0(%rip),%rdx# 16fe <(anonymous
namespace)::MLEvictAdvisor::extractFeatures(llvm::SmallVectorImpl const&, llvm::SmallVectorImpl&, unsigned long, long,
 long, float, llvm::SmallVectorImpl&) const [clone
.isra.0]+0x16fe>
│   R_X86_64_PC32   .LC79-0x4
│   mov$0x2d6,%esi
│   lea0x0(%rip),%rdi# 170a <(anonymous
namespace)::MLEvictAdvisor::extractFeatures(llvm::SmallVectorImpl const&, llvm::SmallVectorImpl&, unsigned long, long,
 long, float, llvm::SmallVectorImpl&) const [clone
.isra.0]+0x170a>
│   R_X86_64_PC32   .LC80-0x4
│ @@ -2665,9 +2665,9 @@
│   vxorps %xmm3,%xmm3,%xmm3
│   shr%rdx
│   or %rax,%rdx
│   vcvtsi2ss %rdx,%xmm3,%xmm0
│   vaddss %xmm0,%xmm0,%xmm1
│   jmp472 <(anonymous
namespace)::MLEvictAdvisor::extractFeatures(llvm::SmallVectorImpl const&, llvm::SmallVectorImpl&, unsigned long, long, long,
float, llvm::Small
VectorImpl&) const [clone .isra.0]+0x472>
│   mov%rax,%rbx
│ - jmp1ad0 <.LC100+0x8d8>
│ + jmp1ad0 <.LC100+0x8f0>
```

```
│  extractInstructionFeatures(llvm::SmallVectorImpl&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_ref, llvm::function_ref, int, int, int, int, llvm::SlotIndex):
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1897
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1897
│   cmp%rcx,%r12
│  __sort&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_r
ef,
llvm::function_ref, int, int, int,
int, llvm::SlotIndex):: > >():
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1897
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1897
│   je e8
&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_ref,
llvm::function_ref, int, int, int,
int, llvm::SlotIndex)+0xe8>
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1900
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1900
│   mov%rbx,%rax
│   movabs $0xaaab,%rdx
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1899
│ - mov%rcx,%rsi
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1899
│ + mov%r15,%rsi
│   mov%r12,%rdi
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1900
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1900
│   sar$0x3,%rax
│ - mov%rcx,%r15
│   imul   %rdx,%rax
│  long std::__lg(long):
```

```
│  __sort&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_ref,
llvm::function_ref, int, int, int,
int, llvm::SlotIndex):: > >():
│ -/tmp/build-reverted/include/c++/14.0.0/bits/stl_algo.h:1899
│ +/tmp/build/include/c++/14.0.0/bits/stl_algo.h:1899
│   add%rdx,%rdx
│ - call   a0
&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_ref, llvm::functi
on_ref, int, int, int, int,
llvm::SlotIndex)+0xa0>
│ + call   9d
&,
llvm::MLModelRunner*, llvm::function_ref,
llvm::function_ref, llvm::functi
on_ref, int, int, int, int,
llvm::SlotIndex)+0x9d>
│   R_X86_64_PC32  
.text._ZSt16__introsort_loopIP14LRStartEndInfolN9__gnu_cxx5__ops15_Iter_comp_iterIZ26extractInstructionFeaturesRN4llvm15SmallVectorImplIS0_EEPNS5_13MLModelRunnerENS5_

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #9 from Sam James  ---
*
https://dev.gentoo.org/~sam/bugs/gcc/gcc-llvm-x86/MLRegallocEvictAdvisor.cpp.o-bad.xz
*
https://dev.gentoo.org/~sam/bugs/gcc/gcc-llvm-x86/MLRegallocEvictAdvisor.cpp.o-good.xz

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #10 from Sam James  ---
Created attachment 56629
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56629&action=edit
MLRegallocEvictAdvisor.cpp.ii.xz

Attaching preprocessed sources from:
```
/tmp/build/bin/g++ -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE
-D_LIBCPP_ENABLE_HARDENED_MODE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS -I/home/sam/data/build/llvm-project-bad/lib/CodeGen
-I/home/sam/git/llvm-project/llvm/lib/CodeGen
-I/home/sam/data/build/llvm-project-bad/include
-I/home/sam/git/llvm-project/llvm/include -O3 -march=znver2 -ggdb3 -fPIC
-fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time
-fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings
-Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long
-Wimplicit-fallthrough -Wno-maybe-uninitialized -Wno-nonnull
-Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move
-Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment
-Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color
-ffunction-sections -fdata-sections -O2 -g -DNDEBUG -std=c++17 -fPIC -UNDEBUG
-MD -MT lib/CodeGen/CMakeFiles/LLVMCodeGen.dir/MLRegallocEvictAdvisor.cpp.o -MF
lib/CodeGen/CMakeFiles/LLVMCodeGen.dir/MLRegallocEvictAdvisor.cpp.o.d -o
lib/CodeGen/CMakeFiles/LLVMCodeGen.dir/MLRegallocEvictAdvisor.cpp.o -c
/home/sam/git/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
-save-temps
```

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #11 from Sam James  ---
The first pass to differ is expand.

Some selected diffs:
```
void llvm::cl::opt >::~opt (struct opt * const
this)
{
  bool [-(*)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _15;
  union _Any_data * {ref-all} _16;
  char * _17;
  const char[16] * _18;
@@ -71879,7 +71879,7 @@ _32 replace with --> _32 = _31 + 1;

void llvm::cl::opt >::~opt (struct opt * const
this)
{
  bool [-(*)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _15;
  union _Any_data * {ref-all} _16;
  char * _17;
  const char[16] * _18;
@@ -72195,14 +72195,14 @@ void llvm::cl::opt
>::~opt (struct opt * const
```

```
@@ -205739,7 +205739,7 @@ void llvm::cl::opt
>::opt)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _77;
  union _Any_data * {ref-all} _78;
  unsigned long _79;
  struct basic_string * _80;
@@ -206849,7 +206849,7 @@ void llvm::cl::opt
>::opt)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _77;
  union _Any_data * {ref-all} _78;
  unsigned long _79;
  struct basic_string * _80;
```

```
{
  bool [-(*)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _12;
  union _Any_data * {ref-all} _13;
  char * _14;
  const char[16] * _15;
@@ -427838,7 +427838,7 @@ _29 replace with --> _29 = _28 + 1;

void llvm::cl::opt >::~opt (struct opt * const
this)
{
  bool [-(*)-]{+(*)+} (union _Any_data & {ref-all}, const union
_Any_data & {ref-all}, _Manager_operation) _12;
  union _Any_data * {ref-all} _13;
  char * _14;
  const char[16] * _15;
```

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #12 from Sam James  ---
Created attachment 56630
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56630&action=edit
MLRegallocEvictAdvisor.cpp.cpp.262r.expand-bad.xz

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #13 from Sam James  ---
Created attachment 56631
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56631&action=edit
MLRegallocEvictAdvisor.cpp.cpp.262r.expand-good.xz

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #14 from Jakub Jelinek  ---
Try to use optimize (0) attribute or corresponding pragma on some functions in
that file (just those where bad vs. good results in different code generation)
to find out which one is problematic?

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #15 from Sam James  ---
Looks like it's extractInstructionFeatures
(https://github.com/llvm/llvm-project/blob/llvmorg-17.0.5/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp#L943).

Adding __attribute__((optimize("O0"))) to it makes the test pass.

[Bug target/112547] 9% exec time regression of 462.libquantum SPEC on AMD zen4 CPU

2023-11-18 Thread fkastl at suse dot cz via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112547

--- Comment #8 from Filip Kastl  ---
I've just run the test on another zen4 machine. Between the originally
mentioned commits g:53010f6ff6dfbf7b and g:1a55724f7870719d there was only 1%
slowdown on this other machine. I guess this means that the 9% slowdown is
specific to the machine where we measure the data I sent.

Since there seems to be no reason why there should be a general zen4 slowdown
between the two commits and if there are no objections, I'll mark this bug as
RESOLVED WORKSFORME.

[Bug target/112604] New: [ia64] Output register not preserved after a branch is not taken

2023-11-18 Thread jakub at jermar dot eu via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112604

Bug ID: 112604
   Summary: [ia64] Output register not preserved after a branch is
not taken
   Product: gcc
   Version: 13.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: jakub at jermar dot eu
  Target Milestone: ---

After an upgrade to GCC 13.2 the HelenOS VFS server started to crash on IA-64
(see http://www.helenos.org/ticket/864). I looked into the issue and the
problem seems to be that in the following code snippet:

fibril_mutex_lock(&vfs_data->lock);
if (!vfs_data->files) {
vfs_data->files = malloc(VFS_MAX_OPEN_FILES * sizeof(vfs_file_t *));
if (!vfs_data->files) {
fibril_mutex_unlock(&vfs_data->lock);
return false;
}
memset(vfs_data->files, 0, VFS_MAX_OPEN_FILES * sizeof(vfs_file_t *));
}
fibril_mutex_unlock(&vfs_data->lock);

The output argument prepared for the possible call to malloc (1024 in this
case) destroys the argument for fibril_mutex_unlock() if the branch is not
taken.

In assembly it looks like this:

40001a00 <_vfs_fd_alloc>:
40001a00:   08 48 39 18 80 05   [MMI]   alloc
r41=ar.pfs,14,12,0
40001a06:   c0 02 80 00 42 00   mov r44=r32
40001a0c:   05 00 c4 00 mov r40=b0
40001a10:   09 38 01 41 00 21   [MMI]   adds r39=64,r32
40001a16:   a0 02 04 00 42 40   mov r42=r1
40001a1c:   04 10 41 00 zxt1 r34=r34;;
40001a20:   11 28 fd 01 00 24   [MIB]   mov r37=127
40001a26:   b0 02 04 65 00 00   mov.i r43=ar.lc
40001a2c:   e8 b4 01 50 br.call.sptk.many
b0=4001cf00 ;;
40001a30:   08 60 01 40 00 21   [MMI]   mov r44=r32
40001a36:   e0 00 9c 30 20 20   ld8 r14=[r39]
40001a3c:   00 50 01 84 mov r1=r42
40001a40:   0a 68 05 00 00 24   [MMI]   mov r45=1;;
40001a46:   c0 02 00 10 48 e0   mov r44=1024
40001a4c:   00 70 18 e4 cmp.eq p7,p6=0,r14
40001a50:   16 00 00 00 00 c8   [BBB]   nop.b 0x0
40001a56:   01 f0 01 80 21 00 (p07) br.cond.dpnt.few
40001e30 <_vfs_fd_alloc+0x430>
40001a5c:   10 00 00 40 br.few
40001a60 <_vfs_fd_alloc+0x60>
40001a60:   11 00 00 00 01 00   [MIB]   nop.m 0x0
40001a66:   00 00 00 02 00 00   nop.i 0x0
40001a6c:   28 ba 01 50 br.call.sptk.many
b0=4001d480 ;;
40001a70:   08 60 01 40 00 21   [MMI]   mov r44=r32

The out0 is r44 in this context. Note how it is first correctly restored to the
mutex address at address 1a30 after the fibril_mutex_lock call. But then this
value is not used and gets rewritten to 1024 at address 1a46 in preparation for
a possible branch and a consequent call to malloc. If the branch is taken, the
register is restored properly (not shown here), but if the branch is not taken
at address 1a56, the call to fibril_mutex_unlock at address 1a6c is made with a
wrong value of r44.

We used the following command line to compile the above snippet:
usr/local/cross/bin/ia64-helenos-gcc -Iuspace/srv_vfs.p -Iuspace
-I../../../uspace -fdiagnostics-color=always -D_FILE_OFFSET_BITS=64 -Wall
-Winvalid-pch -Wextra -std=gnu11 -imacros
/home/jermar/software/HelenOS/helenos/build_all/ia64/ski/config.h -O3
-fexec-charset=UTF-8 -finput-charset=UTF-8 -D_HELENOS_SOURCE
-Wa,--fatal-warnings -Wall -Wextra -Wwrite-strings -Wunknown-pragmas
-Wno-unused-parameter -pipe -ffunction-sections -fdata-sections -fno-common
-fdebug-prefix-map=/home/jermar/software/HelenOS/helenos/=
-fdebug-prefix-map=../../= -Wsystem-headers -Werror -Wmissing-prototypes
-Werror-implicit-function-declaration -Wno-missing-braces
-Wno-missing-field-initializers -Wno-unused-parameter -Wno-clobbered
-Wno-nonnull-compare -fno-builtin-strftime -isystem../../../common/include
-isystem../../../abi/include -isystem../../../abi/arch/ia64/include
-isystem../../../uspace/lib/c/arch/ia64/include
-isystem../../../uspace/lib/c/include -D__LE__ -fno-unwind-tables -MD -MQ
uspace/srv_vfs.p/srv_vfs_vfs_file.c.o -MF
uspace/srv_vfs.p/srv_vfs_vfs_file.c.o.d -o
uspace/srv_vfs.p/srv_vfs_vfs_file.c.o -c ../../../uspace/srv/vfs/vfs_file.c

$ /usr/local/cross/bin/ia64-helenos-gcc -v 
   
1209ms  Sat 18 Nov 2023 12:42:07 PM UTC
Using built-in specs.
COLLECT_GCC=/usr/local/cross/bin/ia64-helenos-gcc
C

[Bug target/112604] [ia64] Output register not preserved after a branch is not taken

2023-11-18 Thread sjames at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112604

Sam James  changed:

   What|Removed |Added

   See Also||http://www.helenos.org/tick
   ||et/864

--- Comment #1 from Sam James  ---
Could you attach bad/good object files and preprocessed source for vfs_file.c?

[Bug c/112605] New: ICE: in gen_reg_rtx, at emit-rtl.cc:1176

2023-11-18 Thread iamanonymous.cs at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112605

Bug ID: 112605
   Summary: ICE: in gen_reg_rtx, at emit-rtl.cc:1176
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: iamanonymous.cs at gmail dot com
  Target Milestone: ---

***
OS and Platform:
$ uname -a:
Linux ubuntu 4.15.0-213-generic #224-Ubuntu SMP Mon Jun 19 13:30:12 UTC 2023
x86_64 x86_64 x86_64 GNU/Linux
***
gcc version:
$ gcc -v
Using built-in specs.
COLLECT_GCC=/root/gcc_set/202311021000/bin/gcc
COLLECT_LTO_WRAPPER=/root/gcc_set/202311021000/libexec/gcc/x86_64-pc-linux-gnu/14.0.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc/configure --prefix=/root/gcc_set/202311021000
--with-gmp=/root/build_essential --with-mpfr=/root/build_essential
--with-mpc=/root/build_essential --enable-languages=c,c++ --disable-multilib
--with-sanitizer=address,undefined,thread,leak
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 14.0.0 20231102 (experimental) (GCC)

git version: d508a94167c186b2baacc679896e2809554c0b99
***
Program:
$ cat prog.c
int x;
int y;

void __attribute__((noinline)) f1(void)
{
x++;
}

static __attribute__((noinline)) void f3(void)
{
y++;
}

void f2()
{
f1();
f3();
f1();
}
***
Command Lines:
$ gcc -c -O2 -mforce-indirect-call -fsplit-stack prog.c
during RTL pass: pro_and_epilogue
prog.c: In function ‘f3’:
prog.c:16:1: internal compiler error: in gen_reg_rtx, at emit-rtl.cc:1176
   16 | }
  | ^
0x7658e3 gen_reg_rtx(machine_mode)
../../gcc/gcc/emit-rtl.cc:1176
0xbcc1ae copy_to_mode_reg(machine_mode, rtx_def*)
../../gcc/gcc/explow.cc:650
0x14c9538 ix86_expand_call(rtx_def*, rtx_def*, rtx_def*, rtx_def*, rtx_def*,
bool)
../../gcc/gcc/config/i386/i386-expand.cc:9707
0x13d7702 ix86_expand_split_stack_prologue()
../../gcc/gcc/config/i386/i386.cc:10486
0x18fe57a gen_split_stack_prologue()
../../gcc/gcc/config/i386/i386.md:18094
0x13bea25 target_gen_split_stack_prologue
../../gcc/gcc/config/i386/i386.md:17849
0xc62e0a make_split_prologue_seq
../../gcc/gcc/function.cc:5785
0xc62e0a make_split_prologue_seq
../../gcc/gcc/function.cc:5778
0xc6301a thread_prologue_and_epilogue_insns()
../../gcc/gcc/function.cc:6039
0xc63752 rest_of_handle_thread_prologue_and_epilogue
../../gcc/gcc/function.cc:6543
0xc63752 execute
../../gcc/gcc/function.cc:6624
Please submit a full bug report, with preprocessed source (by using
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

[Bug target/112604] [ia64] Output register not preserved after a branch is not taken

2023-11-18 Thread jakub at jermar dot eu via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112604

--- Comment #2 from Jakub Jermar  ---
Created attachment 56632
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56632&action=edit
Requested object and preprocessed source files

Here are the bad and good object and preprocessed source files for vfs_file.c.
Note that the difference between the good and bad version is -O3 (bad) vs. -O2
(good), but it was the same compiler. I am not sure this is going to be very
helpful as with -O2 the code is quite different and the offending function is
not inlined. If desired, I may try to go back to GCC 8.2 (the last good version
known to me) and try to provide a good file generated with the same compiler
flags. Let me know if this would be more useful.

I also attached the entire binary of the VFS server, both good and bad
versions. Note these are HelenOS binaries and need to be run in the environment
of the HelenOS operating system, which might not be practical for you.

[Bug rtl-optimization/112606] New: [14 Regression] powerpc64le-linux-gnu: 'FAIL: gcc.target/powerpc/p8vector-fp.c scan-assembler xsnabsdp'

2023-11-18 Thread tschwinge at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112606

Bug ID: 112606
   Summary: [14 Regression] powerpc64le-linux-gnu: 'FAIL:
gcc.target/powerpc/p8vector-fp.c scan-assembler
xsnabsdp'
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Keywords: testsuite-fail
  Severity: normal
  Priority: P3
 Component: rtl-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: tschwinge at gcc dot gnu.org
CC: pinskia at gcc dot gnu.org, xry111 at gcc dot gnu.org
  Target Milestone: ---
Target: powerpc64le-linux-gnu

If my tracking is to be believed, the recent commit
r14-5542-g9e9279fadbd1c673c875b9d20261d2de0473f63f "Only allow (copysign x,
NEG_CONST) -> (fneg (fabs x)) simplification for constant folding [PR112483]"
causes a regression for powerpc64le-linux-gnu:

@@ -169558,7 +169764,7 @@ PASS: gcc.target/powerpc/p8vector-fp.c
scan-assembler xsdivdp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xsdivsp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xsmuldp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xsmulsp
[-PASS:-]{+FAIL:+} gcc.target/powerpc/p8vector-fp.c scan-assembler xsnabsdp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xsnegdp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xssqrtdp
PASS: gcc.target/powerpc/p8vector-fp.c scan-assembler xssqrtsp

With that commit reverted (PASS) vs. non-reverted (FAIL):

--- p8vector-fp.s   2023-11-18 14:22:23.862421425 +0100
+++ p8vector-fp.s   2023-11-18 14:11:49.554421425 +0100
@@ -33,13 +33,15 @@
 0: addis 2,12,.TOC.-.LCF1@ha
addi 2,2,.TOC.-.LCF1@l
.localentry nabs_sf,.-nabs_sf
+   addis 9,2,.LC0@toc@ha
lxsspx 32,0,3
 #APP
  # 16 "[...]/source-gcc/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c" 1
# reg 32
  # 0 "" 2
 #NO_APP
-   xsnabsdp 1,32
+   lfs 1,.LC0@toc@l(9)
+   xscpsgndp 1,1,32
blr
.long 0
.byte 0,0,0,0,0,0,0,0
@@ -201,13 +203,15 @@
 0: addis 2,12,.TOC.-.LCF9@ha
addi 2,2,.TOC.-.LCF9@l
.localentry nabs_df,.-nabs_df
+   addis 9,2,.LC2@toc@ha
lxsdx 32,0,3
 #APP
  # 77 "[...]/source-gcc/gcc/testsuite/gcc.target/powerpc/p8vector-fp.c" 1
# reg 32
  # 0 "" 2
 #NO_APP
-   xsnabsdp 1,32
+   lfd 1,.LC2@toc@l(9)
+   xscpsgndp 1,1,32
blr
.long 0
.byte 0,0,0,0,0,0,0,0
@@ -338,5 +342,14 @@
.cfi_endproc
 .LFE15:
.size   sqrt_df,.-sqrt_df
+   .section.rodata.cst4,"aM",@progbits,4
+   .align 2
+.LC0:
+   .long   -1082130432
+   .section.rodata.cst8,"aM",@progbits,8
+   .align 3
+.LC2:
+   .long   0
+   .long   -1074790400
.gnu_attribute 4, 1
.section.note.GNU-stack,"",@progbits

[Bug rtl-optimization/112606] [14 Regression] powerpc64le-linux-gnu: 'FAIL: gcc.target/powerpc/p8vector-fp.c scan-assembler xsnabsdp'

2023-11-18 Thread tnfchris at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112606

Tamar Christina  changed:

   What|Removed |Added

 CC||tnfchris at gcc dot gnu.org

--- Comment #1 from Tamar Christina  ---
This looks like a target bug. You seem to have an fneg (fabs (..)) instruction
on powerpc.  This means your copysign pattern needs to either reject the
copysign expansion when the second operand is negative, or it needs to emit
xsnabsdp in this case rather than copysign.

The generic optimization is correct and is doing what the target has requested,
your copysign optab implementation says you prefer copysign for any operand.

[Bug target/112604] [ia64] Output register not preserved after a branch is not taken

2023-11-18 Thread jakub at jermar dot eu via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112604

--- Comment #3 from Jakub Jermar  ---
Created attachment 56633
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56633&action=edit
Updated requested good object file for vfs_file.c

Seems like -O3 -fno-unswitch-loops makes the bug go away. See the new
attachment for a better example of the good binary and the object file. With
this optimization disabled the argument for the malloc/calloc is set only after
the branch is taken so it does not destroy the argument for
fibril_mutex_unlock():

40001a00 <_vfs_fd_alloc>:
40001a00:   08 48 3d 1a 80 05   [MMI]   alloc
r41=ar.pfs,15,13,0
40001a06:   d0 02 80 00 42 60   mov r45=r32
40001a0c:   05 00 cc 00 mov r43=pr
40001a10:   09 38 01 41 00 21   [MMI]   adds r39=64,r32
40001a16:   a0 02 04 00 42 40   mov r42=r1
40001a1c:   04 10 41 00 zxt1 r34=r34;;
40001a20:   11 80 00 44 91 39   [MIB]   cmp4.eq
p16,p17=0,r34
40001a26:   80 02 00 62 00 00   mov r40=b0
40001a2c:   68 b2 01 50 br.call.sptk.many
b0=4001cc80 ;;
40001a30:   08 68 01 40 00 21   [MMI]   mov r45=r32
40001a36:   44 02 00 00 42 20 (p16) mov r36=r0
40001a3c:   00 50 01 84 mov r1=r42
40001a40:   09 70 00 4e 18 10   [MMI]   ld8 r14=[r39]
40001a46:   54 02 00 00 42 c0 (p16) mov r37=r0
40001a4c:   15 00 00 90 mov r46=1;;
40001a50:   38 22 e1 01 07 64   [MMB] (p17) mov r36=1016
40001a56:   54 fa 03 00 48 00 (p17) mov r37=127
40001a5c:   00 00 00 20 nop.b 0x0
40001a60:   11 38 00 1c 06 39   [MIB]   cmp.eq p7,p6=0,r14
40001a66:   c0 02 04 65 80 03   mov.i r44=ar.lc
40001a6c:   f0 03 00 43   (p07) br.cond.dpnt.few
40001e50 <_vfs_fd_alloc+0x450>;;
40001a70:   10 00 00 00 01 00   [MIB]   nop.m 0x0
40001a76:   00 00 00 02 00 00   nop.i 0x0
40001a7c:   10 00 00 40 br.few
40001a80 <_vfs_fd_alloc+0x80>
40001a80:   11 00 00 00 01 00   [MIB]   nop.m 0x0
40001a86:   00 00 00 02 00 00   nop.i 0x0
40001a8c:   88 b7 01 50 br.call.sptk.many
b0=4001d200 ;;
40001a90:   11 68 01 40 00 21   [MIB]   mov r45=r32

40001e50:   11 68 01 00 08 24   [MIB]   mov r45=1024
40001e56:   00 00 00 02 00 00   nop.i 0x0
40001e5c:   f8 bd 00 50 br.call.sptk.many
b0=4000dc40 ;;


[Bug libstdc++/112607] New: : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread hewillk at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

Bug ID: 112607
   Summary: : _Normalize does not consider char_type for
the basic_string_view case
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: libstdc++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: hewillk at gmail dot com
  Target Milestone: ---

When T in basic_format_arg(T& v) is a specialization of basic_string_view or
basic_string, format#arg-6.8 indicates: 

otherwise, if TD is a specialization of basic_string_view or basic_string and
TD::value_type is char_type, initializes value with
basic_string_view<char_type>(v.data(), v.size());

We need to check that TD::value_type is char_type.

However, libstdc++ only uses __is_specialization_of to detect whether T is a
specialization of basic_string_view or basic_string (format#L3118-L3121):

else if constexpr (__is_specialization_of<_Td, basic_string_view>)
  return type_identity<basic_string_view<_CharT>>();
else if constexpr (__is_specialization_of<_Td, basic_string>)
  return type_identity<basic_string_view<_CharT>>();

This causes basic_format_arg to incorrectly use wstring_view to initialize
string_view when customizing std::wstring.

https://godbolt.org/z/6Kd16z8qK

   #include <format>

   template<>
   struct std::formatter : std::formatter {
 auto format(const std::wstring& obj, auto& ctx) const {
 return std::formatter::format(" ", ctx);
   }
  };

  int main(){
std::wstring wstr;
std::string str = std::format("{}", wstr);
  }

[Bug rtl-optimization/96388] scheduling takes forever with -fPIC

2023-11-18 Thread mkuvyrkov at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96388

Maxim Kuvyrkov  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED

--- Comment #15 from Maxim Kuvyrkov  ---
Finished analysis.  Will post a patch next week.

[Bug tree-optimization/112608] New: Missed-optimization: Multiple Division Constants

2023-11-18 Thread goon.pri.low at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112608

Bug ID: 112608
   Summary: Missed-optimization: Multiple Division Constants
   Product: gcc
   Version: 13.2.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: goon.pri.low at gmail dot com
  Target Milestone: ---

The following C code on all optimization levels uses a division instruction:

int inst_division(int a, int b) {
if (b < 10)
b = 7;
else
b = 13;

return a / b;
}

inst_division:
cmp esi, 9
mov edx, 7
mov ecx, 13
mov eax, edi
cmovle  ecx, edx
cdq
idivecx
ret

This could be avoided since we know all the possible divisors and instead could
do multiplications as shown in the following code:

int constant_division(int a, int b) {
if (b < 10)
return a / 7;
else
return a / 13;
}

constant_division:
movsx   rax, edi
cmp esi, 9
jg  .L6
imul    rax, rax, -1840700269
shr rax, 32
add eax, edi
sar edi, 31
sar eax, 2
sub eax, edi
ret
.L6:
imul    rax, rax, 1321528399
sar edi, 31
sar rax, 34
sub eax, edi
ret

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

--- Comment #1 from Jonathan Wakely  ---
That is not a valid specialization since it doesn't depend on a program-defined
type.

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

--- Comment #2 from Jonathan Wakely  ---
basic_string would be a valid program-defined specialization
though.

[Bug tree-optimization/22196] Missed back prop

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=22196

Andrew Pinski  changed:

   What|Removed |Added

 CC||goon.pri.low at gmail dot com

--- Comment #5 from Andrew Pinski  ---
*** Bug 112608 has been marked as a duplicate of this bug. ***

[Bug tree-optimization/112608] Missed-optimization: Multiple Division Constants

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112608

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|UNCONFIRMED |RESOLVED

--- Comment #1 from Andrew Pinski  ---
Dup.

*** This bug has been marked as a duplicate of bug 22196 ***

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

--- Comment #3 from Jonathan Wakely  ---
The standard tries quite hard to avoid this kind of specialization:

https://eel.is/c++draft/format.formatter.spec#note-1

But I suppose you can contrive this kind of custom formatter, or the inverse,
i.e. std::formatter, wchar_t>

[Bug fortran/112609] New: [F2023] Restrictions on integer arguments to SYSTEM_CLOCK

2023-11-18 Thread anlauf at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112609

Bug ID: 112609
   Summary: [F2023] Restrictions on integer arguments to
SYSTEM_CLOCK
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: fortran
  Assignee: unassigned at gcc dot gnu.org
  Reporter: anlauf at gcc dot gnu.org
  Target Milestone: ---

F2023 has changed the requirements for the arguments to SYSTEM_CLOCK:

"Fortran 2018 allowed integer arguments to the intrinsic subroutine
SYSTEM_CLOCK to be of any kind. This document requires integer arguments to
SYSTEM_CLOCK to have a decimal exponent range at least as large as a default
integer, and requires that all integer arguments in a reference to
SYSTEM_CLOCK have the same kind type parameter."

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

Jonathan Wakely  changed:

   What|Removed |Added

 Status|UNCONFIRMED |ASSIGNED
   Assignee|unassigned at gcc dot gnu.org  |redi at gcc dot gnu.org
 Ever confirmed|0   |1
   Last reconfirmed||2023-11-18

[Bug target/112606] [14 Regression] powerpc64le-linux-gnu: 'FAIL: gcc.target/powerpc/p8vector-fp.c scan-assembler xsnabsdp'

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112606

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |14.0
  Component|rtl-optimization|target
   Keywords||missed-optimization

[Bug target/112605] ICE: in gen_reg_rtx with -mforce-indirect-call -fsplit-stack

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112605

Andrew Pinski  changed:

   What|Removed |Added

  Known to fail||8.1.0
   Last reconfirmed||2023-11-18
Summary|ICE: in gen_reg_rtx, at |ICE: in gen_reg_rtx with
   |emit-rtl.cc:1176|-mforce-indirect-call
   ||-fsplit-stack
 Status|UNCONFIRMED |NEW
   Keywords||ice-on-valid-code
 Ever confirmed|0   |1

--- Comment #1 from Andrew Pinski  ---
Confirmed, it has been a bug since -mforce-indirect-call was introduced in GCC
8.

[Bug target/112605] ICE: in gen_reg_rtx with -mforce-indirect-call -fsplit-stack

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112605

Andrew Pinski  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #2 from Andrew Pinski  ---
Dup of bug 89316.

*** This bug has been marked as a duplicate of bug 89316 ***

[Bug target/89316] ICE with -mforce-indirect-call and -fsplit-stack

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89316

Andrew Pinski  changed:

   What|Removed |Added

 CC||iamanonymous.cs at gmail dot 
com

--- Comment #11 from Andrew Pinski  ---
*** Bug 112605 has been marked as a duplicate of this bug. ***

[Bug rtl-optimization/112610] New: [12/13/14 Regression] ICE: SIGSEGV with -flive-range-shrinkage -fdump-rtl-all-all -fira-verbose=9

2023-11-18 Thread zsojka at seznam dot cz via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112610

Bug ID: 112610
   Summary: [12/13/14 Regression] ICE: SIGSEGV with
-flive-range-shrinkage -fdump-rtl-all-all
-fira-verbose=9
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Keywords: ice-on-valid-code
  Severity: normal
  Priority: P3
 Component: rtl-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: zsojka at seznam dot cz
  Target Milestone: ---
  Host: x86_64-pc-linux-gnu
Target: x86_64-pc-linux-gnu

Created attachment 56634
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56634&action=edit
reduced testcase

Compiler output:
$ x86_64-pc-linux-gnu-gcc -flive-range-shrinkage -fdump-rtl-all-all
-fira-verbose=9 testcase.c -wrapper
valgrind,-q,--exit-on-first-error=yes,--error-exitcode=1
==31926== Invalid read of size 4
==31926==at 0x4E688E7: __vfprintf_internal (in /lib64/libc.so.6)
==31926==by 0x4F221A9: __fprintf_chk (in /lib64/libc.so.6)
==31926==by 0x1285694: fprintf (stdio2.h:79)
==31926==by 0x1285694: record_reg_classes(int, int, rtx_def**,
machine_mode*, char const**, rtx_insn*, reg_class*) [clone .constprop.0]
(ira-costs.cc:512)
==31926==by 0x1287415: record_operand_costs(rtx_insn*, reg_class*)
(ira-costs.cc:1502)
==31926==by 0x1287A47: scan_one_insn (ira-costs.cc:1596)
==31926==by 0x1287A47: process_bb_for_costs(basic_block_def*)
(ira-costs.cc:1746)
==31926==by 0x128A20F: find_costs_and_classes(_IO_FILE*)
(ira-costs.cc:2030)
==31926==by 0x128AC7A: ira_set_pseudo_classes(bool, _IO_FILE*)
(ira-costs.cc:2614)
==31926==by 0x26736F5: alloc_global_sched_pressure_data
(haifa-sched.cc:7201)
==31926==by 0x26736F5: sched_init() (haifa-sched.cc:7356)
==31926==by 0x2674F5D: haifa_sched_init() (haifa-sched.cc:7368)
==31926==by 0x14714BC: schedule_insns() [clone .part.0] (sched-rgn.cc:3524)
==31926==by 0x1471B9B: schedule_insns (sched-rgn.cc:3518)
==31926==by 0x1471B9B: rest_of_handle_live_range_shrinkage
(sched-rgn.cc:3720)
==31926==by 0x1471B9B: (anonymous
namespace)::pass_live_range_shrinkage::execute(function*) (sched-rgn.cc:3807)
==31926==by 0x139801A: execute_one_pass(opt_pass*) (passes.cc:2641)
==31926==  Address 0x5628c30 is 192 bytes inside a block of size 472 free'd
==31926==at 0x4843A5F: free (vg_replace_malloc.c:985)
==31926==by 0x4E7EA19: fclose@@GLIBC_2.2.5 (in /lib64/libc.so.6)
==31926==by 0xFE7F4A: gcc::dump_manager::dump_finish(int)
(dumpfile.cc:1519)
==31926==by 0x1397421: pass_fini_dump_file(opt_pass*) (passes.cc:2258)
==31926==by 0x13984AD: execute_one_pass(opt_pass*) (passes.cc:2722)
==31926==by 0x13988FF: execute_pass_list_1(opt_pass*) (passes.cc:2750)
==31926==by 0x1398911: execute_pass_list_1(opt_pass*) (passes.cc:2751)
==31926==by 0x1398938: execute_pass_list(function*, opt_pass*)
(passes.cc:2761)
==31926==by 0xFA89F5: expand (cgraphunit.cc:1841)
==31926==by 0xFA89F5: cgraph_node::expand() (cgraphunit.cc:1794)
==31926==by 0xFA9909: output_in_order (cgraphunit.cc:2191)
==31926==by 0xFA9909: symbol_table::compile() [clone .part.0]
(cgraphunit.cc:2395)
==31926==by 0xFAC8A7: compile (cgraphunit.cc:2311)
==31926==by 0xFAC8A7: symbol_table::finalize_compilation_unit()
(cgraphunit.cc:2583)
==31926==by 0x14D99B1: compile_file() (toplev.cc:473)
==31926==  Block was alloc'd at
==31926==at 0x4840804: malloc (vg_replace_malloc.c:442)
==31926==by 0x4E7F42A: __fopen_internal (in /lib64/libc.so.6)
==31926==by 0xFE61D8: dump_open (dumpfile.cc:378)
==31926==by 0xFE61D8: dump_open(char const*, bool) (dumpfile.cc:369)
==31926==by 0xFE8265: gcc::dump_manager::dump_start(int, dump_flag*)
(dumpfile.cc:1477)
==31926==by 0x13954FD: pass_init_dump_file(opt_pass*) [clone .part.0]
(passes.cc:2228)
==31926==by 0x1398337: pass_init_dump_file (passes.cc:2563)
==31926==by 0x1398337: execute_one_pass(opt_pass*) (passes.cc:2626)
==31926==by 0x13988FF: execute_pass_list_1(opt_pass*) (passes.cc:2750)
==31926==by 0x1398911: execute_pass_list_1(opt_pass*) (passes.cc:2751)
==31926==by 0x1398938: execute_pass_list(function*, opt_pass*)
(passes.cc:2761)
==31926==by 0xFA89F5: expand (cgraphunit.cc:1841)
==31926==by 0xFA89F5: cgraph_node::expand() (cgraphunit.cc:1794)
==31926==by 0xFA9909: output_in_order (cgraphunit.cc:2191)
==31926==by 0xFA9909: symbol_table::compile() [clone .part.0]
(cgraphunit.cc:2395)
==31926==by 0xFAC8A7: compile (cgraphunit.cc:2311)
==31926==by 0xFAC8A7: symbol_table::finalize_compilation_unit()
(cgraphunit.cc:2583)
==31926== 
==31926== 
==31926== Exit program on first error (--exit-on-first-error=yes)

$ x86_64-pc-linux-gnu-gcc -v
Using built-in specs.
COLLECT_GCC=/repo/gcc-trunk/binary-latest-amd64/bin/x86_64-pc-linux-gnu-gcc
COLLECT_LTO_WRAPPER=/repo/gcc-trunk/binary-trunk-r

[Bug target/112611] New: LoongArch: Test cases lsx-vshuf.c and lasx-xvshuf_b.c fails on LA664

2023-11-18 Thread xry111 at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112611

Bug ID: 112611
   Summary: LoongArch: Test cases lsx-vshuf.c and lasx-xvshuf_b.c
fails on LA664
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: xry111 at gcc dot gnu.org
  Target Milestone: ---

Some test cases in these two files depend on some undocumented behavior of
LA464, now they fail with LA664.

I guess we should just remove the problematic test cases.

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #16 from Andrew Pinski  ---
Note using -fdump-unnumbered might help to avoid dumping memory addresses that
were from memory addresses inside gcc itself.

[Bug rtl-optimization/112568] [14 Regression] Miscompilation of radeonsi (mesa) with -march=raptorlake (-mavx) since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112568

--- Comment #16 from Andrew Pinski  ---
(In reply to Kostadin Shishmanov from comment #13)
> Created attachment 56617 [details]
> dump of the first different pass with the commit reverted

Note using -fdump-unnumbered might help to avoid dumping memory addresses that
were from memory addresses inside gcc itself. I forgot about that when I
mentioned the -da option.

[Bug c/112612] New: [Missed Optimization] Holding on the loop variable rather than a derived value which can replace it

2023-11-18 Thread eyalroz1 at gmx dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112612

Bug ID: 112612
   Summary: [Missed Optimization] Holding on the loop variable
rather than a derived value which can replace it
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: eyalroz1 at gmx dot com
  Target Milestone: ---

Consider the following function:

void foo(int* __restrict__ a) {
int i, val;
for (i = 0; i < 100; i++) {
val = 2 * i;
a[i] = val;
}
}

When compiling it for x86_64 with -O3 -fno-unroll-loops -fno-tree-vectorize,
GCC 7.2 used to give:

foo:
xor eax, eax
.L2:
mov DWORD PTR [rdi], eax
add eax, 2
add rdi, 4
cmp eax, 200
jne .L2
rep ret

which was rather wasteful, as eax and rdi - eax are linearly related. With GCC
13.2 or trunk on GodBolt as of today, this improves, but not really:

foo:
xor eax, eax
.L2:
lea edx, [rax+rax]
mov DWORD PTR [rdi+rax*4], edx
add rax, 1
cmp rax, 100
jne .L2
ret

So, we don't increment two things; but - we do have an addition-via-lea in each
iteration. Is that really necessary? I mean, instead of keeping the i variable
(in rax), we could keep v = 2 * i, and that's good enough for both addressing
and condition checking. Indeed, clang 17 emits:

foo: # @foo
  xor eax, eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
  mov dword ptr [rdi + 2*rax], eax
  add rax, 2
  cmp rax, 200
  jne .LBB0_1
  ret

which is almost the same, except that it holds v = 2 * i rather than i. (clang
has produced this code since v3.0.0 at least.)

GodBolt link: https://gcc.godbolt.org/z/MjzTbr831
Originally discussed in this SO question:
https://stackoverflow.com/q/48354636/1593077

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #17 from Andrew Pinski  ---
It looks like another pass after vzeroupper depends on REG_DEAD/REG_UNUSED to
be correct but a different pass before that pass messes it up ...
I am still trying to figure out which pass is making the bad change.
Note vzeroupper does seem to be the same/correct but the updated
REG_DEAD/REG_UNUSED notes mess up the passes afterwards ...

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

Andrew Pinski  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Last reconfirmed||2023-11-18
 Status|UNCONFIRMED |NEW

--- Comment #18 from Andrew Pinski  ---
This fixes the issue for me:
```
diff --git a/gcc/compare-elim.cc b/gcc/compare-elim.cc
index c59dc0cf5a5..50424bdfa09 100644
--- a/gcc/compare-elim.cc
+++ b/gcc/compare-elim.cc
@@ -908,6 +908,7 @@ static unsigned int
 execute_compare_elim_after_reload (void)
 {
   df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
   df_analyze ();

   gcc_checking_assert (!all_compares.exists ());

```

compare-elim.cc depends on up to date REG_UNUSED and between before vzeroupper
and cmpelim the note gets out of date.

[Bug rtl-optimization/112568] [14 Regression] Miscompilation of radeonsi (mesa) with -march=raptorlake (-mavx) since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112568

--- Comment #17 from Andrew Pinski  ---
Can you try the patch in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572#c18 ?

[Bug target/112613] New: gcc generates incorrect argument reads at prologue

2023-11-18 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112613

Bug ID: 112613
   Summary: gcc generates incorrect argument reads at prologue
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: slyfox at gcc dot gnu.org
  Target Milestone: ---

This is an example extracted from an attempt to debug PR112572.

The attached file is not directly executable, and it's not easy to amend to make
it executable, but I think it illustrates the problem well:

For the following function in the source file:


__attribute__((noipa))
void X86InterleavedAccessGroup::decompose(
 Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy,
 SmallVectorImpl<Instruction *> &DecomposedVectors) {
 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
...

gcc generates the following prologue:


$ gcc/xg++ -Bgcc -O2 -march=znver2 -fno-checking -fno-lifetime-dse -std=c++17
-S /tmp/bug.cpp -o - -fPIE
...
_ZN1n25X86InterleavedAccessGroup9decomposeEPN4llvm11InstructionEjPNS1_15FixedVectorTypeERNS1_15SmallVectorImplIS3_EE:
.LFB19440:
.cfi_startproc
.cfi_personality 0x9b,DW.ref.__gxx_personality_v0
.cfi_lsda 0x1b,.LLSDA19440
pushq   %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq    %rsp, %rbp
.cfi_def_cfa_register 6
pushq   %r15
pushq   %r14
pushq   %r13
pushq   %r12
pushq   %rbx
.cfi_offset 15, -24
.cfi_offset 14, -32
.cfi_offset 13, -40
.cfi_offset 12, -48
.cfi_offset 3, -56
movl    %edx, %r12d
movq    %r8, %rbx
subq    $328, %rsp
cmpb    $91, 16(%r14) // <- what is %r14? it should be %rsi
...

I think %r14 is an invalid value. It should be something like %rsi (that's what
-fno-PIE does).

I hope this example is analysable to get the idea why this register is chosen.

$ gcc/xg++ -Bgcc -v
Reading specs from gcc/specs
COLLECT_GCC=gcc/xg++
COLLECT_LTO_WRAPPER=gcc/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: /home/slyfox/dev/git/gcc/configure --disable-multilib
--disable-bootstrap --disable-lto --disable-libsanitizer
--disable-libstdcxx-pch --enable-languages=c,c++ --disable-libgomp
--disable-libquadmath --disable-libvtv CFLAGS='-O1 -g0' CXXFLAGS='-O1 -g0'
LDFLAGS='-O1 -g0'
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 14.0.0 20231118 (experimental) (GCC)

[Bug target/112613] gcc generates incorrect argument reads at prologue

2023-11-18 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112613

--- Comment #1 from Sergei Trofimovich  ---
Created attachment 56635
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56635&action=edit
bug.cpp.xz

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #19 from Sergei Trofimovich  ---
I spent some time poking at the bug and was not able to reproduce it on my
toolchain.

I was able to get it to fail on gentoo's toolchain and arrived at problems in
lib/Target/X86/X86InterleavedAccess.cpp. My theory is that
X86InterleavedAccessGroup::decompose() gets compiled incorrectly.

`-fPIE` seems to be the flag that triggers gcc to generate slightly incorrect
prologue (wrong register is being used). I filed a separate issue in case it's
an invalid manual reduction: https://gcc.gnu.org/PR112613

[Bug target/112613] gcc generates incorrect argument reads at prologue

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112613

--- Comment #2 from Andrew Pinski  ---
since the bad instruction is a compare, it does seem like it might be solved
via https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572#c18 too. compare
elimination is going wrong.

[Bug tree-optimization/112612] Holding on the loop variable rather than a derived value which can replace it

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112612

Andrew Pinski  changed:

   What|Removed |Added

  Component|middle-end  |tree-optimization
Summary|[Missed Optimization]   |Holding on the loop
   |Holding on the loop |variable rather than a
   |variable rather than a  |derived value which can
   |derived value which can |replace it
   |replace it  |
 Target||x86_64-linux-gnu

--- Comment #1 from Andrew Pinski  ---
IV-OPTs selects these IVs and is very much target specific due to cost model.
It is an NP-complete problem after all too.

[Bug libstdc++/110801] std::format code runs slower than equivalent {fmt} code

2023-11-18 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110801

--- Comment #3 from CVS Commits  ---
The master branch has been updated by Jonathan Wakely :

https://gcc.gnu.org/g:41a5ea4cab2c59f9911325281f7df1d3ae846d48

commit r14-5587-g41a5ea4cab2c59f9911325281f7df1d3ae846d48
Author: Jonathan Wakely 
Date:   Tue Aug 15 22:43:41 2023 +0100

libstdc++: Add fast path for std::format("{}", x) [PR110801]

This optimizes the simple case of formatting a single string, integer
or bool, with no format-specifier (so no padding, alignment, alternate
form etc.)

libstdc++-v3/ChangeLog:

PR libstdc++/110801
* include/std/format (_Sink_iter::_M_reserve): New member
function.
(_Sink::_Reservation): New nested class.
(_Sink::_M_reserve, _Sink::_M_bump): New virtual functions.
(_Seq_sink::_M_reserve, _Seq_sink::_M_bump): New virtual
overrides.
(_Iter_sink::_M_reserve): Likewise.
(__do_vformat_to): Use new functions to optimize "{}" case.

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

--- Comment #4 from CVS Commits  ---
The master branch has been updated by Jonathan Wakely :

https://gcc.gnu.org/g:279e407a06cc676d8e6e0bb5755b0a804e05377c

commit r14-5588-g279e407a06cc676d8e6e0bb5755b0a804e05377c
Author: Jonathan Wakely 
Date:   Sat Nov 18 20:56:35 2023 +

libstdc++: Check string value_type in std::make_format_args [PR112607]

libstdc++-v3/ChangeLog:

PR libstdc++/112607
* include/std/format (basic_format_arg::_S_to_arg_type): Check
value_type for basic_string_view and basic_string
specializations.
* testsuite/std/format/arguments/112607.cc: New test.

[Bug libstdc++/110801] std::format code runs slower than equivalent {fmt} code

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110801

Jonathan Wakely  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
   Last reconfirmed||2023-11-18
 Ever confirmed|0   |1

--- Comment #4 from Jonathan Wakely  ---
I get these numbers now:

sprintf   530172 ns   529123 ns 1366
ostringstream1150474 ns  1147048 ns  613
to_string 150262 ns   149901 ns 4680
format284007 ns   283124 ns 2481
format_to 142947 ns   142612 ns 5046
std_format340518 ns   339737 ns 2062
std_format_to 296434 ns   295710 ns 2407

There's still room for improvement.

[Bug libstdc++/112607] : _Normalize does not consider char_type for the basic_string_view case

2023-11-18 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112607

Jonathan Wakely  changed:

   What|Removed |Added

   Target Milestone|--- |13.3

--- Comment #5 from Jonathan Wakely  ---
Fixed on trunk so far.

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #20 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #18)
> compare-elim.cc depends on up to date REG_UNUSED and between before
> vzeroupper and cmpelim the note gets out of date.

Note it depends on it indirectly via single_set; in fact, any pass that uses
single_set will depend on the REG_UNUSED notes being up to date.

[Bug tree-optimization/112094] `popcnt(a) == n | a != 0` should be simplified to `a!=0 | n == 0`

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112094

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed||2023-11-18
 Status|UNCONFIRMED |ASSIGNED
 Ever confirmed|0   |1

--- Comment #1 from Andrew Pinski  ---
.

[Bug target/112613] gcc generates incorrect argument reads at prologue

2023-11-18 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112613

--- Comment #3 from Sergei Trofimovich  ---
> since the bad instruction is a compare, it does seem like it might be solved 
> via https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572#c18 too. compare 
> elimination is going wrong.

Yeah, that fixes it. In the bad case, bug.cpp.317r.cmpelim contains the
following start of the function:

1: NOTE_INSN_DELETED
  922: NOTE_INSN_BASIC_BLOCK 2
  889: [bp:DI-0x130]=di:DI
  888: r14:DI=si:DI
  REG_UNUSED r14:DI
4: r12:DI=zero_extend(dx:SI)
  REG_DEAD dx:SI
6: bx:DI=r8:DI
  REG_DEAD r8:DI
7: NOTE_INSN_FUNCTION_BEG
  818: si:DI=r14:DI
   16: flags:CCZ=cmp([si:DI+0x10],0x5b)

The 'REG_UNUSED r14:DI' looks wrong. That possibly caused use of dangling
register. I wonder if `gcc` could fail in -fchecking= mode if it's a reasonable
check.

[Bug fortran/112609] [F2023] Restrictions on integer arguments to SYSTEM_CLOCK

2023-11-18 Thread anlauf at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112609

anlauf at gcc dot gnu.org changed:

   What|Removed |Added

   Last reconfirmed||2023-11-18
   Assignee|unassigned at gcc dot gnu.org  |anlauf at gcc dot 
gnu.org
 Ever confirmed|0   |1
 Status|UNCONFIRMED |ASSIGNED

--- Comment #1 from anlauf at gcc dot gnu.org ---
Patch: https://gcc.gnu.org/pipermail/fortran/2023-November/059923.html

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #21 from Andrew Pinski  ---
So originally we had in vzeroupper:
```
(insn # # # 2 (set (reg/f:DI 43 r15 [orig:126 _84 ] [126])
(reg/f:DI 2 cx [orig:126 _84 ] [126]))
"/home/sam/git/llvm-project/llvm/include/llvm/ADT/SmallVector.h":272:42 discrim
2# {*movdi_internal}
 (expr_list:REG_UNUSED (reg/f:DI 43 r15 [orig:126 _84 ] [126])
(nil)))
(insn # # # 2 (set (reg/f:DI 43 r15 [orig:126 _84 ] [126])
(reg/f:DI 2 cx [orig:126 _84 ] [126]))
"/tmp/build/include/c++/14.0.0/bits/stl_algo.h":1897:7# {*movdi_internal}
 (expr_list:REG_DEAD (reg/f:DI 2 cx [orig:126 _84 ] [126])
(nil)))
```
And then in postreload we changed it to just:
```
(insn # # # 2 (set (reg/f:DI 43 r15 [orig:126 _84 ] [126])
(reg/f:DI 2 cx [orig:126 _84 ] [126]))
"/home/sam/git/llvm-project/llvm/include/llvm/ADT/SmallVector.h":272:42 discrim
2# {*movdi_internal}
 (expr_list:REG_UNUSED (reg/f:DI 43 r15 [orig:126 _84 ] [126])
(nil)))
```
Without updating the REG notes there. I suspect if we moved vzeroupper after
postreload it would also just have worked. reload/LRA looks like it likes to
generate extra moves that do the same thing ...
Moving vzeroupper after postreload will also reduce memory and compile time due
to less REG_DEAD/REG_UNUSED being generated ...

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #22 from Andrew Pinski  ---
*** Bug 112613 has been marked as a duplicate of this bug. ***

[Bug target/112613] gcc generates incorrect argument reads at prologue

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112613

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|UNCONFIRMED |RESOLVED

--- Comment #4 from Andrew Pinski  ---
Marking this as a dup of bug 112572 since the issue is the same.

*** This bug has been marked as a duplicate of bug 112572 ***

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread rsandifo at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #23 from Richard Sandiford  ---
(In reply to Andrew Pinski from comment #18)
> compare-elim.cc depends on up to date REG_UNUSED and between before
> vzeroupper and cmpelim the note gets out of date.
Thanks for tracking it down.

I can't find this written down from a quick search, so perhaps I'm
misremembering, but I thought that REG_UNUSED notes had to be kept
up to date.  Only REG_DEAD ones are allowed to rot.  Like you say,
single_set is used all over the place, and is expected to be safe.

If so, I guess it's postreload that should be fixed.

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #24 from Andrew Pinski  ---
(In reply to Richard Sandiford from comment #23)
> (In reply to Andrew Pinski from comment #18)
> > compare-elim.cc depends on up to date REG_UNUSED and between before
> > vzeroupper and cmpelim the note gets out of date.
> Thanks for tracking it down.
> 
> I can't find this written down from a quick search, so perhaps I'm
> misremembering, but I thought that REG_UNUSED notes had to be kept
> up to date.  Only REG_DEAD ones are allowed to rot.  Like you say,
> single_set is used all over the place, and is expected to be safe.
> 
> If so, I guess it's postreload that should be fixed.

Eric B. mentioned back in 2011, neither REG_DEAD nor REG_UNUSED needs to be
kept up to date:
https://gcc.gnu.org/pipermail/gcc-patches/2011-October/326733.html
Also see PR 48773 where he mentioned the same.
He mentioned the same thing again in 2018 even:
https://gcc.gnu.org/pipermail/gcc-patches/2018-October/508057.html


Though you mentioned the opposite here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634121.html

Though I wonder if this is all a mess due to single_set indirectly using them
which has been there since before 2000 (and before df was added).

[Bug tree-optimization/112612] Holding on the loop variable rather than a derived value which can replace it

2023-11-18 Thread eyalroz1 at gmx dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112612

--- Comment #2 from Eyal Rozenberg  ---
(In reply to Andrew Pinski from comment #1)
> IV-OPTs selects these IVs and is very much target specific due to cost model.

In this example, it seems that the missed optimization should be useful under
most/all cost models. Of course, I may be wrong, I'm no CPU expert.

> It is a N[P] complete problem after all too.

I wonder if the asymptotic nature of the general problem is really the issue
here.

Anyway - I'm just noting the behavior. It is of course up to you all to decide
whether you want to do something about it.

[Bug c/112614] New: Compile-time float-to-_Decimal64 fails for -NAN

2023-11-18 Thread terra at gnome dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112614

Bug ID: 112614
   Summary: Compile-time float-to-_Decimal64 fails for -NAN
   Product: gcc
   Version: 11.4.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: terra at gnome dot org
  Target Milestone: ---

Created attachment 56636
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56636&action=edit
Preprocessed source code

It looks like compile-time conversion of -NAN and -(double)NAN to _Decimal64
fails.  Runtime conversion seems ok.  -INFINITY is ok.

Tentatively blaming the C front end.

$ gcc -Wall -O2 d64nansign.c
$ ./a.out 
Sign bit set as expected
Sign bit set as expected
Sign bit not set as expected
Sign bit not set as expected
Sign bit set as expected
Sign bit set as expected

Target: x86_64-linux-gnu


#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
test (_Decimal64 x)
{
  uint64_t u;
  assert (sizeof (x) == sizeof (u));
  memcpy (&u, &x, sizeof(x));
  if (u >> 63) {
printf ("Sign bit set as expected\n");
  } else {
printf ("Sign bit not set as expected\n");
  }
}


int
main (int argc, char **argv)
{
  // compile-time
  test (-(_Decimal64)(NAN));
  test (-(_Decimal64)(NAN));
  test ((_Decimal64)(-NAN));   // Fails
  test ((_Decimal64)(-(double)NAN));   // Fails

  // runtime
  test (atof("-nan"));
  test (-atof("nan"));
}

[Bug target/89316] ICE with -mforce-indirect-call and -fsplit-stack

2023-11-18 Thread ubizjak at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89316

Uroš Bizjak  changed:

   What|Removed |Added

   Assignee|unassigned at gcc dot gnu.org  |ubizjak at gmail dot com
 Status|NEW |ASSIGNED

--- Comment #12 from Uroš Bizjak  ---
Created attachment 56637
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=56637&action=edit
Proposed patch

Patch that implements ideas from Comment 7 and Comment 8.

[Bug c/112615] New: gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread gandalf at winds dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

Bug ID: 112615
   Summary: gcc incorrectly assumes char *x[2]={"str1", "str2"}
has 16-byte minimum alignment and generates SSE
instructions (e.g. movaps) when accessing this data
   Product: gcc
   Version: 13.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: gandalf at winds dot org
  Target Milestone: ---

I ran into the following problem while trying to get Oracle 21c to run on
Gentoo OS under glibc-2.38 and GCC 13.2 on x86-64 with SSE instructions
enabled.

glibc-2.38's time/tzset.c file (see
https://github.com/bminor/glibc/blob/master/time/tzset.c) has the following
non-static declaration on line 31:

char *__tzname[2] = { (char *) "GMT", (char *) "GMT" };

GCC wrongly assumes that this variable __tzname has a minimum alignment of 16
bytes instead of 8. GCC thus generates the following assembly instructions for
this portion of __tzset_parse_tz() when compiling with -march=x86-64:

327   /* Get the standard time zone abbreviations.  */
328   if (parse_tzname (&tz, 0) && parse_offset (&tz, 0))
   0x0a6f <+63>:movups %xmm0,0x0(%rip)# 0xa76
<__tzset_parse_tz+70>
   0x0a76 <+70>:movups %xmm0,0x0(%rip)# 0xa7d
<__tzset_parse_tz+77>
   0x0a7d <+77>:movups %xmm0,0x0(%rip)# 0xa84
<__tzset_parse_tz+84>
   0x0a84 <+84>:movups %xmm0,0x0(%rip)# 0xa8b
<__tzset_parse_tz+91>
   0x0a8b <+91>:call   0x440 
   0x0a90 <+96>:test   %al,%al
   0x0a92 <+98>:jne0xac8 <__tzset_parse_tz+152>
   0x0a94 <+100>:   movq   0x0(%rip),%xmm0# 0xa9c
<__tzset_parse_tz+108>

132   __tzname[1] = (char *) tz_rules[1].name;
   0x0a9c <+108>:   xor%eax,%eax
   0x0a9e <+110>:   xor%edx,%edx
   0x0aa0 <+112>:   pinsrq $0x1,0x0(%rip),%xmm0# 0xaab
<__tzset_parse_tz+123>

129   __daylight = tz_rules[0].offset != tz_rules[1].offset;
   0x0aab <+123>:   mov%edx,0x0(%rip)# 0xab1
<__tzset_parse_tz+129>

130   __timezone = -tz_rules[0].offset;
   0x0ab1 <+129>:   mov%rax,0x0(%rip)# 0xab8
<__tzset_parse_tz+136>

131   __tzname[0] = (char *) tz_rules[0].name;
132   __tzname[1] = (char *) tz_rules[1].name;
   0x0ab8 <+136>:   movaps %xmm0,0x0(%rip)# 0xabf
<__tzset_parse_tz+143>

In the above, line 131 and 132 are combined into a "movaps" instruction that
requires 16-byte alignment to work properly. However, if a C program is
compiled with a variable called __tzname that is not 16-byte aligned (due to
the fact that char* only requires 8-byte alignment), and this is then linked to
glibc (causing the locally defined __tzname to override the one declared in
glibc), and the if(parse_tzname()) check on line 328 fails due to an invalid TZ
environment variable setting (such as is the case when using Oracle 21c on
Gentoo), the movaps instruction above causes a segmentation fault. Here is an
example test.c C program:

#include <stdio.h>
#include <time.h>

/* Specifically align __tzname to a non-16-byte boundary */
__attribute__((aligned(8))) char *__tzname[2]={"GMT", "GMT"};

char *x="xx";  // This is here to take up the first 8 bytes in .data

int main()
{
  struct tm tm={};
  printf("%ld\n", mktime(&tm));
  return 0;
}

$ gcc -O3 -march=x86-64 test.c -o test -Wall -ggdb3
$ nm test | grep __tzname
00404028 D __tzname
$ ./test
-2209057200
$ TZ=xx ./test
Segmentation fault (core dumped)

Removing the __attribute__((aligned(8))) from the test.c program, as follows,
causes the following change:

#include <stdio.h>
#include <time.h>

/* GCC now aligns __tzname to 16 bytes */
char *__tzname[2]={"GMT", "GMT"};
char *x="xx";

int main()
{
  struct tm tm={};
  printf("%ld\n", mktime(&tm));
  return 0;
}

$ gcc -O3 -march=x86-64 test.c -o test -Wall -ggdb3
$ nm test | grep __tzname
00404030 D __tzname
$ ./test
-2209057200
$ TZ=xx ./test
-2209057200

In the examples above, the "x" variable is used to consume 8 bytes in .data, so
that the next available address for "__tzname" is 0x404028.

Assuming a minimum alignment of 16 for __tzname only makes sense when you're
either compiling the whole program or when __tzname is static, but GCC should
not do this when the variable is non-static (as is the case when tzset.o is
compiled inside the glibc source package).

I should clarify that optimizing the variable's address to use 16-byte
alignment can be ideal for data storage vs cache-line boundaries and so this
optimization should likely remain. But the instructions acting on this data
area must assume an 8-byte minimum alignment here, not 16.

[Bug analyzer/107573] RFE: analyzer handling of strtok

2023-11-18 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107573

--- Comment #2 from CVS Commits  ---
The master branch has been updated by David Malcolm :

https://gcc.gnu.org/g:f65f63c4d86a48be042a3ad242fffe5fe8347ff0

commit r14-5591-gf65f63c4d86a48be042a3ad242fffe5fe8347ff0
Author: David Malcolm 
Date:   Sat Nov 18 20:35:59 2023 -0500

analyzer: new warning: -Wanalyzer-undefined-behavior-strtok [PR107573]

This patch:
- adds support to the analyzer for tracking API-private state
  for which we don't have a decl (such as strtok's internal state),
- uses it to implement a new -Wanalyzer-undefined-behavior-strtok which
  warns when strtok (NULL, delim) is called as the first call to
  strtok after main.

gcc/analyzer/ChangeLog:
PR analyzer/107573
* analyzer.h (register_known_functions): Add region_model_manager
param.
* analyzer.opt (Wanalyzer-undefined-behavior-strtok): New.
* call-summary.cc
(call_summary_replay::convert_region_from_summary_1): Handle
RK_PRIVATE.
* engine.cc (impl_run_checkers): Pass model manager to
register_known_functions.
* kf.cc (class undefined_function_behavior): New.
(class kf_strtok): New.
(register_known_functions): Add region_model_manager param.
Use it to register "strtok".
* region-model-manager.cc
(region_model_manager::get_or_create_conjured_svalue): Add "idx"
param.
* region-model-manager.h
(region_model_manager::get_or_create_conjured_svalue): Add "idx"
param.
(region_model_manager::get_root_region): New accessor.
* region-model.cc (region_model::scan_for_null_terminator): Handle
"expr" being null.
(region_model::get_representative_path_var_1): Handle RK_PRIVATE.
* region-model.h (region_model::called_from_main_p): Make public.
* region.cc (region::get_memory_space): Handle RK_PRIVATE.
(region::can_have_initial_svalue_p): Handle MEMSPACE_PRIVATE.
(private_region::dump_to_pp): New.
* region.h (MEMSPACE_PRIVATE): New.
(RK_PRIVATE): New.
(class private_region): New.
(is_a_helper <const private_region *>::test): New.
* store.cc (store::replay_call_summary_cluster): Handle
RK_PRIVATE.
* svalue.h (struct conjured_svalue::key_t): Add "idx" param to
ctor and "m_idx" field.
(class conjured_svalue::conjured_svalue): Likewise.

gcc/ChangeLog:
PR analyzer/107573
* doc/invoke.texi: Add -Wanalyzer-undefined-behavior-strtok.

gcc/testsuite/ChangeLog:
PR analyzer/107573
* c-c++-common/analyzer/strtok-1.c: New test.
* c-c++-common/analyzer/strtok-2.c: New test.
* c-c++-common/analyzer/strtok-3.c: New test.
* c-c++-common/analyzer/strtok-4.c: New test.
* c-c++-common/analyzer/strtok-cppreference.c: New test.

Signed-off-by: David Malcolm 

[Bug target/112615] gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |INVALID

--- Comment #1 from Andrew Pinski  ---
Well the x86_64 SYSV ABI says:
An array uses the same alignment as its elements, except that a local or 
global array variable that requires at least 16 bytes, or a C99 local or 
global variable-length array variable, always has alignment of at least 16 
bytes.[4]


So I think this is not a bug in GCC or glibc but rather `Oracle 21c`.
Notice how it says global array variable.

[Bug target/112615] gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

--- Comment #2 from Andrew Pinski  ---
>Assuming a minimum alignment of 16 for __tzname only makes sense when you're 
>either compiling the whole program or when __tzname is static


Not if you follow the ABI which has had this in the ABI for years now (more
than 20).

You can work around the issue inside glibc by adding the attribute, but GCC is
producing correct code according to the ABI.

[Bug target/112615] gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread gandalf at winds dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

gandalf at winds dot org changed:

   What|Removed |Added

 Resolution|INVALID |FIXED

--- Comment #3 from gandalf at winds dot org ---
Thank you for the insight. Sounds like a bug in Oracle's compiler then.

I have just finished adding the aligned(8) attribute to glibc's __tzname on my
system and recompiled glibc. I verified this fixed the problem (no segfault).

Thanks much.

[Bug target/112615] gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread gandalf at winds dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

gandalf at winds dot org changed:

   What|Removed |Added

 Resolution|FIXED   |INVALID

[Bug target/112615] gcc incorrectly assumes char *x[2]={"str1", "str2"} has 16-byte minimum alignment and generates SSE instructions (e.g. movaps) when accessing this data

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112615

--- Comment #4 from Andrew Pinski  ---
.

Or their sources for Oracle DB ... 
I suspect they have an assembly file that contains that variable and didn't
realize the alignment rules.

Note GCC 4.1.2 even sets the alignment to 16 for a simple:
char *x[2]={"str1", "str2"};

[Bug target/112561] [14 Regression] Segfault only on RISC-V vector

2023-11-18 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112561

--- Comment #2 from CVS Commits  ---
The master branch has been updated by Pan Li :

https://gcc.gnu.org/g:af7fa3135b6b046fe3ba869993221042a65301eb

commit r14-5592-gaf7fa3135b6b046fe3ba869993221042a65301eb
Author: Juzhe-Zhong 
Date:   Sun Nov 19 09:45:33 2023 +0800

RISC-V: Fix bug of tuple move splitter

PR target/112561

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_tuple_move): Fix bug.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr112561.c: New test.

[Bug target/96253] decimal floating point missing on ARM

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96253

Andrew Pinski  changed:

   What|Removed |Added

 Target|aarch64, arm|arm

--- Comment #3 from Andrew Pinski  ---
aarch64 support was added in GCC 13 (via r13-679-gafd82c104b1038 and a few
others).

[Bug other/103487] "paranoia" Makefile target in gcc/Makefile fails to build

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103487

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed||2023-11-19
 CC|rth at gcc dot gnu.org |
 Ever confirmed|0   |1
 Status|UNCONFIRMED |NEW

--- Comment #1 from Andrew Pinski  ---
Confirmed.

Here is the beginning of the fixes; there is more that needs to be done:
```
diff --git a/contrib/paranoia.cc b/contrib/paranoia.cc
index a7821c42603..2955e3619dd 100644
--- a/contrib/paranoia.cc
+++ b/contrib/paranoia.cc
@@ -132,7 +132,6 @@ lines
  I'm doing this in C++ so that I can do operator overloading and not
  have to modify so damned much of the existing code.  */

-  extern "C" {
 #include 
 #include 
 #include 
@@ -151,6 +150,8 @@ lines

 #include "ansidecl.h"
 #include "auto-host.h"
+#include "system.h"
+#include "coretypes.h"
 #include "hwint.h"

 #undef EXTRA_MODES_FILE
@@ -169,13 +170,8 @@ lines
 };
 #undef DEFTREECODE

-#define class klass
-
 #include "real.h"

-#undef class
-  }
-
 /* We never produce signals from the library.  Thus setjmp need do nothing. 
*/
 #undef setjmp
 #define setjmp(x)  (0)
```

real_from_integer, for example, now takes a signop instead of an int:
/home/apinski/src/upstream-gcc-match/gcc/gcc/../contrib/paranoia.cc:254:41:
error: invalid conversion from ‘int’ to ‘signop’ [-fpermissive]

and even the number of arguments has been changed:
/home/apinski/src/upstream-gcc-match/gcc/gcc/../contrib/paranoia.cc:254:21:
error: too many arguments to function ‘void real_from_integer(real_value*,
format_helper, const wide_int_ref&, signop)’

machine_mode is now not just an enum.

And a few others.

[Bug tree-optimization/112616] New: wrong code at -O{s,2,3} on x86_64-linux-gnu

2023-11-18 Thread zhendong.su at inf dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112616

Bug ID: 112616
   Summary: wrong code at -O{s,2,3} on x86_64-linux-gnu
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: zhendong.su at inf dot ethz.ch
  Target Milestone: ---

It appears to be a regression from 9.*, and affects 10.* and later.

Compiler Explorer: https://godbolt.org/z/GxdzdqTjd


[510] % gcctk -v
Using built-in specs.
COLLECT_GCC=gcctk
COLLECT_LTO_WRAPPER=/local/suz-local/software/local/gcc-trunk/libexec/gcc/x86_64-pc-linux-gnu/14.0.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc-trunk/configure --disable-bootstrap
--enable-checking=yes --prefix=/local/suz-local/software/local/gcc-trunk
--enable-sanitizers --enable-languages=c,c++ --disable-werror --enable-multilib
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 14.0.0 20231118 (experimental) (GCC) 
[511] % 
[511] % gcctk -O1 small.c; ./a.out
[512] % 
[512] % gcctk -O3 small.c
[513] % ./a.out
Segmentation fault
[514] % 
[514] % cat small.c
unsigned a;
int b, d, e, f = 2, g, h = 1, *i = &b;
volatile int c = 1;
static int *o() {
  long m = ~a;
  int j = f / b, k = f - 1, n = m << -1 / ~g / k;
  if (j && n)
c;
  return &e;
}
static long p() {
  int *q = 0, **r = &q;
  if (c) {
*i = h;
*r = o();
  }
  return *q;
}
int main() {
  p();
  int *l = 0;
  if (d)
c = *l;
  return 0;
}

[Bug rtl-optimization/112568] [14 Regression] Miscompilation of radeonsi (mesa) with -march=raptorlake (-mavx) since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread kocelfc at tutanota dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112568

--- Comment #18 from Kostadin Shishmanov  ---
(In reply to Andrew Pinski from comment #17)
> Can you try the patch in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572#c18 ?

The patch does fix it.

[Bug middle-end/112572] [14 regression] LLVM miscompiled since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112572

--- Comment #25 from Andrew Pinski  ---
*** Bug 112568 has been marked as a duplicate of this bug. ***

[Bug rtl-optimization/112568] [14 Regression] Miscompilation of radeonsi (mesa) with -march=raptorlake (-mavx) since r14-5355-g3cd3a09b3f91a1

2023-11-18 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112568

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #19 from Andrew Pinski  ---
Even though this is an older bug, pr 112572 has more analysis of what is going
wrong so marking as a dup.

*** This bug has been marked as a duplicate of bug 112572 ***