[gcc r15-1906] c++: Implement C++26 CWG2819 - Allow cv void * null pointer value conversion to object types in constant expressions

2024-07-09 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:8eab5064d54f41054b6a50d233a1a78a935b1c2a

commit r15-1906-g8eab5064d54f41054b6a50d233a1a78a935b1c2a
Author: Jakub Jelinek 
Date:   Tue Jul 9 09:37:16 2024 +0200

c++: Implement C++26 CWG2819 - Allow cv void * null pointer value 
conversion to object types in constant expressions

The following patch implements CWG2819 (which wasn't a DR because
it changes behavior of C++26 only).

2024-07-09  Jakub Jelinek  

* constexpr.cc (cxx_eval_constant_expression): CWG2819 - Allow
cv void * null pointer value conversion to object types in constant
expressions.

* g++.dg/cpp26/constexpr-voidptr3.C: New test.
* g++.dg/cpp0x/constexpr-cast2.C: Adjust expected diagnostics for
C++26.
* g++.dg/cpp0x/constexpr-cast4.C: Likewise.

Diff:
---
 gcc/cp/constexpr.cc | 37 ++---
 gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C|  4 +--
 gcc/testsuite/g++.dg/cpp0x/constexpr-cast4.C|  1 -
 gcc/testsuite/g++.dg/cpp26/constexpr-voidptr3.C | 13 +
 4 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 0cdac0af7de0..14bbdea2546d 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8157,10 +8157,13 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
|| DECL_NAME (decl) == heap_vec_uninit_identifier))
  /* OK */;
/* P2738 (C++26): a conversion from a prvalue P of type "pointer to
-  cv void" to a pointer-to-object type T unless P points to an
-  object whose type is similar to T.  */
+  cv void" to a pointer-to-object type T unless P is a null
+  pointer value or points to an object whose type is similar to
+  T.  */
else if (cxx_dialect > cxx23)
  {
+   if (integer_zerop (sop))
+ return build_int_cst (type, 0);
r = cxx_fold_indirect_ref (ctx, loc, TREE_TYPE (type), sop);
if (r)
  {
@@ -8169,26 +8172,16 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
  }
if (!ctx->quiet)
  {
-   if (TREE_CODE (sop) == ADDR_EXPR)
- {
-   auto_diagnostic_group d;
-   error_at (loc, "cast from %qT is not allowed in a "
- "constant expression because "
- "pointed-to type %qT is not similar to %qT",
- TREE_TYPE (op), TREE_TYPE (TREE_TYPE (sop)),
- TREE_TYPE (type));
-   tree obj = build_fold_indirect_ref (sop);
-   inform (DECL_SOURCE_LOCATION (obj),
-   "pointed-to object declared here");
- }
-   else
- {
-   gcc_assert (integer_zerop (sop));
-   error_at (loc, "cast from %qT is not allowed in a "
- "constant expression because "
- "%qE does not point to an object",
- TREE_TYPE (op), oldop);
- }
+   gcc_assert (TREE_CODE (sop) == ADDR_EXPR);
+   auto_diagnostic_group d;
+   error_at (loc, "cast from %qT is not allowed in a "
+ "constant expression because "
+ "pointed-to type %qT is not similar to %qT",
+ TREE_TYPE (op), TREE_TYPE (TREE_TYPE (sop)),
+ TREE_TYPE (type));
+   tree obj = build_fold_indirect_ref (sop);
+   inform (DECL_SOURCE_LOCATION (obj),
+   "pointed-to object declared here");
  }
*non_constant_p = true;
return t;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C
index 3efbd92f0439..71ec08f36137 100644
--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C
@@ -5,9 +5,9 @@
 static int i;
 constexpr void *vp0 = nullptr;
 constexpr void *vpi = &i;
-constexpr int *p1 = (int *) vp0; // { dg-error "cast from .void\\*. is not 
allowed" }
+constexpr int *p1 = (int *) vp0; // { dg-error "cast from .void\\*. is not 
allowed" "" { target c++23_down } }
 constexpr int *p2 = (int *) vpi; // { dg-error "cast from .void\\*. is not 
allowed" "" { target c++23_down } }
-constexpr int *p3 = static_cast(vp0); // { dg-error "cast from 
.void\\*. is not allowed" }
+constexpr int *p3 = static_cast(vp0); // { dg-error "cast from 
.void\\*. is not allowed" "" { target c++23_down } }
 cons

[gcc r15-1907] c: Rewrite c_parser_omp_tile_sizes to use c_parser_expr_list

2024-07-09 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:4f767174b83027091f0e84b4ddb9a6370e549ffd

commit r15-1907-g4f767174b83027091f0e84b4ddb9a6370e549ffd
Author: Jakub Jelinek 
Date:   Tue Jul 9 10:45:25 2024 +0200

c: Rewrite c_parser_omp_tile_sizes to use c_parser_expr_list

The following patch simplifies c_parser_omp_tile_sizes to use
c_parser_expr_list, so that it will get CPP_EMBED parsing naturally,
without having another spot that needs to be adjusted for it.

2024-07-09  Jakub Jelinek  

* c-parser.cc (c_parser_omp_tile_sizes): Use c_parser_expr_list.

* c-c++-common/gomp/tile-11.c: Adjust expected diagnostics for c.
* c-c++-common/gomp/tile-12.c: Likewise.

Diff:
---
 gcc/c/c-parser.cc | 25 +
 gcc/testsuite/c-c++-common/gomp/tile-11.c |  6 +++---
 gcc/testsuite/c-c++-common/gomp/tile-12.c |  6 +++---
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 8c4e697a4e10..12c5ed5d92c7 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -26431,24 +26431,20 @@ c_parser_omp_tile_sizes (c_parser *parser, location_t 
loc)
   if (!parens.require_open (parser))
 return error_mark_node;
 
-  do
-{
-  if (sizes && !c_parser_require (parser, CPP_COMMA, "expected %<,%>"))
-   return error_mark_node;
-
-  location_t expr_loc = c_parser_peek_token (parser)->location;
-  c_expr cexpr = c_parser_expr_no_commas (parser, NULL);
-  cexpr = convert_lvalue_to_rvalue (expr_loc, cexpr, false, true);
-  tree expr = cexpr.value;
+  vec *sizes_vec
+= c_parser_expr_list (parser, true, true, NULL, NULL, NULL, NULL);
+  sizes = build_tree_list_vec (sizes_vec);
+  release_tree_vector (sizes_vec);
 
+  for (tree s = sizes; s; s = TREE_CHAIN (s))
+{
+  tree expr = TREE_VALUE (s);
   if (expr == error_mark_node)
{
  parens.skip_until_found_close (parser);
  return error_mark_node;
}
 
-  expr = c_fully_fold (expr, false, NULL);
-
   HOST_WIDE_INT n;
   if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
  || !tree_fits_shwi_p (expr)
@@ -26457,17 +26453,14 @@ c_parser_omp_tile_sizes (c_parser *parser, location_t 
loc)
{
  c_parser_error (parser, "% argument needs positive"
  " integral constant");
- expr = integer_one_node;
+ TREE_VALUE (s) = integer_one_node;
}
-
-  sizes = tree_cons (NULL_TREE, expr, sizes);
 }
-  while (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN));
   parens.require_close (parser);
 
   gcc_assert (sizes);
   tree c = build_omp_clause (loc, OMP_CLAUSE_SIZES);
-  OMP_CLAUSE_SIZES_LIST (c) = nreverse (sizes);
+  OMP_CLAUSE_SIZES_LIST (c) = sizes;
 
   return c;
 }
diff --git a/gcc/testsuite/c-c++-common/gomp/tile-11.c 
b/gcc/testsuite/c-c++-common/gomp/tile-11.c
index 7659fc5a072b..bf38bbe5dae6 100644
--- a/gcc/testsuite/c-c++-common/gomp/tile-11.c
+++ b/gcc/testsuite/c-c++-common/gomp/tile-11.c
@@ -21,9 +21,9 @@ test (void)
   for (int i = 0; i < 100; ++i)
 dummy (i);
 
-  #pragma omp tile sizes(1,2 /* { dg-error "expected ',' before end of line" } 
*/
-  for (int i = 0; i < 100; ++i)
-dummy (i);
+  #pragma omp tile sizes(1,2 /* { dg-error "expected '\\\)' before end of 
line" "" { target c } } */
+  for (int i = 0; i < 100; ++i) /* { dg-error "not enough nested loops" "" { 
target c } } */
+dummy (i); /* { dg-error "expected ',' before end of line" "" { target c++ 
} .-2 } */
 
   #pragma omp tile sizes /* { dg-error "expected '\\\(' before end of line" } 
*/
   for (int i = 0; i < 100; ++i)
diff --git a/gcc/testsuite/c-c++-common/gomp/tile-12.c 
b/gcc/testsuite/c-c++-common/gomp/tile-12.c
index 8408d53b641e..f36bbdb0ef68 100644
--- a/gcc/testsuite/c-c++-common/gomp/tile-12.c
+++ b/gcc/testsuite/c-c++-common/gomp/tile-12.c
@@ -35,9 +35,9 @@ test (void)
 dummy (i);
 
   #pragma omp parallel for
-  #pragma omp tile sizes(1,2 /* { dg-error "expected ',' before end of line" } 
*/
-  for (int i = 0; i < 100; ++i)
-dummy (i);
+  #pragma omp tile sizes(1,2 /* { dg-error "expected '\\\)' before end of 
line" "" { target c } } */
+  for (int i = 0; i < 100; ++i) /* { dg-error "not enough nested loops" "" { 
target c } } */
+dummy (i); /* { dg-error "expected ',' before end of line" "" { target c++ 
} .-2 } */
 
   #pragma omp parallel for
   #pragma omp tile sizes /* { dg-error "expected '\\\(' before end of line" } 
*/


[gcc r15-1908] i386: Correct AVX10 CPUID emulation

2024-07-09 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:298a576f00c49b8f4529ea2f87b9943a32743250

commit r15-1908-g298a576f00c49b8f4529ea2f87b9943a32743250
Author: Haochen Jiang 
Date:   Tue Jul 9 16:31:02 2024 +0800

i386: Correct AVX10 CPUID emulation

AVX10 Documentation has specified ecx value as 0 for AVX10 version and
vector size under 0x24 subleaf. Although for ecx=1, the bits are all
reserved for now, we still need to specify ecx as 0 to avoid dirty
value in ecx.

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features): Correct
AVX10 CPUID emulation to specify ecx value.

Diff:
---
 gcc/common/config/i386/cpuinfo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 936039725ab6..2ae77d335d24 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -998,10 +998,10 @@ get_available_features (struct __processor_model 
*cpu_model,
}
 }
 
-  /* Get Advanced Features at level 0x24 (eax = 0x24).  */
+  /* Get Advanced Features at level 0x24 (eax = 0x24, ecx = 0).  */
   if (avx10_set && max_cpuid_level >= 0x24)
 {
-  __cpuid (0x24, eax, ebx, ecx, edx);
+  __cpuid_count (0x24, 0, eax, ebx, ecx, edx);
   version = ebx & 0xff;
   if (ebx & bit_AVX10_256)
switch (version)


[gcc r14-10397] i386: Correct AVX10 CPUID emulation

2024-07-09 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:74c15cb93b3830fee79f75805329d4299ff4a2f0

commit r14-10397-g74c15cb93b3830fee79f75805329d4299ff4a2f0
Author: Haochen Jiang 
Date:   Tue Jul 9 16:31:02 2024 +0800

i386: Correct AVX10 CPUID emulation

AVX10 Documentation has specified ecx value as 0 for AVX10 version and
vector size under 0x24 subleaf. Although for ecx=1, the bits are all
reserved for now, we still need to specify ecx as 0 to avoid dirty
value in ecx.

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features): Correct
AVX10 CPUID emulation to specify ecx value.

Diff:
---
 gcc/common/config/i386/cpuinfo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 017a952a5db0..56427474b7be 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -1014,10 +1014,10 @@ get_available_features (struct __processor_model 
*cpu_model,
}
 }
 
-  /* Get Advanced Features at level 0x24 (eax = 0x24).  */
+  /* Get Advanced Features at level 0x24 (eax = 0x24, ecx = 0).  */
   if (avx10_set && max_cpuid_level >= 0x24)
 {
-  __cpuid (0x24, eax, ebx, ecx, edx);
+  __cpuid_count (0x24, 0, eax, ebx, ecx, edx);
   version = ebx & 0xff;
   if (ebx & bit_AVX10_256)
switch (version)


[gcc r14-10398] Arm: Fix ldrd offset range [PR115153]

2024-07-09 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:83332e3f808b146ca06dbc6a91d15bd3e5650658

commit r14-10398-g83332e3f808b146ca06dbc6a91d15bd3e5650658
Author: Wilco Dijkstra 
Date:   Fri Jul 5 17:31:25 2024 +0100

Arm: Fix ldrd offset range [PR115153]

The valid offset range of LDRD in arm_legitimate_index_p is increased to
-1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
Fix this by moving the LDRD check earlier.

gcc:
PR target/115153
* config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before
NEON.
(thumb2_legitimate_index_p): Update comments.
(output_move_neon): Use DFmode for vldr/vstr and non-checking
adjust_address.

gcc/testsuite:
PR target/115153
* gcc.target/arm/pr115153.c: Add new test.
* lib/target-supports.exp: Add arm_arch_v7ve_neon target support.

(cherry picked from commit 44e5ecfd261afe72aa04eba4bf1a9ec782579cab)

Diff:
---
 gcc/config/arm/arm.cc   | 59 +
 gcc/testsuite/gcc.target/arm/pr115153.c | 16 +
 gcc/testsuite/lib/target-supports.exp   |  2 ++
 3 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index b8c32db0a1d7..912f2c315769 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8852,6 +8852,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
 
+  if (arm_address_register_rtx_p (index, strict_p)
+  && (GET_MODE_SIZE (mode) <= 4))
+return 1;
+
+  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
+  if (mode == DImode || mode == DFmode)
+{
+  if (code == CONST_INT)
+   {
+ HOST_WIDE_INT val = INTVAL (index);
+
+ /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
+If vldr is selected it uses arm_coproc_mem_operand.  */
+ if (TARGET_LDRD)
+   return val > -256 && val < 256;
+ else
+   return val > -4096 && val < 4092;
+   }
+
+  return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+}
+
   /* For quad modes, we restrict the constant offset to be slightly less
  than what the instruction format permits.  We do this because for
  quad mode moves, we will actually decompose them into two separate
@@ -8864,7 +8886,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
- full range of the instruction format.  */
+ full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
 return (code == CONST_INT
&& INTVAL (index) < 1024
@@ -8877,27 +8899,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
 
-  if (arm_address_register_rtx_p (index, strict_p)
-  && (GET_MODE_SIZE (mode) <= 4))
-return 1;
-
-  if (mode == DImode || mode == DFmode)
-{
-  if (code == CONST_INT)
-   {
- HOST_WIDE_INT val = INTVAL (index);
-
- /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
-If vldr is selected it uses arm_coproc_mem_operand.  */
- if (TARGET_LDRD)
-   return val > -256 && val < 256;
- else
-   return val > -4096 && val < 4092;
-   }
-
-  return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
-}
-
   if (GET_MODE_SIZE (mode) <= 4
   && ! (arm_arch4
&& (mode == HImode
@@ -9000,7 +9001,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, 
int strict_p)
&& (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
- full range of the instruction format.  */
+ full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
 return (code == CONST_INT
&& INTVAL (index) < 1024
@@ -9011,6 +9012,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, 
int strict_p)
   && (GET_MODE_SIZE (mode) <= 4))
 return 1;
 
+  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
   if (mode == DImode || mode == DFmode)
 {
   if (code == CONST_INT)
@@ -20859,10 +20861,9 @@ output_move_neon (rtx *operands)
int overlap = -1;
for (i = 0; i < nregs; i++)
  {
-   /* We're only using DImode here because it's a convenient
-  size.  */
-   ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-   ops[1] = adjust_address (mem, DImode, 8 * i);
+   /* Use DFmode for vldr/vstr.  */
+   ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 

[gcc r14-10399] Aarch64, bugfix: Fix NEON bigendian addp intrinsic [PR114890]

2024-07-09 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:72753ec82076d15443c32aac88a8c0fa0ab4bc2f

commit r14-10399-g72753ec82076d15443c32aac88a8c0fa0ab4bc2f
Author: Alfie Richards 
Date:   Thu Jul 4 09:09:19 2024 +0200

Aarch64, bugfix: Fix NEON bigendian addp intrinsic [PR114890]

This change removes code that erroneously switches the operands in
big-endian mode.
This fixes the related test also.

gcc/ChangeLog:

PR target/114890
* config/aarch64/aarch64-simd.md: Remove bigendian operand swap.

gcc/testsuite/ChangeLog:

PR target/114890
* gcc.target/aarch64/vector_intrinsics_asm.c: Remove xfail.

(cherry picked from commit 11049cdf204bc96bc407e5dd44ed3b8a492f405a)

Diff:
---
 gcc/config/aarch64/aarch64-simd.md |   2 -
 .../gcc.target/aarch64/vector_intrinsics_asm.c | 371 +
 2 files changed, 371 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index f8bb973a278c..33ab0741e87c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7363,8 +7363,6 @@
   nunits /= 2;
 rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
 rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
-if (BYTES_BIG_ENDIAN)
-  std::swap (operands[1], operands[2]);
 emit_insn (gen_aarch64_addp_insn (operands[0], operands[1],
operands[2], par_even, par_odd));
 DONE;
diff --git a/gcc/testsuite/gcc.target/aarch64/vector_intrinsics_asm.c 
b/gcc/testsuite/gcc.target/aarch64/vector_intrinsics_asm.c
new file mode 100644
index ..e3dcd0830c84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vector_intrinsics_asm.c
@@ -0,0 +1,371 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include "arm_neon.h"
+
+// SIGNED VADD INTRINSICS
+
+/*
+**test_vadd_s8:
+** addpv0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
+ int8x8_t v3 = vpadd_s8(v1, v2);
+ return v3;
+}
+
+/*
+**test_vadd_s16:
+**addp v0\.4h, v0\.4h, v1\.4h
+**ret
+*/
+int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
+ int16x4_t v3 = vpadd_s16(v1, v2);
+ return v3;
+}
+
+/*
+**test_vadd_s32:
+** addpv0\.2s, v0\.2s, v1\.2s
+** ret
+*/
+int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
+ int32x2_t v3 = vpadd_s32(v1, v2);
+ return v3;
+}
+
+/*
+**test_vaddq_s8:
+**...
+** addpv0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
+ int8x16_t v3 = vpaddq_s8(v1, v2);
+ return v3;
+}
+
+/*
+**test_vaddq_s16:
+**...
+** addpv0\.8h, v0\.8h, v1\.8h
+** ret
+*/
+int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
+ int16x8_t v3 = vpaddq_s16(v1, v2);
+ return v3;
+}
+
+/*
+**test_vaddq_s32:
+**...
+** addpv0\.4s, v0\.4s, v1\.4s
+** ret
+*/
+int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
+ int32x4_t v3 = vpaddq_s32(v1, v2);
+ return v3;
+}
+
+/*
+**test_vaddq_s64:
+**...
+** addpv0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
+ int64x2_t v3 = vpaddq_s64(v1, v2);
+ return v3;
+}
+
+/*
+**test_vaddd_s64:
+**...
+** addp(d[0-9]+), v0\.2d
+** fmovx0, \1
+** ret
+*/
+int64_t test_vaddd_s64(int64x2_t v1) {
+ int64_t v2 = vpaddd_s64(v1);
+ return v2;
+}
+
+/*
+**test_vaddl_s8:
+**...
+** saddlp  v0\.4h, v0\.8b
+** ret
+*/
+int16x4_t test_vaddl_s8(int8x8_t v1) {
+ int16x4_t v2 = vpaddl_s8(v1);
+ return v2;
+}
+
+/*
+**test_vaddlq_s8:
+**...
+** saddlp  v0\.8h, v0\.16b
+** ret
+*/
+int16x8_t test_vaddlq_s8(int8x16_t v1) {
+ int16x8_t v2 = vpaddlq_s8(v1);
+ return v2;
+}
+/*
+**test_vaddl_s16:
+**...
+** saddlp  v0\.2s, v0\.4h
+** ret
+*/
+int32x2_t test_vaddl_s16(int16x4_t v1) {
+ int32x2_t v2 = vpaddl_s16(v1);
+ return v2;
+}
+
+/*
+**test_vaddlq_s16:
+**...
+** saddlp  v0\.4s, v0\.8h
+** ret
+*/
+int32x4_t test_vaddlq_s16(int16x8_t v1) {
+ int32x4_t v2 = vpaddlq_s16(v1);
+ return v2;
+}
+
+/*
+**test_vaddl_s32:
+**...
+** saddlp  v0\.1d, v0\.2s
+** ret
+*/
+int64x1_t test_vaddl_s32(int32x2_t v1) {
+ int64x1_t v2 = vpaddl_s32(v1);
+ return v2;
+}
+
+/*
+**test_vaddlq_s32:
+**...
+** saddlp  v0\.2d, v0\.4s
+** ret
+*/
+int64x2_t test_vaddlq_s32(int32x4_t v1) {
+ int64x2_t v2 = vpaddlq_s32(v1);
+ return v2;
+}
+
+// UNSIGNED VADD INTRINSICS
+
+/*
+**test_vadd_u8:
+**...
+** addpv0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
+ uint8x8_t v3 = vpadd_u8(v1, v2);
+ return v3;
+}
+
+/*
+**test_vadd_u16:
+**...
+** addpv0\.4h, v0\.4h, v1\.4h
+** ret
+*/
+uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
+ uint16x4_t v3 = vpadd_u16(v1, v2);
+ return v3;
+}
+
+/*
+**test_vadd_u32:
+**...
+** addpv0\

[gcc r11-11564] aarch64: PR target/115457 Implement missing __ARM_FEATURE_BF16 macro

2024-07-09 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:d32cfe3352f3863325f8452e83400063b1e71e5b

commit r11-11564-gd32cfe3352f3863325f8452e83400063b1e71e5b
Author: Kyrylo Tkachov 
Date:   Thu Jun 27 16:10:41 2024 +0530

aarch64: PR target/115457 Implement missing __ARM_FEATURE_BF16 macro

The ACLE asks the user to test for __ARM_FEATURE_BF16 before using the
<arm_bf16.h> header but GCC doesn't set this up.
LLVM does, so this is an inconsistency between the compilers.

This patch enables that macro for TARGET_BF16_FP.
Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/

PR target/115457
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
Define __ARM_FEATURE_BF16 for TARGET_BF16_FP.

gcc/testsuite/

PR target/115457
* gcc.target/aarch64/acle/bf16_feature.c: New test.

Signed-off-by: Kyrylo Tkachov 
(cherry picked from commit c10942134fa759843ac1ed1424b86fcb8e6368ba)

Diff:
---
 gcc/config/aarch64/aarch64-c.c   |  2 ++
 gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c | 10 ++
 2 files changed, 12 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 05869463e4ba..f6d90affd374 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -200,6 +200,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
   aarch64_def_or_undef (TARGET_BF16_FP,
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
+  aarch64_def_or_undef (TARGET_BF16_FP,
+   "__ARM_FEATURE_BF16", pfile);
   aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c 
b/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c
new file mode 100644
index ..96584b4b9887
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+bf16"
+#ifndef __ARM_FEATURE_BF16
+#error "__ARM_FEATURE_BF16 is not defined but should be!"
+#endif
+
+void
+foo (void) {}
+


[gcc r11-11565] aarch64: PR target/115475 Implement missing __ARM_FEATURE_SVE_BF16 macro

2024-07-09 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:ee69d6e1e3bed8c3799c29fad3299bfd2e14f64e

commit r11-11565-gee69d6e1e3bed8c3799c29fad3299bfd2e14f64e
Author: Kyrylo Tkachov 
Date:   Fri Jun 28 13:22:37 2024 +0530

aarch64: PR target/115475 Implement missing __ARM_FEATURE_SVE_BF16 macro

The ACLE requires __ARM_FEATURE_SVE_BF16 to be enabled when SVE and BF16
and the associated intrinsics are available.
GCC does support the required intrinsics for TARGET_SVE_BF16 so define
this macro too.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/

PR target/115475
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
Define __ARM_FEATURE_SVE_BF16 for TARGET_SVE_BF16.

gcc/testsuite/

PR target/115475
* gcc.target/aarch64/acle/bf16_sve_feature.c: New test.

Signed-off-by: Kyrylo Tkachov 
(cherry picked from commit 6492c7130d6ae9992298fc3d072e2589d1131376)

Diff:
---
 gcc/config/aarch64/aarch64-c.c   |  3 +++
 gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index f6d90affd374..ba732e4d877c 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -202,6 +202,9 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
   aarch64_def_or_undef (TARGET_BF16_FP,
"__ARM_FEATURE_BF16", pfile);
+  aarch64_def_or_undef (TARGET_SVE_BF16,
+   "__ARM_FEATURE_SVE_BF16", pfile);
+
   aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c 
b/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c
new file mode 100644
index ..cb3ddac71a32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sve+bf16"
+#ifndef __ARM_FEATURE_SVE_BF16
+#error "__ARM_FEATURE_SVE_BF16 is not defined but should be!"
+#endif
+
+void
+foo (void) {}
+


[gcc r15-1909] RISC-V: testsuite: Properly gate LTO tests

2024-07-09 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:0717d50fc4ff983b79093bdef43b04e4584cc3cd

commit r15-1909-g0717d50fc4ff983b79093bdef43b04e4584cc3cd
Author: Christoph Müllner 
Date:   Fri Jul 5 09:53:34 2024 +0200

RISC-V: testsuite: Properly gate LTO tests

There are two test cases with the following skip directive:
  dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" }
This reads as: skip if both '-flto' and '-fno-fat-lto-objects'
are present.  This is not the case if only '-flto' is present.

Since both tests depend on instruction sequences (one does
check-function-bodies the other tests for an assembler error
message), they won't work reliably with fat LTO objects.

Let's change the skip line to gate the test on '-flto'
to avoid failing tests like this:

FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto   
check-function-bodies interrupt
FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto 
-flto-partition=none   check-function-bodies interrupt
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 9)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 9)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-misaligned.c: Remove
"-fno-fat-lto-objects" from skip condition.
* gcc.target/riscv/pr93202.c: Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c | 2 +-
 gcc/testsuite/gcc.target/riscv/pr93202.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c 
b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
index b5f8e6c2bbef..912f180e4d65 100644
--- a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
+++ b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fno-schedule-insns 
-fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 /*  Make sure no stack offset are misaligned.
 **  interrupt:
diff --git a/gcc/testsuite/gcc.target/riscv/pr93202.c 
b/gcc/testsuite/gcc.target/riscv/pr93202.c
index 5501191ea52c..5de003fac421 100644
--- a/gcc/testsuite/gcc.target/riscv/pr93202.c
+++ b/gcc/testsuite/gcc.target/riscv/pr93202.c
@@ -1,7 +1,7 @@
 /* PR inline-asm/93202 */
 /* { dg-do compile { target fpic } } */
 /* { dg-options "-fpic" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 void
 foo (void)


[gcc] Created branch 'matz/heads/x86-ssw' in namespace 'refs/users'

2024-07-09 Thread Michael Matz via Gcc-cvs
The branch 'matz/heads/x86-ssw' was created in namespace 'refs/users' pointing 
to:

 c27b30552e6c... gomp: testsuite: improve compatibility of bad-array-section


[gcc(refs/users/matz/heads/x86-ssw)] x86: implement separate shrink wrapping

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:eb94eb73cf3993c1d544e6eb8c4dcb671f215b25

commit eb94eb73cf3993c1d544e6eb8c4dcb671f215b25
Author: Michael Matz 
Date:   Sun Jun 30 03:52:39 2024 +0200

x86: implement separate shrink wrapping

Diff:
---
 gcc/config/i386/i386.cc | 581 +++-
 gcc/config/i386/i386.h  |   2 +
 2 files changed, 533 insertions(+), 50 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4b6b665e5997..33e69e96008d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6970,7 +6970,7 @@ ix86_compute_frame_layout (void)
 }
 
   frame->save_regs_using_mov
-= TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
+= (TARGET_PROLOGUE_USING_MOVE || flag_shrink_wrap_separate) && 
m->use_fast_prologue_epilogue;
 
   /* Skip return address and error code in exception handler.  */
   offset = INCOMING_FRAME_SP_OFFSET;
@@ -7120,7 +7120,8 @@ ix86_compute_frame_layout (void)
   /* Size prologue needs to allocate.  */
   to_allocate = offset - frame->sse_reg_save_offset;
 
-  if ((!to_allocate && frame->nregs <= 1)
+  if ((!to_allocate && frame->nregs <= 1
+   && !flag_shrink_wrap_separate)
   || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x8000))
/* If static stack checking is enabled and done with probes,
  the registers need to be saved before allocating the frame.  */
@@ -7417,6 +7418,8 @@ ix86_emit_save_regs (void)
   int regno;
   rtx_insn *insn;
 
+  gcc_assert (!crtl->shrink_wrapped_separate);
+
   if (!TARGET_APX_PUSH2POP2
   || !ix86_can_use_push2pop2 ()
   || cfun->machine->func_type != TYPE_NORMAL)
@@ -7589,7 +7592,8 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   {
-ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+   if (!cfun->machine->reg_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
   }
 }
@@ -7604,7 +7608,8 @@ ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT 
cfa_offset)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   {
-   ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
+   if (!cfun->machine->reg_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
cfa_offset -= GET_MODE_SIZE (V4SFmode);
   }
 }
@@ -9089,6 +9094,7 @@ ix86_expand_prologue (void)
= frame.sse_reg_save_offset - frame.reg_save_offset;
 
   gcc_assert (int_registers_saved);
+  gcc_assert (!m->frame_alloc_separately);
 
   /* No need to do stack checking as the area will be immediately
 written.  */
@@ -9106,6 +9112,7 @@ ix86_expand_prologue (void)
   && flag_stack_clash_protection
   && !ix86_target_stack_probe ())
 {
+  gcc_assert (!m->frame_alloc_separately);
   ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
   allocate = 0;
 }
@@ -9116,6 +9123,7 @@ ix86_expand_prologue (void)
 {
   const HOST_WIDE_INT probe_interval = get_probe_interval ();
 
+  gcc_assert (!m->frame_alloc_separately);
   if (STACK_CHECK_MOVING_SP)
{
  if (crtl->is_leaf
@@ -9172,9 +9180,16 @@ ix86_expand_prologue (void)
   else if (!ix86_target_stack_probe ()
   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
 {
-  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-GEN_INT (-allocate), -1,
-m->fs.cfa_reg == stack_pointer_rtx);
+  if (!m->frame_alloc_separately)
+   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+  GEN_INT (-allocate), -1,
+  m->fs.cfa_reg == stack_pointer_rtx);
+  else
+   {
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+   m->fs.cfa_offset -= allocate;
+ m->fs.sp_offset += allocate;
+   }
 }
   else
 {
@@ -9184,6 +9199,8 @@ ix86_expand_prologue (void)
   bool eax_live = ix86_eax_live_at_start_p ();
   bool r10_live = false;
 
+  gcc_assert (!m->frame_alloc_separately);
+
   if (TARGET_64BIT)
 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
 
@@ -9338,6 +9355,7 @@ ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
   struct machine_function *m = cfun->machine;
   rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
 
+  gcc_assert (!m->reg_wrapped_separately[REGNO (reg)]);
   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
   m->fs.sp_offset -= UNITS_PER_WORD;
 
@@ -9396,6 +9414,9 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, 
bool ppx_p = false)
   const int offset = UNITS_PER_WORD * 2;
   rtx_insn

[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: don't clobber flags

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:5a9a70a5837aba373e3f36a89943c52e37a19809

commit 5a9a70a5837aba373e3f36a89943c52e37a19809
Author: Michael Matz 
Date:   Tue Jul 9 02:20:10 2024 +0200

x86-ssw: don't clobber flags

Diff:
---
 gcc/config/i386/i386.cc | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 33e69e96008d..734802dbed4f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -10878,8 +10878,12 @@ ix86_components_for_bb (basic_block bb)
 }
 
 static void
-ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+ix86_disqualify_components (sbitmap components, edge e, sbitmap, bool)
 {
+  /* If the flags are needed at the start of e->dest then we can't insert
+ our stack adjustment insns (they default to flag-clobbering add/sub).  */
+  if (bitmap_bit_p (DF_LIVE_IN (e->dest), FLAGS_REG))
+bitmap_clear_bit (components, SW_FRAME);
 }
 
 static void


[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: disable if DRAP reg is needed

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:f917195f8a4e1767e89ebb0c875abcbe4dcf97ff

commit f917195f8a4e1767e89ebb0c875abcbe4dcf97ff
Author: Michael Matz 
Date:   Tue Jul 9 02:37:55 2024 +0200

x86-ssw: disable if DRAP reg is needed

Diff:
---
 gcc/config/i386/i386.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 734802dbed4f..4aa37c2ffeaa 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -10805,7 +10805,8 @@ ix86_get_separate_components (void)
   sbitmap components;
 
   ix86_finalize_stack_frame_flags ();
-  if (!frame->save_regs_using_mov)
+  if (!frame->save_regs_using_mov
+  || crtl->drap_reg)
 return NULL;
 
   components = sbitmap_alloc (NCOMPONENTS);


[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: fix testcases

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:c5a72cc80939e42518f4021e0640d29c8b8495a7

commit c5a72cc80939e42518f4021e0640d29c8b8495a7
Author: Michael Matz 
Date:   Tue Jul 9 04:27:46 2024 +0200

x86-ssw: fix testcases

the separate-shrink-wrap infrastructure sometimes
considers components as handled when they aren't in fact
handled (e.g. never calling any emit_prologue_components or
emit_epilogue_components hooks for the component in question).

So track stuff ourselves.

Diff:
---
 gcc/config/i386/i386.cc | 34 --
 gcc/config/i386/i386.h  |  1 +
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4aa37c2ffeaa..23226d204a09 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6970,7 +6970,7 @@ ix86_compute_frame_layout (void)
 }
 
   frame->save_regs_using_mov
-= (TARGET_PROLOGUE_USING_MOVE || flag_shrink_wrap_separate) && 
m->use_fast_prologue_epilogue;
+= (TARGET_PROLOGUE_USING_MOVE /*|| flag_shrink_wrap_separate*/) && 
m->use_fast_prologue_epilogue;
 
   /* Skip return address and error code in exception handler.  */
   offset = INCOMING_FRAME_SP_OFFSET;
@@ -7121,7 +7121,7 @@ ix86_compute_frame_layout (void)
   to_allocate = offset - frame->sse_reg_save_offset;
 
   if ((!to_allocate && frame->nregs <= 1
-   && !flag_shrink_wrap_separate)
+   /*&& !flag_shrink_wrap_separate*/)
   || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x8000))
/* If static stack checking is enabled and done with probes,
  the registers need to be saved before allocating the frame.  */
@@ -7418,7 +7418,7 @@ ix86_emit_save_regs (void)
   int regno;
   rtx_insn *insn;
 
-  gcc_assert (!crtl->shrink_wrapped_separate);
+  gcc_assert (!cfun->machine->anything_separately);
 
   if (!TARGET_APX_PUSH2POP2
   || !ix86_can_use_push2pop2 ()
@@ -8974,7 +8974,7 @@ ix86_expand_prologue (void)
   if (!int_registers_saved)
 {
   /* If saving registers via PUSH, do so now.  */
-  if (!frame.save_regs_using_mov)
+  if (!frame.save_regs_using_mov && !m->anything_separately)
{
  ix86_emit_save_regs ();
  int_registers_saved = true;
@@ -9489,7 +9489,7 @@ ix86_emit_restore_regs_using_pop (bool ppx_p)
 {
   unsigned int regno;
 
-  gcc_assert (!crtl->shrink_wrapped_separate);
+  gcc_assert (!cfun->machine->anything_separately);
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
@@ -9506,7 +9506,7 @@ ix86_emit_restore_regs_using_pop2 (void)
   int loaded_regnum = 0;
   bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
 
-  gcc_assert (!crtl->shrink_wrapped_separate);
+  gcc_assert (!cfun->machine->anything_separately);
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   {
@@ -9894,7 +9894,7 @@ ix86_expand_epilogue (int style)
   /* EH_RETURN requires the use of moves to function properly.  */
   if (crtl->calls_eh_return)
 restore_regs_via_mov = true;
-  else if (crtl->shrink_wrapped_separate)
+  else if (m->anything_separately)
 {
   gcc_assert (!TARGET_SEH);
   restore_regs_via_mov = true;
@@ -10800,13 +10800,14 @@ separate_frame_alloc_p (void)
 static sbitmap
 ix86_get_separate_components (void)
 {
-  struct machine_function *m = cfun->machine;
-  struct ix86_frame *frame = &m->frame;
+  //struct machine_function *m = cfun->machine;
+  //struct ix86_frame *frame = &m->frame;
   sbitmap components;
 
   ix86_finalize_stack_frame_flags ();
-  if (!frame->save_regs_using_mov
-  || crtl->drap_reg)
+  if (/*!frame->save_regs_using_mov
+  ||*/ crtl->drap_reg
+  || cfun->machine->func_type != TYPE_NORMAL)
 return NULL;
 
   components = sbitmap_alloc (NCOMPONENTS);
@@ -11150,6 +11151,8 @@ ix86_process_components (sbitmap components, bool 
prologue_p)
   {
if (bitmap_bit_p (components, regno))
  {
+   m->reg_wrapped_separately[regno] = true;
+   m->anything_separately = true;
if (prologue_p)
  ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
else
@@ -11161,6 +11164,8 @@ ix86_process_components (sbitmap components, bool 
prologue_p)
   {
if (bitmap_bit_p (components, regno))
  {
+   m->reg_wrapped_separately[regno] = true;
+   m->anything_separately = true;
if (prologue_p)
  ix86_emit_save_reg_using_mov (V4SFmode, regno, sse_cfa_offset);
else
@@ -11181,6 +11186,7 @@ ix86_emit_prologue_components (sbitmap components)
   if (bitmap_bit_p (components, SW_FRAME))
 {
   cfun->machine->frame_alloc_separately = true;
+  cfun->machine->anything_separately = true;
   ix86_alloc_frame ();
 }
 }
@@ -111

[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: adjust testcase

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:cf6d794219dd0cf2ca3601e2d6e6b9e5f497a47a

commit cf6d794219dd0cf2ca3601e2d6e6b9e5f497a47a
Author: Michael Matz 
Date:   Tue Jul 9 06:01:22 2024 +0200

x86-ssw: adjust testcase

Diff:
---
 gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c 
b/gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c
index 2a54bc89cfc2..140389626659 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c
+++ b/gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mabi=sysv" } */
+/* { dg-options "-O2 -mabi=sysv -fno-shrink-wrap-separate" } */
 
 extern int glb1, gbl2, gbl3;


[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: precise using of moves

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:d213bc5e67d903143608e0a7879c2577c33ca47e

commit d213bc5e67d903143608e0a7879c2577c33ca47e
Author: Michael Matz 
Date:   Tue Jul 9 06:01:47 2024 +0200

x86-ssw: precise using of moves

We need to distinguish between merely not wanting to use moves
and not being able to.  When the allocated frame is too
large we can't use moves freely and hence need to disable
separate shrink wrapping.  If we don't want to use moves
by default for speed or the like but nothing else prevents
them then this is no reason to disable separate shrink wrapping.

Diff:
---
 gcc/config/i386/i386.cc | 20 +++-
 gcc/config/i386/i386.h  |  1 +
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 23226d204a09..20f4dcd61870 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7120,9 +7120,7 @@ ix86_compute_frame_layout (void)
   /* Size prologue needs to allocate.  */
   to_allocate = offset - frame->sse_reg_save_offset;
 
-  if ((!to_allocate && frame->nregs <= 1
-   /*&& !flag_shrink_wrap_separate*/)
-  || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x8000))
+  if ((TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x8000))
/* If static stack checking is enabled and done with probes,
  the registers need to be saved before allocating the frame.  */
   || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
@@ -7135,6 +7133,12 @@ ix86_compute_frame_layout (void)
   || (flag_stack_clash_protection
  && !ix86_target_stack_probe ()
  && to_allocate > get_probe_interval ()))
+{
+  frame->cannot_use_moves = true;
+}
+
+  if ((!to_allocate && frame->nregs <= 1)
+  || frame->cannot_use_moves)
 frame->save_regs_using_mov = false;
 
   if (ix86_using_red_zone ()
@@ -10800,13 +10804,13 @@ separate_frame_alloc_p (void)
 static sbitmap
 ix86_get_separate_components (void)
 {
-  //struct machine_function *m = cfun->machine;
-  //struct ix86_frame *frame = &m->frame;
+  struct machine_function *m = cfun->machine;
+  struct ix86_frame *frame = &m->frame;
   sbitmap components;
 
   ix86_finalize_stack_frame_flags ();
-  if (/*!frame->save_regs_using_mov
-  ||*/ crtl->drap_reg
+  if (frame->cannot_use_moves
+  || crtl->drap_reg
   || cfun->machine->func_type != TYPE_NORMAL)
 return NULL;
 
@@ -10868,9 +10872,7 @@ ix86_components_for_bb (basic_block bb)
{
  need_frame = true;
  break;
-
}
-
}
 }
   if (need_frame)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index dd73687a8e2c..bda3d97ab4cf 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2645,6 +2645,7 @@ struct GTY(()) ix86_frame
   /* When save_regs_using_mov is set, emit prologue using
  move instead of push instructions.  */
   bool save_regs_using_mov;
+  bool cannot_use_moves;
 
   /* Assume without checking that:
EXPENSIVE_P = expensive_function_p (EXPENSIVE_COUNT).  */


[gcc(refs/users/matz/heads/x86-ssw)] x86-ssw: Adjust testcase

2024-07-09 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:495a687dc93a58110076700f48fb57fa79026bef

commit 495a687dc93a58110076700f48fb57fa79026bef
Author: Michael Matz 
Date:   Tue Jul 9 14:26:31 2024 +0200

x86-ssw: Adjust testcase

this testcase tries to (uselessly) shrink wrap frame allocation
in f0(), and then calls the prologue expander twice emitting the
messages looked for with the dejagnu directives more times than
expected.  Just disable separate shrink wrapping here.

Diff:
---
 gcc/testsuite/gcc.dg/stack-check-5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c 
b/gcc/testsuite/gcc.dg/stack-check-5.c
index 0243147939c1..b93dabdaea1d 100644
--- a/gcc/testsuite/gcc.dg/stack-check-5.c
+++ b/gcc/testsuite/gcc.dg/stack-check-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue 
-fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 
--param stack-clash-protection-guard-size=12" } */
+/* { dg-options "-O2 -fstack-clash-protection -fno-shrink-wrap-separate 
-fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param 
stack-clash-protection-probe-interval=12 --param 
stack-clash-protection-guard-size=12" } */
 /* { dg-require-effective-target supports_stack_clash_protection } */
 /* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */


[gcc r15-1912] arm: clean up some legacy FPA related cruft.

2024-07-09 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:959c168e77f2e1a02b086536c69c99f7413e08bf

commit r15-1912-g959c168e77f2e1a02b086536c69c99f7413e08bf
Author: Richard Earnshaw 
Date:   Mon Jul 8 23:14:03 2024 +0100

arm: clean up some legacy FPA related cruft.

Support for the FPA on Arm was removed after gcc-4.7, but this little
bit of crufty code was left behind.  In particular the code to support
the 'N' modifier in assembly code was left behind and this led to a
trail of other code that depended on it, even though most of the
constants that it supported had been removed in the original cleanup.

This patch removes most of the remaining cruft and simplifies the one
bit that remains: to determine whether an RTL construct contains 0.0 we
don't need to convert it to a real value, we can simply compare it to
CONST0_RTX of the appropriate mode.

gcc/

* config/arm/arm.cc (fp_consts_inited): Delete variable.
(value_fp0): Likewise.
(init_fp_table): Delete function.
(fp_const_from_val): Likewise.
(arm_const_double_rtx): Rework to avoid converting to 
REAL_VALUE_TYPE.
(arm_print_operand, case 'N'): Make use of this case an error.

Diff:
---
 gcc/config/arm/arm.cc | 61 +--
 1 file changed, 10 insertions(+), 51 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 459b7e648aba..93993d95eb96 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -117,7 +117,6 @@ static bool arm_assemble_integer (rtx, unsigned int, int);
 static void arm_print_operand (FILE *, rtx, int);
 static void arm_print_operand_address (FILE *, machine_mode, rtx);
 static bool arm_print_operand_punct_valid_p (unsigned char code);
-static const char *fp_const_from_val (REAL_VALUE_TYPE *);
 static arm_cc get_arm_condition_code (rtx);
 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
 static const char *output_multi_immediate (rtx *, const char *, const char *,
@@ -12822,37 +12821,12 @@ arm_cortex_m7_branch_cost (bool speed_p, bool 
predictable_p)
   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
 }
 
-static bool fp_consts_inited = false;
-
-static REAL_VALUE_TYPE value_fp0;
-
-static void
-init_fp_table (void)
-{
-  REAL_VALUE_TYPE r;
-
-  r = REAL_VALUE_ATOF ("0", DFmode);
-  value_fp0 = r;
-  fp_consts_inited = true;
-}
-
 /* Return TRUE if rtx X is a valid immediate FP constant.  */
 int
 arm_const_double_rtx (rtx x)
 {
-  const REAL_VALUE_TYPE *r;
-
-  if (!fp_consts_inited)
-init_fp_table ();
-
-  r = CONST_DOUBLE_REAL_VALUE (x);
-  if (REAL_VALUE_MINUS_ZERO (*r))
-return 0;
-
-  if (real_equal (r, &value_fp0))
-return 1;
-
-  return 0;
+  return (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT
+ && x == CONST0_RTX (GET_MODE (x)));
 }
 
 /* VFPv3 has a fairly wide range of representable immediates, formed from
@@ -19793,17 +19767,6 @@ arm_reorg (void)
 
 /* Routines to output assembly language.  */
 
-/* Return string representation of passed in real value.  */
-static const char *
-fp_const_from_val (REAL_VALUE_TYPE *r)
-{
-  if (!fp_consts_inited)
-init_fp_table ();
-
-  gcc_assert (real_equal (r, &value_fp0));
-  return "0";
-}
-
 /* OPERANDS[0] is the entire list of insns that constitute pop,
OPERANDS[1] is the base register, RETURN_PC is true iff return insn
is in the list, UPDATE is true iff the list contains explicit
@@ -24160,8 +24123,8 @@ arm_print_condition (FILE *stream)
 /* Globally reserved letters: acln
Puncutation letters currently used: @_|?().!#
Lower case letters currently used: bcdefhimpqtvwxyz
-   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
-   Letters previously used, but now deprecated/obsolete: sWXYZ.
+   Upper case letters currently used: ABCDEFGHIJKLMOPQRSTUV
+   Letters previously used, but now deprecated/obsolete: sNWXYZ.
 
Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
 
@@ -24174,8 +24137,6 @@ arm_print_condition (FILE *stream)
in these cases the instruction pattern will take care to make sure that
an instruction containing %d will follow, thereby undoing the effects of
doing this instruction unconditionally.
-   If CODE is 'N' then X is a floating point operand that must be negated
-   before output.
If CODE is 'B' then output a bitwise inverted value of X (a const int).
If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
If CODE is 'V', then the operand must be a CONST_INT representing
@@ -24226,14 +24187,6 @@ arm_print_operand (FILE *stream, rtx x, int code)
 case '#':
   return;
 
-case 'N':
-  {
-   REAL_VALUE_TYPE r;
-   r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
-   fprintf (stream, "%s", fp_const_from_val (&r));
-  }
-  return;
-
 /* An integer or symbol address without a preceding # sign.  */
 ca

[gcc r15-1913] diagnostics: use refs rather than pointers for diagnostic_{path, context}

2024-07-09 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:2d6e6a77e840b7b93d1f18708aa08ced8b46282e

commit r15-1913-g2d6e6a77e840b7b93d1f18708aa08ced8b46282e
Author: David Malcolm 
Date:   Tue Jul 9 11:22:32 2024 -0400

diagnostics: use refs rather than pointers for diagnostic_{path,context}

Use const & rather than const * in various places where it can't be null
and can't change.

No functional change intended.

gcc/ChangeLog:
* diagnostic-path.cc: Replace "const diagnostic_path *" with
"const diagnostic_path &" throughout, and "diagnostic_context *"
with "diagnostic_context &".
* diagnostic.cc (diagnostic_context::show_any_path): Pass
reference in call to print_path.
* diagnostic.h (diagnostic_context::print_path): Convert param
to a reference.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-path.cc | 101 -
 gcc/diagnostic.cc  |   2 +-
 gcc/diagnostic.h   |   2 +-
 3 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/gcc/diagnostic-path.cc b/gcc/diagnostic-path.cc
index e470bd29fdd9..f279aead6c59 100644
--- a/gcc/diagnostic-path.cc
+++ b/gcc/diagnostic-path.cc
@@ -225,7 +225,7 @@ class path_label : public range_label
 {
  public:
   path_label (const diagnostic_context &ctxt,
- const diagnostic_path *path,
+ const diagnostic_path &path,
  unsigned start_idx)
   : m_ctxt (ctxt), m_path (path), m_start_idx (start_idx), m_effects (*this)
   {}
@@ -233,7 +233,7 @@ class path_label : public range_label
   label_text get_text (unsigned range_idx) const final override
   {
 unsigned event_idx = m_start_idx + range_idx;
-const diagnostic_event &event = m_path->get_event (event_idx);
+const diagnostic_event &event = m_path.get_event (event_idx);
 
 /* Get the description of the event, perhaps with colorization:
normally, we don't colorize within a range_label, but this
@@ -305,7 +305,7 @@ class path_label : public range_label
   const diagnostic_event &get_event (unsigned range_idx) const
   {
 unsigned event_idx = m_start_idx + range_idx;
-return m_path->get_event (event_idx);
+return m_path.get_event (event_idx);
   }
 
   const diagnostic_event *get_prev_event (unsigned range_idx) const
@@ -313,11 +313,11 @@ class path_label : public range_label
 if (m_start_idx + range_idx == 0)
   return nullptr;
 unsigned event_idx = m_start_idx + range_idx - 1;
-return &m_path->get_event (event_idx);
+return &m_path.get_event (event_idx);
   }
 
   const diagnostic_context &m_ctxt;
-  const diagnostic_path *m_path;
+  const diagnostic_path &m_path;
   unsigned m_start_idx;
   path_label_effects m_effects;
 };
@@ -508,7 +508,7 @@ struct event_range
   };
 
   event_range (const diagnostic_context &ctxt,
-  const diagnostic_path *path, unsigned start_idx,
+  const diagnostic_path &path, unsigned start_idx,
   const diagnostic_event &initial_event,
   per_thread_summary &t,
   bool show_event_links)
@@ -559,7 +559,7 @@ struct event_range
unsigned new_ev_idx,
bool check_rich_locations)
   {
-if (!can_consolidate_events (*m_path,
+if (!can_consolidate_events (m_path,
 m_initial_event, m_start_idx,
 new_ev, new_ev_idx,
 check_rich_locations))
@@ -574,7 +574,7 @@ struct event_range
   = get_per_source_line_info (exploc.line);
 const diagnostic_event *prev_event = nullptr;
 if (new_ev_idx > 0)
-  prev_event = &m_path->get_event (new_ev_idx - 1);
+  prev_event = &m_path.get_event (new_ev_idx - 1);
 const bool has_in_edge = (prev_event
  ? prev_event->connect_to_next_event_p ()
  : false);
@@ -604,7 +604,7 @@ struct event_range
   /* Print the events in this range to DC, typically as a single
  call to the printer's diagnostic_show_locus.  */
 
-  void print (diagnostic_context *dc, pretty_printer *pp,
+  void print (diagnostic_context &dc, pretty_printer *pp,
  diagnostic_source_effect_info *effect_info)
   {
 location_t initial_loc = m_initial_event.get_location ();
@@ -612,13 +612,13 @@ struct event_range
 /* Emit a span indicating the filename (and line/column) if the
line has changed relative to the last call to
diagnostic_show_locus.  */
-if (dc->m_source_printing.enabled)
+if (dc.m_source_printing.enabled)
   {
expanded_location exploc
  = linemap_client_expand_location_to_spelling_point
  (line_table, initial_loc, LOCATION_ASPECT_CARET);
-   if (exploc.file != LOCATION_FILE (dc->m_last_location))
- diagnostic_start_span (dc) (dc, exploc);
+   if (exploc.file != LOCATION_FILE (dc.m_last_location

[gcc r15-1914] i386: Implement .SAT_TRUNC for unsigned integers

2024-07-09 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d17889dbffd5dcdb2df22d42586ac0363704e1f1

commit r15-1914-gd17889dbffd5dcdb2df22d42586ac0363704e1f1
Author: Uros Bizjak 
Date:   Tue Jul 9 17:34:25 2024 +0200

i386: Implement .SAT_TRUNC for unsigned integers

The following testcase:

unsigned short foo (unsigned int x)
{
  _Bool overflow = x > (unsigned int)(unsigned short)(-1);
  return ((unsigned short)x | (unsigned short)-overflow);
}

currently compiles (-O2) to:

foo:
xorl%eax, %eax
cmpl$65535, %edi
seta%al
negl%eax
orl %edi, %eax
ret

We can expand through ustrunc{m}{n}2 optab to use carry flag from the
comparison and generate code using SBB:

foo:
cmpl$65535, %edi
sbbl%eax, %eax
orl %edi, %eax
ret

or CMOV instruction:

foo:
movl$65535, %eax
cmpl%eax, %edi
cmovnc  %edi, %eax
ret

gcc/ChangeLog:

* config/i386/i386.md (@cmp<mode>_1): Use SWI mode iterator.
(ustruncdi<mode>2): New expander.
(ustruncsi<mode>2): Ditto.
(ustrunchiqi2): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sattrunc-1.c: New test.

Diff:
---
 gcc/config/i386/i386.md| 112 -
 gcc/testsuite/gcc.target/i386/sattrunc-1.c |  24 +++
 2 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 214cb2e239ae..e2f30695d70e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1533,8 +1533,8 @@
 
 (define_expand "@cmp_1"
   [(set (reg:CC FLAGS_REG)
-   (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
-   (match_operand:SWI48 1 "")))])
+   (compare:CC (match_operand:SWI 0 "nonimmediate_operand")
+   (match_operand:SWI 1 "")))])
 
 (define_mode_iterator SWI1248_AVX512BWDQ_64
   [(QI "TARGET_AVX512DQ") HI
@@ -9981,6 +9981,114 @@
   DONE;
 })
 
+(define_expand "ustruncdi2"
+  [(set (match_operand:SWI124 0 "register_operand")
+   (us_truncate:DI (match_operand:DI 1 "nonimmediate_operand")))]
+  "TARGET_64BIT"
+{
+  rtx op1 = force_reg (DImode, operands[1]);
+  rtx sat = force_reg (DImode, GEN_INT (GET_MODE_MASK (mode)));
+  rtx dst;
+
+  emit_insn (gen_cmpdi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (mode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ gen_lowpart (SImode, sat)));
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR,
+gen_lowpart (mode, op1), msk,
+operands[0], 1, OPTAB_WIDEN);
+}
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustruncsi2"
+  [(set (match_operand:SWI12 0 "register_operand")
+   (us_truncate:SI (match_operand:SI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (SImode, operands[1]);
+  rtx sat = force_reg (SImode, GEN_INT (GET_MODE_MASK (mode)));
+  rtx dst;
+
+  emit_insn (gen_cmpsi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (mode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ gen_lowpart (SImode, sat)));
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR,
+gen_lowpart (mode, op1), msk,
+operands[0], 1, OPTAB_WIDEN);
+}
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustrunchiqi2"
+  [(set (match_operand:QI 0 "register_operand")
+   (us_truncate:HI (match_operand:HI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (HImode, operands[1]);
+  rtx sat = force_reg (HImode, GEN_INT (GET_MODE_MASK (QImode)));
+  rtx dst;
+
+  emit_insn (gen_cmphi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (QImode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ ge

[gcc/devel/omp/gcc-14] (2 commits) Merge commit '43b730b9134af60a8f1c5b107d625f7127ff23c5' int

2024-07-09 Thread Paul-Antoine Arras via Gcc-cvs
The branch 'devel/omp/gcc-14' was updated to point to:

 0fb5d87ab56e... Merge commit '43b730b9134af60a8f1c5b107d625f7127ff23c5' int

It previously pointed to:

 735bbbfc6eaf... Fix scan dumps in readonly-1.c

Diff:

Summary of changes (added commits):
---

  0fb5d87... Merge commit '43b730b9134af60a8f1c5b107d625f7127ff23c5' int
  43b730b... Bump BASE-VER (*)

(*) This commit already exists in another branch.
Because the reference `refs/heads/devel/omp/gcc-14' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc/devel/omp/gcc-14] Merge commit '43b730b9134af60a8f1c5b107d625f7127ff23c5' into devel/omp/gcc-14

2024-07-09 Thread Paul-Antoine Arras via Gcc-cvs
https://gcc.gnu.org/g:0fb5d87ab56e1a4798b87294708791cc12de2546

commit 0fb5d87ab56e1a4798b87294708791cc12de2546
Merge: 735bbbfc6eaf 43b730b9134a
Author: Paul-Antoine Arras 
Date:   Tue Jul 9 18:17:59 2024 +0200

Merge commit '43b730b9134af60a8f1c5b107d625f7127ff23c5' into 
devel/omp/gcc-14

Diff:

 gcc/BASE-VER | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


[gcc] Deleted branch 'mikael/heads/cleanup_trans_preloop_setup_v01' in namespace 'refs/users'

2024-07-09 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/cleanup_trans_preloop_setup_v01' in namespace 
'refs/users' was deleted.
It previously pointed to:

 cfcb4489798c... fortran: Move definition of variable closer to its usages

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  cfcb448... fortran: Move definition of variable closer to its usages


[gcc] Created branch 'mikael/heads/factor_back_minmaxloc_v01' in namespace 'refs/users'

2024-07-09 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/factor_back_minmaxloc_v01' was created in namespace 
'refs/users' pointing to:

 a04c0d344553... Sauvegarde tests


[gcc(refs/users/mikael/heads/factor_back_minmaxloc_v01)] Sauvegarde tests

2024-07-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:a04c0d344553cc0b405977b3b9eac4ca504a299d

commit a04c0d344553cc0b405977b3b9eac4ca504a299d
Author: Mikael Morin 
Date:   Mon Jul 8 22:19:43 2024 +0200

Sauvegarde tests

Correction 11 18

Correction tests masque scalaire .false.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  10 ++
 gcc/testsuite/gfortran.dg/maxloc_5.f90 | 257 +
 gcc/testsuite/gfortran.dg/minloc_5.f90 | 257 +
 3 files changed, 524 insertions(+)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 5ea10e840609..cadbd1774520 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5325,6 +5325,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_actual_arglist *actual;
   gfc_ss *arrayss;
   gfc_ss *maskss;
+  gfc_ss *backss;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_expr *arrayexpr;
@@ -5390,6 +5391,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.dummy
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = actual->next->next->expr;
+  if (backexpr)
+backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr);
+  else
+backss = nullptr;
+
   nonempty = NULL;
   if (maskexpr && maskexpr->rank != 0)
 {
@@ -5449,6 +5455,9 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (maskss)
 gfc_add_ss_to_loop (&loop, maskss);
 
+  if (backss)
+gfc_add_ss_to_loop (&loop, backss);
+
   gfc_add_ss_to_loop (&loop, arrayss);
 
   /* Initialize the loop.  */
@@ -5535,6 +5544,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_block_to_block (&block, &arrayse.pre);
 
   gfc_init_se (&backse, NULL);
+  backse.ss = backss;
   gfc_conv_expr_val (&backse, backexpr);
   gfc_add_block_to_block (&block, &backse.pre);
 
diff --git a/gcc/testsuite/gfortran.dg/maxloc_5.f90 
b/gcc/testsuite/gfortran.dg/maxloc_5.f90
new file mode 100644
index ..5d722450c8fb
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_5.f90
@@ -0,0 +1,257 @@
+! { dg-do run }
+!
+! Check that the evaluation of MAXLOC's BACK argument is made only once
+! before the scalarisation loops.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 7, 4, 7, 6, 6, 4, 6, 3, 9, 8 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+   .false., .true., .true.,  &
+   .true. , .true., .false., &
+   .false. /)
+  integer :: calls_count = 0
+  call check_int_const_shape
+  call check_int_const_shape_scalar_mask
+  call check_int_const_shape_array_mask
+  call check_int_const_shape_optional_mask_present
+  call check_int_const_shape_optional_mask_absent
+  call check_int_const_shape_empty
+  call check_int_alloc
+  call check_int_alloc_scalar_mask
+  call check_int_alloc_array_mask
+  call check_int_alloc_empty
+  call check_real_const_shape
+  call check_real_const_shape_scalar_mask
+  call check_real_const_shape_array_mask
+  call check_real_const_shape_optional_mask_present
+  call check_real_const_shape_optional_mask_absent
+  call check_real_const_shape_empty
+  call check_real_alloc
+  call check_real_alloc_scalar_mask
+  call check_real_alloc_array_mask
+  call check_real_alloc_empty
+contains
+  function get_scalar_false()
+logical :: get_scalar_false
+calls_count = calls_count + 1
+get_scalar_false = .false.
+  end function
+  subroutine check_int_const_shape()
+integer :: a(10)
+logical :: m(10)
+integer :: r
+a = data10
+calls_count = 0
+r = maxloc(a, dim = 1, back = get_scalar_false())
+if (calls_count /= 1) stop 11
+  end subroutine
+  subroutine check_int_const_shape_scalar_mask()
+integer :: a(10)
+integer :: r
+a = data10
+calls_count = 0
+! We only check the case of a .true. mask.
+! If the mask is .false., the back argument is not necessary to deduce
+! the value returned by maxloc, so the compiler is free to elide it,
+! and the value of calls_count is undefined in that case.
+r = maxloc(a, dim = 1, mask = .true., back = get_scalar_false())
+if (calls_count /= 1) stop 18
+  end subroutine
+  subroutine check_int_const_shape_array_mask()
+integer :: a(10)
+logical :: m(10)
+integer :: r
+a = data10
+m = mask10
+calls_count = 0
+r = maxloc(a, dim = 1, mask = m, back = get_scalar_false())
+if (calls_count /= 1) stop 32
+  end subroutine
+  subroutine call_maxloc_int(r, a, m, b)
+integer :: a(:)
+logical, optional :: m(:)
+logical, optional :: b
+integer :: r
+r = maxloc(a, dim = 1, mask = m, back = b)
+  end subroutine
+  subroutine check_int_const_shape_optional_mask_present()
+integer :: a(10)
+logical :: m(10)
+integ

[gcc r15-1915] rs6000, Remove __builtin_vsx_cmple* builtins

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:7ed9a4593baeaaef6850c7ebcda677dabf229df1

commit r15-1915-g7ed9a4593baeaaef6850c7ebcda677dabf229df1
Author: Carl Love 
Date:   Tue Jul 9 13:12:39 2024 -0400

rs6000, Remove __builtin_vsx_cmple* builtins

The built-ins __builtin_vsx_cmple_u16qi, __builtin_vsx_cmple_u2di,
__builtin_vsx_cmple_u4si and __builtin_vsx_cmple_u8hi should take
unsigned arguments and return an unsigned result.  The current definitions
take signed arguments and return signed results which is incorrect.

The signed and unsigned versions of __builtin_vsx_cmple* are not
documented in extend.texi.  Also there are no test cases for the
built-ins.

Users can use the existing vec_cmple as PVIPR defines instead of
__builtin_vsx_cmple_u16qi, __builtin_vsx_cmple_u2di,
__builtin_vsx_cmple_u4si and __builtin_vsx_cmple_u8hi,
__builtin_vsx_cmple_16qi, __builtin_vsx_cmple_2di,
__builtin_vsx_cmple_4si and __builtin_vsx_cmple_8hi,
__builtin_altivec_cmple_1ti, __builtin_altivec_cmple_u1ti.

Hence these built-ins are redundant and are removed by this patch.

gcc/ChangeLog:
* config/rs6000/rs6000-builtin.cc (RS6000_BIF_CMPLE_16QI,
RS6000_BIF_CMPLE_U16QI, RS6000_BIF_CMPLE_8HI,
RS6000_BIF_CMPLE_U8HI, RS6000_BIF_CMPLE_4SI, RS6000_BIF_CMPLE_U4SI,
RS6000_BIF_CMPLE_2DI, RS6000_BIF_CMPLE_U2DI, RS6000_BIF_CMPLE_1TI,
RS6000_BIF_CMPLE_U1TI): Remove case statements.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_cmple_16qi,
__builtin_vsx_cmple_2di, __builtin_vsx_cmple_4si,
__builtin_vsx_cmple_8hi, __builtin_vsx_cmple_u16qi,
__builtin_vsx_cmple_u2di, __builtin_vsx_cmple_u4si,
__builtin_vsx_cmple_u8hi): Remove built-in definitions.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc   | 13 -
 gcc/config/rs6000/rs6000-builtins.def | 30 --
 2 files changed, 43 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index bb9da68edc73..e68b94f3d52c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2030,19 +2030,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   fold_compare_helper (gsi, GT_EXPR, stmt);
   return true;
 
-case RS6000_BIF_CMPLE_16QI:
-case RS6000_BIF_CMPLE_U16QI:
-case RS6000_BIF_CMPLE_8HI:
-case RS6000_BIF_CMPLE_U8HI:
-case RS6000_BIF_CMPLE_4SI:
-case RS6000_BIF_CMPLE_U4SI:
-case RS6000_BIF_CMPLE_2DI:
-case RS6000_BIF_CMPLE_U2DI:
-case RS6000_BIF_CMPLE_1TI:
-case RS6000_BIF_CMPLE_U1TI:
-  fold_compare_helper (gsi, LE_EXPR, stmt);
-  return true;
-
 /* flavors of vec_splat_[us]{8,16,32}.  */
 case RS6000_BIF_VSPLTISB:
 case RS6000_BIF_VSPLTISH:
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 736890fe6cb8..73064c9636e2 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1337,30 +1337,6 @@
   const vss __builtin_vsx_cmpge_u8hi (vus, vus);
 CMPGE_U8HI vector_nltuv8hi {}
 
-  const vsc __builtin_vsx_cmple_16qi (vsc, vsc);
-CMPLE_16QI vector_ngtv16qi {}
-
-  const vsll __builtin_vsx_cmple_2di (vsll, vsll);
-CMPLE_2DI vector_ngtv2di {}
-
-  const vsi __builtin_vsx_cmple_4si (vsi, vsi);
-CMPLE_4SI vector_ngtv4si {}
-
-  const vss __builtin_vsx_cmple_8hi (vss, vss);
-CMPLE_8HI vector_ngtv8hi {}
-
-  const vsc __builtin_vsx_cmple_u16qi (vsc, vsc);
-CMPLE_U16QI vector_ngtuv16qi {}
-
-  const vsll __builtin_vsx_cmple_u2di (vsll, vsll);
-CMPLE_U2DI vector_ngtuv2di {}
-
-  const vsi __builtin_vsx_cmple_u4si (vsi, vsi);
-CMPLE_U4SI vector_ngtuv4si {}
-
-  const vss __builtin_vsx_cmple_u8hi (vss, vss);
-CMPLE_U8HI vector_ngtuv8hi {}
-
   const vd __builtin_vsx_concat_2df (double, double);
 CONCAT_2DF vsx_concat_v2df {}
 
@@ -3117,12 +3093,6 @@
   const vbq __builtin_altivec_cmpge_u1ti (vuq, vuq);
 CMPGE_U1TI vector_nltuv1ti {}
 
-  const vbq __builtin_altivec_cmple_1ti (vsq, vsq);
-CMPLE_1TI vector_ngtv1ti {}
-
-  const vbq __builtin_altivec_cmple_u1ti (vuq, vuq);
-CMPLE_U1TI vector_ngtuv1ti {}
-
   const unsigned long long __builtin_altivec_cntmbb (vuc, const int<1>);
 VCNTMBB vec_cntmb_v16qi {}


[gcc r15-1916] rs6000, Remove __builtin_vsx_xvcv{sp{sx, u}ws, dpuxds_uns}

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:224cc560a6ac19c9454038efe6230096b46f4806

commit r15-1916-g224cc560a6ac19c9454038efe6230096b46f4806
Author: Carl Love 
Date:   Tue Jul 9 13:17:16 2024 -0400

rs6000, Remove __builtin_vsx_xvcv{sp{sx,u}ws,dpuxds_uns}

The built-in __builtin_vsx_xvcvspsxws is covered by the vec_signed
built-in that is documented in the PVIPR.  The __builtin_vsx_xvcvspsxws
built-in is not documented and there are no test cases for it.

The built-in __builtin_vsx_xvcvdpuxds_uns is redundant as it is covered by
vec_unsigned, remove.

The __builtin_vsx_xvcvspuxws is redundant as it is covered by
vec_unsigned, remove.

The built-in __builtin_vsx_xvcvdpsxws is redundant as it is covered by
vec_signed{e,o}, remove.

The built-in __builtin_vsx_xvcvdpuxws is redundant as it is covered by
vec_unsigned{e,o}, remove.

This patch removes the redundant built-ins.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xvcvspsxws,
__builtin_vsx_xvcvdpuxds_uns, __builtin_vsx_xvcvspuxws,
__builtin_vsx_xvcvdpsxws, __builtin_vsx_xvcvdpuxws): Remove
built-in definitions.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 73064c9636e2..465a43038fd2 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1688,36 +1688,21 @@
   const vsll __builtin_vsx_xvcvdpsxds_scale (vd, const int);
 XVCVDPSXDS_SCALE vsx_xvcvdpsxds_scale {}
 
-  const vsi __builtin_vsx_xvcvdpsxws (vd);
-XVCVDPSXWS vsx_xvcvdpsxws {}
-
   const vsll __builtin_vsx_xvcvdpuxds (vd);
 XVCVDPUXDS vsx_fixuns_truncv2dfv2di2 {}
 
   const vsll __builtin_vsx_xvcvdpuxds_scale (vd, const int);
 XVCVDPUXDS_SCALE vsx_xvcvdpuxds_scale {}
 
-  const vull __builtin_vsx_xvcvdpuxds_uns (vd);
-XVCVDPUXDS_UNS vsx_fixuns_truncv2dfv2di2 {}
-
-  const vsi __builtin_vsx_xvcvdpuxws (vd);
-XVCVDPUXWS vsx_xvcvdpuxws {}
-
   const vd __builtin_vsx_xvcvspdp (vf);
 XVCVSPDP vsx_xvcvspdp {}
 
   const vsll __builtin_vsx_xvcvspsxds (vf);
 XVCVSPSXDS vsx_xvcvspsxds {}
 
-  const vsi __builtin_vsx_xvcvspsxws (vf);
-XVCVSPSXWS vsx_fix_truncv4sfv4si2 {}
-
   const vsll __builtin_vsx_xvcvspuxds (vf);
 XVCVSPUXDS vsx_xvcvspuxds {}
 
-  const vsi __builtin_vsx_xvcvspuxws (vf);
-XVCVSPUXWS vsx_fixuns_truncv4sfv4si2 {}
-
   const vd __builtin_vsx_xvcvsxddp (vsll);
 XVCVSXDDP vsx_floatv2div2df2 {}


[gcc r15-1917] rs6000, fix error in unsigned vector float to unsigned int built-in definitions

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:6031e34af130d114a7a3de0108fdb39360e8b1b3

commit r15-1917-g6031e34af130d114a7a3de0108fdb39360e8b1b3
Author: Carl Love 
Date:   Tue Jul 9 13:17:28 2024 -0400

rs6000, fix error in unsigned vector float to unsigned int built-in 
definitions

The built-in __builtin_vsx_vunsigned_v2df is supposed to take a vector of
doubles and return a vector of unsigned long long ints.  Similarly
__builtin_vsx_vunsigned_v4sf takes a vector of floats and is supposed to
return a vector of unsigned ints.  The definitions are using the signed
version of the instructions not the unsigned version of the instruction.
The results should also be unsigned.  The built-ins are used by the
overloaded vec_unsigned built-in which has an unsigned result.

Similarly the built-ins __builtin_vsx_vunsignede_v2df and
__builtin_vsx_vunsignedo_v2df are supposed to return an unsigned result.
If the floating point argument is negative, the unsigned result is zero.
The built-ins are used in the overloaded built-in vec_unsignede and
vec_unsignedo respectively.

Add test cases for negative floating point arguments for each of the
above built-ins.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_vunsigned_v2df,
__builtin_vsx_vunsigned_v4sf, __builtin_vsx_vunsignede_v2df,
__builtin_vsx_vunsignedo_v2df): Change the result type to unsigned.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/builtins-3-runnable.c: Add tests for
vec_unsignede and vec_unsignedo with negative arguments.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def  | 12 -
 .../gcc.target/powerpc/builtins-3-runnable.c   | 30 +++---
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 465a43038fd2..f0aee29f1a1d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1580,16 +1580,16 @@
   const vsi __builtin_vsx_vsignedo_v2df (vd);
 VEC_VSIGNEDO_V2DF vsignedo_v2df {}
 
-  const vsll __builtin_vsx_vunsigned_v2df (vd);
-VEC_VUNSIGNED_V2DF vsx_xvcvdpsxds {}
+  const vull __builtin_vsx_vunsigned_v2df (vd);
+VEC_VUNSIGNED_V2DF vsx_xvcvdpuxds {}
 
-  const vsi __builtin_vsx_vunsigned_v4sf (vf);
-VEC_VUNSIGNED_V4SF vsx_xvcvspsxws {}
+  const vui __builtin_vsx_vunsigned_v4sf (vf);
+VEC_VUNSIGNED_V4SF vsx_xvcvspuxws {}
 
-  const vsi __builtin_vsx_vunsignede_v2df (vd);
+  const vui __builtin_vsx_vunsignede_v2df (vd);
 VEC_VUNSIGNEDE_V2DF vunsignede_v2df {}
 
-  const vsi __builtin_vsx_vunsignedo_v2df (vd);
+  const vui __builtin_vsx_vunsignedo_v2df (vd);
 VEC_VUNSIGNEDO_V2DF vunsignedo_v2df {}
 
   const vf __builtin_vsx_xscvdpsp (double);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c
index 0231a1fd086b..5dcdfbee7916 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-runnable.c
@@ -313,6 +313,14 @@ int main()
test_unsigned_int_result (ALL, vec_uns_int_result,
  vec_uns_int_expected);
 
+   /* Convert single precision float to  unsigned int.  Negative
+  arguments.  */
+   vec_flt0 = (vector float){-14.930, -834.49, -3.3, -5.4};
+   vec_uns_int_expected = (vector unsigned int){0, 0, 0, 0};
+   vec_uns_int_result = vec_unsigned (vec_flt0);
+   test_unsigned_int_result (ALL, vec_uns_int_result,
+ vec_uns_int_expected);
+
/* Convert double precision float to long long unsigned int */
vec_dble0 = (vector double){124.930, 8134.49};
vec_ll_uns_int_expected = (vector long long unsigned int){124, 8134};
@@ -320,10 +328,18 @@ int main()
test_ll_unsigned_int_result (vec_ll_uns_int_result,
 vec_ll_uns_int_expected);
 
+   /* Convert double precision float to long long unsigned int. Negative
+  arguments.  */
+   vec_dble0 = (vector double){-24.93, -134.9};
+   vec_ll_uns_int_expected = (vector long long unsigned int){0, 0};
+   vec_ll_uns_int_result = vec_unsigned (vec_dble0);
+   test_ll_unsigned_int_result (vec_ll_uns_int_result,
+vec_ll_uns_int_expected);
+
/* Convert double precision vector float to vector unsigned int,
-  even words */
-   vec_dble0 = (vector double){3124.930, 8234.49};
-   vec_uns_int_expected = (vector unsigned int){3124, 0, 8234, 0};
+  even words.  Negative arguments */
+   vec_dble0 = (vector double){-124.930, -234.49};
+   vec_uns_int_expected = (vector unsigned int){0, 0, 0, 0};
vec_uns_int_result = vec_unsignede (vec_dble0);
test_unsigned_int_result (EVEN, vec_uns_int

[gcc r15-1918] rs6000, extend the current vec_{un, }signed{e, o} built-ins

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:b620845fff7d38bca3b79ca9f5a5d049eba9e7f8

commit r15-1918-gb620845fff7d38bca3b79ca9f5a5d049eba9e7f8
Author: Carl Love 
Date:   Tue Jul 9 13:17:44 2024 -0400

rs6000, extend the current vec_{un,}signed{e,o} built-ins

The built-ins __builtin_vsx_xvcvspsxds and __builtin_vsx_xvcvspuxds
convert a vector of floats to a vector of signed/unsigned long long ints.
Extend the existing vec_{un,}signed{e,o} built-ins to handle the argument
vector of floats to return a vector of even/odd signed/unsigned integers.

The define expands vsignede_v4sf, vsignedo_v4sf, vunsignede_v4sf,
vunsignedo_v4sf are added to support the new vec_{un,}signed{e,o}
built-ins.

The built-ins __builtin_vsx_xvcvspsxds and __builtin_vsx_xvcvspuxds are
now for internal use only. They are not documented and they do not
have test cases.

Add testcases and update documentation.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xvcvspsxds,
__builtin_vsx_xvcvspuxds): Rename to __builtin_vsignede_v4sf,
__builtin_vunsignede_v4sf respectively.
(XVCVSPSXDS, XVCVSPUXDS): Rename to VEC_VSIGNEDE_V4SF,
VEC_VUNSIGNEDE_V4SF respectively.
(__builtin_vsignedo_v4sf, __builtin_vunsignedo_v4sf): New
built-in definitions.
* config/rs6000/rs6000-overload.def (vec_signede, vec_signedo,
vec_unsignede, vec_unsignedo): Add new overloaded specifications.
* config/rs6000/vsx.md (vsignede_v4sf, vsignedo_v4sf,
vunsignede_v4sf, vunsignedo_v4sf): New define_expands.
* doc/extend.texi (vec_signedo, vec_signede, vec_unsignedo,
vec_unsignede): Add documentation for new overloaded built-ins to
convert vector float to vector {un,}signed long long.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/builtins-3-runnable.c
(test_unsigned_int_result, test_ll_unsigned_int_result): Add
new argument.
(vec_signede, vec_signedo, vec_unsignede, vec_unsignedo): New
tests for the overloaded built-ins.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def  | 14 ++--
 gcc/config/rs6000/rs6000-overload.def  |  8 +++
 gcc/config/rs6000/vsx.md   | 84 ++
 gcc/doc/extend.texi| 10 +++
 .../gcc.target/powerpc/builtins-3-runnable.c   | 49 +++--
 5 files changed, 154 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f0aee29f1a1d..5f353f5faeb2 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1697,11 +1697,17 @@
   const vd __builtin_vsx_xvcvspdp (vf);
 XVCVSPDP vsx_xvcvspdp {}
 
-  const vsll __builtin_vsx_xvcvspsxds (vf);
-XVCVSPSXDS vsx_xvcvspsxds {}
+  const vsll __builtin_vsignede_v4sf (vf);
+VEC_VSIGNEDE_V4SF vsignede_v4sf {}
 
-  const vsll __builtin_vsx_xvcvspuxds (vf);
-XVCVSPUXDS vsx_xvcvspuxds {}
+  const vsll __builtin_vsignedo_v4sf (vf);
+VEC_VSIGNEDO_V4SF vsignedo_v4sf {}
+
+  const vull __builtin_vunsignede_v4sf (vf);
+VEC_VUNSIGNEDE_V4SF vunsignede_v4sf {}
+
+  const vull __builtin_vunsignedo_v4sf (vf);
+VEC_VUNSIGNEDO_V4SF vunsignedo_v4sf {}
 
   const vd __builtin_vsx_xvcvsxddp (vsll);
 XVCVSXDDP vsx_floatv2div2df2 {}
diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index 84bd9ae6554a..4d857bb1af3e 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -3307,10 +3307,14 @@
 [VEC_SIGNEDE, vec_signede, __builtin_vec_vsignede]
   vsi __builtin_vec_vsignede (vd);
 VEC_VSIGNEDE_V2DF
+  vsll __builtin_vec_vsignede (vf);
+VEC_VSIGNEDE_V4SF
 
 [VEC_SIGNEDO, vec_signedo, __builtin_vec_vsignedo]
   vsi __builtin_vec_vsignedo (vd);
 VEC_VSIGNEDO_V2DF
+  vsll __builtin_vec_vsignedo (vf);
+VEC_VSIGNEDO_V4SF
 
 [VEC_SIGNEXTI, vec_signexti, __builtin_vec_signexti]
   vsi __builtin_vec_signexti (vsc);
@@ -4433,10 +4437,14 @@
 [VEC_UNSIGNEDE, vec_unsignede, __builtin_vec_vunsignede]
   vui __builtin_vec_vunsignede (vd);
 VEC_VUNSIGNEDE_V2DF
+  vull __builtin_vec_vunsignede (vf);
+VEC_VUNSIGNEDE_V4SF
 
 [VEC_UNSIGNEDO, vec_unsignedo, __builtin_vec_vunsignedo]
   vui __builtin_vec_vunsignedo (vd);
 VEC_VUNSIGNEDO_V2DF
+  vull __builtin_vec_vunsignedo (vf);
+VEC_VUNSIGNEDO_V4SF
 
 [VEC_VEE, vec_extract_exp, __builtin_vec_extract_exp]
   vui __builtin_vec_extract_exp (vf);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 23ce5c740510..8001d1778b89 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2704,6 +2704,90 @@
   DONE;
 })
 
+;; Convert float vector even elements to signed long long vector
+(define_expand "vsignede_v4sf"
+  [(match_operand:V2D

[gcc r15-1919] rs6000, Remove redundant vector float/double type conversions

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:fd9fdb33ae252ec34cc33675433eb56637905257

commit r15-1919-gfd9fdb33ae252ec34cc33675433eb56637905257
Author: Carl Love 
Date:   Tue Jul 9 13:29:31 2024 -0400

rs6000, Remove redundant vector float/double type conversions

The following built-ins are redundant as they are covered by another
overloaded built-in.

  __builtin_vsx_xvcvspdp covered by vec_double{e,o}
  __builtin_vsx_xvcvdpsp covered by vec_float{e,o}
  __builtin_vsx_xvcvsxwdp covered by vec_double{e,o}
  __builtin_vsx_xvcvuxddp_uns covered by vec_double

Remove the redundant built-ins. They are not documented nor do they have
test cases.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xvcvspdp,
__builtin_vsx_xvcvdpsp, __builtin_vsx_xvcvsxwdp,
__builtin_vsx_xvcvuxddp_uns): Remove.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def | 12 
 1 file changed, 12 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 5f353f5faeb2..cd629c654986 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1679,9 +1679,6 @@
   const signed int __builtin_vsx_xvcmpgtsp_p (signed int, vf, vf);
 XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
-  const vf __builtin_vsx_xvcvdpsp (vd);
-XVCVDPSP vsx_xvcvdpsp {}
-
   const vsll __builtin_vsx_xvcvdpsxds (vd);
 XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1694,9 +1691,6 @@
   const vsll __builtin_vsx_xvcvdpuxds_scale (vd, const int);
 XVCVDPUXDS_SCALE vsx_xvcvdpuxds_scale {}
 
-  const vd __builtin_vsx_xvcvspdp (vf);
-XVCVSPDP vsx_xvcvspdp {}
-
   const vsll __builtin_vsignede_v4sf (vf);
 VEC_VSIGNEDE_V4SF vsignede_v4sf {}
 
@@ -1718,9 +1712,6 @@
   const vf __builtin_vsx_xvcvsxdsp (vsll);
 XVCVSXDSP vsx_xvcvsxdsp {}
 
-  const vd __builtin_vsx_xvcvsxwdp (vsi);
-XVCVSXWDP vsx_xvcvsxwdp {}
-
   const vf __builtin_vsx_xvcvsxwsp (vsi);
 XVCVSXWSP vsx_floatv4siv4sf2 {}
 
@@ -1730,9 +1721,6 @@
   const vd __builtin_vsx_xvcvuxddp_scale (vsll, const int<5>);
 XVCVUXDDP_SCALE vsx_xvcvuxddp_scale {}
 
-  const vd __builtin_vsx_xvcvuxddp_uns (vull);
-XVCVUXDDP_UNS vsx_floatunsv2div2df2 {}
-
   const vf __builtin_vsx_xvcvuxdsp (vull);
 XVCVUXDSP vsx_xvcvuxdsp {}


[gcc r15-1920] rs6000, remove duplicated built-ins of vecmergl and vec_mergeh

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:8d6326eeb773076643d9babf55f7cca19968313e

commit r15-1920-g8d6326eeb773076643d9babf55f7cca19968313e
Author: Carl Love 
Date:   Tue Jul 9 13:31:12 2024 -0400

rs6000, remove duplicated built-ins of vecmergl and vec_mergeh

The following undocumented built-ins are same as existing documented
overloaded builtins.

  const vf __builtin_vsx_xxmrghw (vf, vf);
same as  vf __builtin_vec_mergeh (vf, vf); (overloaded vec_mergeh)

  const vsi __builtin_vsx_xxmrghw_4si (vsi, vsi);
same as vsi __builtin_vec_mergeh (vsi, vsi);   (overloaded vec_mergeh)

  const vf __builtin_vsx_xxmrglw (vf, vf);
same as vf __builtin_vec_mergel (vf, vf);  (overloaded vec_mergel)

  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
same as vsi __builtin_vec_mergel (vsi, vsi);   (overloaded vec_mergel)

This patch removes the duplicate built-in definitions so only the
documented built-ins will be available for use.  The case statements in
rs6000_gimple_fold_builtin are removed as they are no longer needed.  The
patch removes the now unused define_expands for vsx_xxmrghw_ and
vsx_xxmrglw_.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xxmrghw,
__builtin_vsx_xxmrghw_4si, __builtin_vsx_xxmrglw,
__builtin_vsx_xxmrglw_4si, __builtin_vsx_xxsel_16qi): Remove
built-in definition.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin):
remove case entries RS6000_BIF_XXMRGLW_4SI,
RS6000_BIF_XXMRGLW_4SF, RS6000_BIF_XXMRGHW_4SI,
RS6000_BIF_XXMRGHW_4SF.
* config/rs6000/vsx.md (vsx_xxmrghw_, vsx_xxmrglw_):
Remove unused define_expands.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc   |  4 
 gcc/config/rs6000/rs6000-builtins.def | 12 --
 gcc/config/rs6000/vsx.md  | 45 ---
 3 files changed, 61 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index e68b94f3d52c..646e740774eb 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2100,20 +2100,16 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 /* vec_mergel (integrals).  */
 case RS6000_BIF_VMRGLH:
 case RS6000_BIF_VMRGLW:
-case RS6000_BIF_XXMRGLW_4SI:
 case RS6000_BIF_VMRGLB:
 case RS6000_BIF_VEC_MERGEL_V2DI:
-case RS6000_BIF_XXMRGLW_4SF:
 case RS6000_BIF_VEC_MERGEL_V2DF:
   fold_mergehl_helper (gsi, stmt, 1);
   return true;
 /* vec_mergeh (integrals).  */
 case RS6000_BIF_VMRGHH:
 case RS6000_BIF_VMRGHW:
-case RS6000_BIF_XXMRGHW_4SI:
 case RS6000_BIF_VMRGHB:
 case RS6000_BIF_VEC_MERGEH_V2DI:
-case RS6000_BIF_XXMRGHW_4SF:
 case RS6000_BIF_VEC_MERGEH_V2DF:
   fold_mergehl_helper (gsi, stmt, 0);
   return true;
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index cd629c654986..e89319badd0e 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1880,18 +1880,6 @@
   const signed int __builtin_vsx_xvtsqrtsp_fg (vf);
 XVTSQRTSP_FG vsx_tsqrtv4sf2_fg {}
 
-  const vf __builtin_vsx_xxmrghw (vf, vf);
-XXMRGHW_4SF vsx_xxmrghw_v4sf {}
-
-  const vsi __builtin_vsx_xxmrghw_4si (vsi, vsi);
-XXMRGHW_4SI vsx_xxmrghw_v4si {}
-
-  const vf __builtin_vsx_xxmrglw (vf, vf);
-XXMRGLW_4SF vsx_xxmrglw_v4sf {}
-
-  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
-XXMRGLW_4SI vsx_xxmrglw_v4si {}
-
   const vsc __builtin_vsx_xxpermdi_16qi (vsc, vsc, const int<2>);
 XXPERMDI_16QI vsx_xxpermdi_v16qi {}
 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 8001d1778b89..7892477fa922 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4896,51 +4896,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; V4SF/V4SI interleave
-(define_expand "vsx_xxmrghw_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
-(vec_select:VSX_W
- (vec_concat:
-   (match_operand:VSX_W 1 "vsx_register_operand" "wa")
-   (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
- (parallel [(const_int 0) (const_int 4)
-(const_int 1) (const_int 5)])))]
-  "VECTOR_MEM_VSX_P (mode)"
-{
-  if (BYTES_BIG_ENDIAN)
-emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
- operands[1],
- operands[2]));
-  else
-emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
- operands[2],
- operands[1]));
-  DONE;
-}
-  [(set_attr "type" "vecperm")])
-
-(define_expand "vsx_xxmrglw_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
-   (vec_select

[gcc r15-1921] rs6000, add overloaded vec_sel with int128 arguments

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:807bed024d03f73bc1cf47ac8f23da9307c88464

commit r15-1921-g807bed024d03f73bc1cf47ac8f23da9307c88464
Author: Carl Love 
Date:   Tue Jul 9 13:31:22 2024 -0400

rs6000, add overloaded vec_sel with int128 arguments

Extend the vec_sel built-in to take three signed/unsigned/bool int128
arguments and return a signed/unsigned/bool int128 result.

Extending the vec_sel built-in makes the existing built-ins
__builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete.  The
patch removes these built-ins.

The patch adds documentation and test cases for the new overloaded
vec_sel built-ins.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti,
__builtin_vsx_xxsel_1ti_uns): Remove built-in definitions.
* config/rs6000/rs6000-overload.def (vec_sel): Add new
overloaded vector signed, unsigned and bool 128-bit definitions.
* doc/extend.texi (vec_sel): Add documentation for new instances
with signed, unsigned and bool 128-bit arguments.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/builtins-10-runnable.c: New runnable test
file.
* gcc.target/powerpc/builtins-10.c: New compile only test file.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def  |   6 -
 gcc/config/rs6000/rs6000-overload.def  |  12 ++
 gcc/doc/extend.texi|  20 ++
 .../gcc.target/powerpc/builtins-10-runnable.c  | 220 +
 gcc/testsuite/gcc.target/powerpc/builtins-10.c |  63 ++
 5 files changed, 315 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index e89319badd0e..c62df9e4b685 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1907,12 +1907,6 @@
   const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc);
 XXSEL_16QI_UNS vector_select_v16qi_uns {}
 
-  const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq);
-XXSEL_1TI vector_select_v1ti {}
-
-  const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq);
-XXSEL_1TI_UNS vector_select_v1ti_uns {}
-
   const vd __builtin_vsx_xxsel_2df (vd, vd, vd);
 XXSEL_2DF vector_select_v2df {}
 
diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index 4d857bb1af3e..855b9aa73cce 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -3266,6 +3266,18 @@
 VSEL_2DI_UNS  VSEL_2DI_BB
   vbll __builtin_vec_sel (vbll, vbll, vull);
 VSEL_2DI_UNS  VSEL_2DI_BU
+  vsq __builtin_vec_sel (vsq, vsq, vbq);
+VSEL_1TI  VSEL_1TI_B
+  vsq __builtin_vec_sel (vsq, vsq, vuq);
+VSEL_1TI  VSEL_1TI_U
+  vuq __builtin_vec_sel (vuq, vuq, vbq);
+VSEL_1TI_UNS  VSEL_1TI_UB
+  vuq __builtin_vec_sel (vuq, vuq, vuq);
+VSEL_1TI_UNS  VSEL_1TI_UU
+  vbq __builtin_vec_sel (vbq, vbq, vbq);
+VSEL_1TI_UNS  VSEL_1TI_BB
+  vbq __builtin_vec_sel (vbq, vbq, vuq);
+VSEL_1TI_UNS  VSEL_1TI_BU
   vf __builtin_vec_sel (vf, vf, vbi);
 VSEL_4SF  VSEL_4SF_B
   vf __builtin_vec_sel (vf, vf, vui);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 98e392ae..6ab69840f3f7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21444,6 +21444,26 @@ Additional built-in functions are available for the 
64-bit PowerPC
 family of processors, for efficient use of 128-bit floating point
 (@code{__float128}) values.
 
+Vector select
+
+@smallexample
+vector signed __int128 vec_sel (vector signed __int128,
+   vector signed __int128, vector bool __int128);
+vector signed __int128 vec_sel (vector signed __int128,
+   vector signed __int128, vector unsigned __int128);
+vector unsigned __int128 vec_sel (vector unsigned __int128,
+   vector unsigned __int128, vector bool __int128);
+vector unsigned __int128 vec_sel (vector unsigned __int128,
+   vector unsigned __int128, vector unsigned __int128);
+vector bool __int128 vec_sel (vector bool __int128,
+   vector bool __int128, vector bool __int128);
+vector bool __int128 vec_sel (vector bool __int128,
+   vector bool __int128, vector unsigned __int128);
+@end smallexample
+
+The instance is an extension of the exiting overloaded built-in @code{vec_sel}
+that is documented in the PVIPR.
+
 @node Basic PowerPC Built-in Functions Available on ISA 2.06
 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.06
 
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c
new file mode 100644
index ..dede08358e13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c
@@ -0,0 +1,220 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-maltivec -O2 " } */
+
+#include 
+

[gcc r15-1922] rs6000, remove the vec_xxsel built-ins, they are duplicates

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:52d56983d516975d945318b81e1d2600676bb780

commit r15-1922-g52d56983d516975d945318b81e1d2600676bb780
Author: Carl Love 
Date:   Tue Jul 9 13:31:34 2024 -0400

rs6000, remove the vec_xxsel built-ins, they are duplicates

The following undocumented built-ins are covered by the existing overloaded
vec_sel built-in definitions.

  const vsc __builtin_vsx_xxsel_16qi (vsc, vsc, vsc);
same as vsc __builtin_vec_sel (vsc, vsc, vuc);  (overloaded vec_sel)

  const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc);
same as vuc __builtin_vec_sel (vuc, vuc, vuc);  (overloaded vec_sel)

  const vd __builtin_vsx_xxsel_2df (vd, vd, vd);
same as  vd __builtin_vec_sel (vd, vd, vull);   (overloaded vec_sel)

  const vsll __builtin_vsx_xxsel_2di (vsll, vsll, vsll);
same as vsll __builtin_vec_sel (vsll, vsll, vsll);  (overloaded vec_sel)

  const vull __builtin_vsx_xxsel_2di_uns (vull, vull, vull);
same as vull __builtin_vec_sel (vull, vull, vsll);  (overloaded vec_sel)

  const vf __builtin_vsx_xxsel_4sf (vf, vf, vf);
same as vf __builtin_vec_sel (vf, vf, vsi)  (overloaded vec_sel)

  const vsi __builtin_vsx_xxsel_4si (vsi, vsi, vsi);
same as vsi __builtin_vec_sel (vsi, vsi, vbi);  (overloaded vec_sel)

  const vui __builtin_vsx_xxsel_4si_uns (vui, vui, vui);
same as vui __builtin_vec_sel (vui, vui, vui);  (overloaded vec_sel)

  const vss __builtin_vsx_xxsel_8hi (vss, vss, vss);
same as vss __builtin_vec_sel (vss, vss, vbs);  (overloaded vec_sel)

  const vus __builtin_vsx_xxsel_8hi_uns (vus, vus, vus);
same as vus __builtin_vec_sel (vus, vus, vus);  (overloaded vec_sel)

This patch removed the duplicate built-in definitions so users will only
use the documented vec_sel built-in.  The __builtin_vsx_xxsel_[4si, 8hi,
16qi, 4sf, 2df] tests are also removed.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_16qi,
__builtin_vsx_xxsel_16qi_uns, __builtin_vsx_xxsel_2df,
__builtin_vsx_xxsel_2di,__builtin_vsx_xxsel_2di_uns,
__builtin_vsx_xxsel_4sf,__builtin_vsx_xxsel_4si,
__builtin_vsx_xxsel_4si_uns,__builtin_vsx_xxsel_8hi,
__builtin_vsx_xxsel_8hi_uns): Remove built-in definitions.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vsx-builtin-3.c (__builtin_vsx_xxsel_4si,
__builtin_vsx_xxsel_8hi, __builtin_vsx_xxsel_16qi,
__builtin_vsx_xxsel_4sf, __builtin_vsx_xxsel_2df,
__builtin_vsx_xxsel): Change built-in call to overloaded built-in
call vec_sel.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def| 30 
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c | 36 +---
 2 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index c62df9e4b685..80fdc959698b 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1901,36 +1901,6 @@
   const vss __builtin_vsx_xxpermdi_8hi (vss, vss, const int<2>);
 XXPERMDI_8HI vsx_xxpermdi_v8hi {}
 
-  const vsc __builtin_vsx_xxsel_16qi (vsc, vsc, vsc);
-XXSEL_16QI vector_select_v16qi {}
-
-  const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc);
-XXSEL_16QI_UNS vector_select_v16qi_uns {}
-
-  const vd __builtin_vsx_xxsel_2df (vd, vd, vd);
-XXSEL_2DF vector_select_v2df {}
-
-  const vsll __builtin_vsx_xxsel_2di (vsll, vsll, vsll);
-XXSEL_2DI vector_select_v2di {}
-
-  const vull __builtin_vsx_xxsel_2di_uns (vull, vull, vull);
-XXSEL_2DI_UNS vector_select_v2di_uns {}
-
-  const vf __builtin_vsx_xxsel_4sf (vf, vf, vf);
-XXSEL_4SF vector_select_v4sf {}
-
-  const vsi __builtin_vsx_xxsel_4si (vsi, vsi, vsi);
-XXSEL_4SI vector_select_v4si {}
-
-  const vui __builtin_vsx_xxsel_4si_uns (vui, vui, vui);
-XXSEL_4SI_UNS vector_select_v4si_uns {}
-
-  const vss __builtin_vsx_xxsel_8hi (vss, vss, vss);
-XXSEL_8HI vector_select_v8hi {}
-
-  const vus __builtin_vsx_xxsel_8hi_uns (vus, vus, vus);
-XXSEL_8HI_UNS vector_select_v8hi_uns {}
-
   const vsc __builtin_vsx_xxsldwi_16qi (vsc, vsc, const int<2>);
 XXSLDWI_16QI vsx_xxsldwi_v16qi {}
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
index 79d9343d12a2..5e3051841fee 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
@@ -37,6 +37,8 @@
 /* { dg-final { scan-assembler "xvcvsxdsp" } } */
 /* { dg-final { scan-assembler "xvcvuxdsp" } } */
 
+#include 
+
 extern __vector int si[][4];
 extern __vector short ss[][4];
 extern __vector signed char sc[][4];
@@ -61,23 +63,23 @@ int do_sel(void)
 {
   int i = 0;
 
-  si[i][0] = __builtin_vsx_xxse

[gcc r15-1923] rs6000, remove __builtin_vsx_vperm_* built-ins

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:7121926b20c2f62ff49e769e26b994180b0a16af

commit r15-1923-g7121926b20c2f62ff49e769e26b994180b0a16af
Author: Carl Love 
Date:   Tue Jul 9 13:31:52 2024 -0400

rs6000, remove __builtin_vsx_vperm_* built-ins

The undocumented built-ins:
  __builtin_vsx_vperm_16qi_uns,
  __builtin_vsx_vperm_1ti,
  __builtin_vsx_vperm_1ti_uns,
  __builtin_vsx_vperm_2df,
  __builtin_vsx_vperm_2di,
  __builtin_vsx_vperm_2di_uns,
  __builtin_vsx_vperm_4sf,
  __builtin_vsx_vperm_4si,
  __builtin_vsx_vperm_4si_uns

are duplicates of the __builtin_altivec_* built-ins that are used by
the overloaded vec_perm built-in that is documented in the PVIPR.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_vperm_16qi_uns,
__builtin_vsx_vperm_1ti, __builtin_vsx_vperm_1ti_uns,
__builtin_vsx_vperm_2df, __builtin_vsx_vperm_2di,
__builtin_vsx_vperm_2di_uns, __builtin_vsx_vperm_4sf,
__builtin_vsx_vperm_4si, __builtin_vsx_vperm_4si_uns): Remove
built-in definitions and comments.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vsx-builtin-3.c (__builtin_vsx_vperm_16qi_uns,
__builtin_vsx_vperm_1ti, __builtin_vsx_vperm_1ti_uns,
__builtin_vsx_vperm_2df, __builtin_vsx_vperm_2di,
__builtin_vsx_vperm_2di_uns, __builtin_vsx_vperm_4sf,
__builtin_vsx_vperm_4si, __builtin_vsx_vperm_4si_uns,
__builtin_vsx_vperm): Change call to built-in to the overloaded
built-in vec_perm.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def| 33 
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c | 22 
 2 files changed, 11 insertions(+), 44 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 80fdc959698b..4b1f5522cf55 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1529,39 +1529,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vsll);
 UNS_FLOATO_V2DI unsfloatov2di {}
 
-; These are duplicates of __builtin_altivec_* counterparts, and are being
-; kept for backwards compatibility.  The reason for their existence is
-; unclear.  TODO: Consider deprecation/removal at some point.
-  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
-VPERM_16QI_X altivec_vperm_v16qi {}
-
-  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
-VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
-
-  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vsc);
-VPERM_1TI_X altivec_vperm_v1ti {}
-
-  const vsq __builtin_vsx_vperm_1ti_uns (vsq, vsq, vsc);
-VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
-
-  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
-VPERM_2DF_X altivec_vperm_v2df {}
-
-  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
-VPERM_2DI_X altivec_vperm_v2di {}
-
-  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
-VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
-
-  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
-VPERM_4SF_X altivec_vperm_v4sf {}
-
-  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
-VPERM_4SI_X altivec_vperm_v4si {}
-
-  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
-VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
-
   const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
 VPERM_8HI_X altivec_vperm_v8hi {}
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
index 5e3051841fee..60f91aad23c2 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-3.c
@@ -88,17 +88,17 @@ int do_perm(void)
 {
   int i = 0;
 
-  si[i][0] = __builtin_vsx_vperm_4si (si[i][1], si[i][2], uc[i][3]); i++;
-  ss[i][0] = __builtin_vsx_vperm_8hi (ss[i][1], ss[i][2], uc[i][3]); i++;
-  sc[i][0] = __builtin_vsx_vperm_16qi (sc[i][1], sc[i][2], uc[i][3]); i++;
-  f[i][0] = __builtin_vsx_vperm_4sf (f[i][1], f[i][2], uc[i][3]); i++;
-  d[i][0] = __builtin_vsx_vperm_2df (d[i][1], d[i][2], uc[i][3]); i++;
-
-  si[i][0] = __builtin_vsx_vperm (si[i][1], si[i][2], uc[i][3]); i++;
-  ss[i][0] = __builtin_vsx_vperm (ss[i][1], ss[i][2], uc[i][3]); i++;
-  sc[i][0] = __builtin_vsx_vperm (sc[i][1], sc[i][2], uc[i][3]); i++;
-  f[i][0] = __builtin_vsx_vperm (f[i][1], f[i][2], uc[i][3]); i++;
-  d[i][0] = __builtin_vsx_vperm (d[i][1], d[i][2], uc[i][3]); i++;
+  si[i][0] = vec_perm (si[i][1], si[i][2], uc[i][3]); i++;
+  ss[i][0] = vec_perm (ss[i][1], ss[i][2], uc[i][3]); i++;
+  sc[i][0] = vec_perm (sc[i][1], sc[i][2], uc[i][3]); i++;
+  f[i][0] = vec_perm (f[i][1], f[i][2], uc[i][3]); i++;
+  d[i][0] = vec_perm (d[i][1], d[i][2], uc[i][3]); i++;
+
+  si[i][0] = vec_perm (si[i][1], si[i][2], uc[i][3]); i++;
+  ss[i][0] = vec_perm (ss[i][1], ss[i][2], uc[i][3]); i++;
+  sc[i][0] = vec_perm (sc[i][1], sc[i][2], uc[i][3]); i++;
+  f[i][0] = 

[gcc r15-1924] rs6000, remove __builtin_vsx_xvnegdp and __builtin_vsx_xvnegsp built-ins

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:ca4842f4f47dac33fb43def287d8ec5663fdf63f

commit r15-1924-gca4842f4f47dac33fb43def287d8ec5663fdf63f
Author: Carl Love 
Date:   Tue Jul 9 13:32:02 2024 -0400

rs6000, remove __builtin_vsx_xvnegdp and __builtin_vsx_xvnegsp built-ins

The undocumented __builtin_vsx_xvnegdp and __builtin_vsx_xvnegsp are
redundant.  The overloaded vec_neg built-in provides the same
functionality.  The two built-ins are not documented nor are there any
test cases for them.

Remove the definitions so users will use the overloaded vec_neg built-in
which is documented in the PVIPR.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xvnegdp,
__builtin_vsx_xvnegsp): Remove built-in definitions.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def | 6 --
 1 file changed, 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 4b1f5522cf55..f1351cb3505e 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1739,12 +1739,6 @@
   const vf __builtin_vsx_xvnabssp (vf);
 XVNABSSP vsx_nabsv4sf2 {}
 
-  const vd __builtin_vsx_xvnegdp (vd);
-XVNEGDP negv2df2 {}
-
-  const vf __builtin_vsx_xvnegsp (vf);
-XVNEGSP negv4sf2 {}
-
   const vd __builtin_vsx_xvnmadddp (vd, vd, vd);
 XVNMADDDP nfmav2df4 {}


[gcc r15-1925] rs6000, extend vec_xxpermdi built-in for __int128 args

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:c5b4bfe9bdb6fb614255b9a3b092a0b55076f862

commit r15-1925-gc5b4bfe9bdb6fb614255b9a3b092a0b55076f862
Author: Carl Love 
Date:   Tue Jul 9 13:32:10 2024 -0400

rs6000, extend vec_xxpermdi built-in for __int128 args

Add new signed and unsigned int128 overloaded vector instances for
vec_xxpermdi:

   __int128 vec_xxpermdi (__int128, __int128, const int);
   __uint128 vec_xxpermdi (__uint128, __uint128, const int);

Update the documentation to include a reference to the new vector built-in
instances of vec_xxpermdi.

Add test cases for the new overloaded instances.

gcc/ChangeLog:
* config/rs6000/rs6000-overload.def (vec_xxpermdi): Add new
overloaded built-in instances of vector signed and unsigned
int128.
* doc/extend.texi: Add documentation for built-in instances of
vector signed and unsigned int128.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vec_perm-runnable-i128.c: New test file.

Diff:
---
 gcc/config/rs6000/rs6000-overload.def  |   4 +
 gcc/doc/extend.texi|   4 +
 .../gcc.target/powerpc/vec_perm-runnable-i128.c| 229 +
 3 files changed, 237 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index 855b9aa73cce..c4ecafc6f7ef 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4936,6 +4936,10 @@
 XXPERMDI_2DI  XXPERMDI_VSLL
   vull __builtin_vsx_xxpermdi (vull, vull, const int);
 XXPERMDI_2DI  XXPERMDI_VULL
+  vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
+XXPERMDI_1TI  XXPERMDI_SQ
+  vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
+XXPERMDI_1TI  XXPERMDI_UQ
   vf __builtin_vsx_xxpermdi (vf, vf, const int);
 XXPERMDI_4SF  XXPERMDI_VF
   vd __builtin_vsx_xxpermdi (vd, vd, const int);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 6ab69840f3f7..0b572afca720 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -22634,6 +22634,10 @@ void vec_vsx_st (vector bool char, int, signed char *);
 
 vector double vec_xxpermdi (vector double, vector double, const int);
 vector float vec_xxpermdi (vector float, vector float, const int);
+vector __int128 vec_xxpermdi (vector __int128,
+  vector __int128, const int);
+vector __uint128 vec_xxpermdi (vector __uint128,
+   vector __uint128, const int);
 vector long long vec_xxpermdi (vector long long, vector long long, const int);
 vector unsigned long long vec_xxpermdi (vector unsigned long long,
 vector unsigned long long, const int);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c 
b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
new file mode 100644
index ..0e0d77bcb845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
@@ -0,0 +1,229 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-maltivec -O2 " } */
+
+#include 
+
+#define DEBUG 0
+
+#if DEBUG
+#include 
+void print_i128 (unsigned __int128 val)
+{
+  printf(" 0x%016llx%016llx",
+ (unsigned long long)(val >> 64),
+ (unsigned long long)(val & 0x));
+}
+#endif
+
+extern void abort (void);
+
+union convert_union {
+  vector signed __int128s128;
+  vector unsigned __int128  u128;
+  char  val[16];
+} convert;
+
+int check_u128_result(vector unsigned __int128 vresult_u128,
+ vector unsigned __int128 expected_vresult_u128)
+{
+  /* Use a for loop to check each byte manually so the test case will
+ run with ISA 2.06.
+
+ Return 1 if they match, 0 otherwise.  */
+
+  int i;
+
+  union convert_union result;
+  union convert_union expected;
+
+  result.u128 = vresult_u128;
+  expected.u128 = expected_vresult_u128;
+
+  /* Check if each byte of the result and expected match. */
+  for (i = 0; i < 16; i++)
+{
+  if (result.val[i] != expected.val[i])
+   return 0;
+}
+  return 1;
+}
+
+int check_s128_result(vector signed __int128 vresult_s128,
+ vector signed __int128 expected_vresult_s128)
+{
+  /* Convert the arguments to unsigned, then check equality.  */
+  union convert_union result;
+  union convert_union expected;
+
+  result.s128 = vresult_s128;
+  expected.s128 = expected_vresult_s128;
+
+  return check_u128_result (result.u128, expected.u128);
+}
+
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  
+  vector signed __int128 src_va_s128;
+  vector signed __int128 src_vb_s128;
+  vector signed __int128 vresult_s128;
+  vector signed __int128 expected_vresult_s128;
+
+  vector unsigned __int128 src_va_u128;
+  vector unsigned __int128 src_vb_u128;
+  vector unsigned __int128 src_vc_u128;
+  vector unsigned __int128 vre

[gcc r15-1926] rs6000, remove __builtin_vsx_xvcmpeqsp_p built-in

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:5db91b3073895700380ae34c63b103a33fa635a6

commit r15-1926-g5db91b3073895700380ae34c63b103a33fa635a6
Author: Carl Love 
Date:   Tue Jul 9 13:32:19 2024 -0400

rs6000, remove __builtin_vsx_xvcmpeqsp_p built-in

The built-in __builtin_vsx_xvcmpeqsp_p is a duplicate of the overloaded
__builtin_altivec_vcmpeqfp_p built-in.  The built-in is undocumented and
there are no test cases for it.  The patch removes built-in
__builtin_vsx_xvcmpeqsp_p.

gcc/ChangeLog:
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xvcmpeqsp_p):
Remove built-in definition.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f1351cb3505e..cec50e0d0fd4 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1619,9 +1619,6 @@
   const vf __builtin_vsx_xvcmpeqsp (vf, vf);
 XVCMPEQSP vector_eqv4sf {}
 
-  const signed int __builtin_vsx_xvcmpeqsp_p (signed int, vf, vf);
-XVCMPEQSP_P vector_eq_v4sf_p {pred}
-
   const vd __builtin_vsx_xvcmpgedp (vd, vd);
 XVCMPGEDP vector_gev2df {}


[gcc r15-1927] rs6000, remove vector set and vector init built-ins.

2024-07-09 Thread Carl Love via Gcc-cvs
https://gcc.gnu.org/g:e611189899bb885a27ef8d17f77c02ada6c69069

commit r15-1927-ge611189899bb885a27ef8d17f77c02ada6c69069
Author: Carl Love 
Date:   Tue Jul 9 13:32:28 2024 -0400

rs6000, remove vector set and vector init built-ins.

The vector init built-ins:

  __builtin_vec_init_v16qi, __builtin_vec_init_v8hi,
  __builtin_vec_init_v4si, __builtin_vec_init_v4sf,
  __builtin_vec_init_v2di, __builtin_vec_init_v2df,
  __builtin_vec_init_v1ti

perform the same operation as initializing the vector in C code.  For
example:

  result_v4si = __builtin_vec_init_v4si (1, 2, 3, 4);
  result_v4si = {1, 2, 3, 4};

These two constructs were tested and verified they generate identical
assembly instructions with no optimization and -O3 optimization.

The vector set built-ins:

  __builtin_vec_set_v16qi, __builtin_vec_set_v8hi,
  __builtin_vec_set_v4si, __builtin_vec_set_v4sf,
  __builtin_vec_set_v1ti, __builtin_vec_set_v2di,
  __builtin_vec_set_v2df

perform the same operation as setting a specific element in the vector in
C code.  For example:

  src_v4si = __builtin_vec_set_v4si (src_v4si, int_val, index);
  src_v4si[index] = int_val;

The built-in actually generates more instructions than the inline C code
with no optimization but is identical with -O3 optimizations.

All of the above built-ins that are removed do not have test cases and
are not documented.

Built-ins __builtin_vec_set_v1ti, __builtin_vec_set_v2di,
__builtin_vec_set_v2df are not removed as they are used in function
resolve_vec_insert() in file rs6000-c.cc.

The built-ins are removed as they don't provide any benefit over just
using C code.

The code to define the bif_init_bit, bif_is_init, as well as their uses
are removed.  The function altivec_expand_vec_init_builtin is also removed.

gcc/ChangeLog:
* config/rs6000/rs6000-builtin.cc (altivec_expand_vec_init_builtin):
Remove the function.
(rs6000_expand_builtin): Remove the if bif_is_init check to call
the altivec_expand_vec_init_builtin function.
* config/rs6000/rs6000-builtins.def: Remove the attribute string
comment for init.
(__builtin_vec_init_v16qi,
__builtin_vec_init_v4sf, __builtin_vec_init_v4si,
__builtin_vec_init_v8hi, __builtin_vec_init_v1ti,
__builtin_vec_init_v2df, __builtin_vec_init_v2di,
__builtin_vec_set_v16qi, __builtin_vec_set_v4sf,
__builtin_vec_set_v4si, __builtin_vec_set_v8hi): Remove
built-in definitions.
* config/rs6000/rs6000-gen-builtins.cc: Remove comment for init
attribute string.
(struct attrinfo): Remove isinit entry.
(parse_bif_attrs): Remove the if statement to check for attribute
init.
(ifdef DEBUG): Remove print for init attribute string.
(write_decls): Remove print for define bif_init_bit and
define for bif_is_init.
(write_bif_static_init): Remove if bifp->attrs.isinit statement.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc  | 40 
 gcc/config/rs6000/rs6000-builtins.def| 45 +++-
 gcc/config/rs6000/rs6000-gen-builtins.cc | 29 +++-
 3 files changed, 14 insertions(+), 100 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 646e740774eb..0a24d20a58c4 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2313,43 +2313,6 @@ altivec_expand_predicate_builtin (enum insn_code icode, 
tree exp, rtx target)
   return target;
 }
 
-/* Expand vec_init builtin.  */
-static rtx
-altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
-{
-  machine_mode tmode = TYPE_MODE (type);
-  machine_mode inner_mode = GET_MODE_INNER (tmode);
-  int i, n_elt = GET_MODE_NUNITS (tmode);
-
-  gcc_assert (VECTOR_MODE_P (tmode));
-  gcc_assert (n_elt == call_expr_nargs (exp));
-
-  if (!target || !register_operand (target, tmode))
-target = gen_reg_rtx (tmode);
-
-  /* If we have a vector compromised of a single element, such as V1TImode, do
- the initialization directly.  */
-  if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
-{
-  rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
-  emit_move_insn (target, gen_lowpart (tmode, x));
-}
-  else
-{
-  rtvec v = rtvec_alloc (n_elt);
-
-  for (i = 0; i < n_elt; ++i)
-   {
- rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
-   }
-
-  rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
-}
-
-  return target;
-}
-
 /* Return the integer constant in ARG.  Constrain it to be in th

[gcc r14-10400] libstdc++: Define __glibcxx_assert_fail for non-verbose build [PR115585]

2024-07-09 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:85a39a8aaf683766f8160b982ed4d7b8c44aede0

commit r14-10400-g85a39a8aaf683766f8160b982ed4d7b8c44aede0
Author: Jonathan Wakely 
Date:   Fri Jun 28 15:14:15 2024 +0100

libstdc++: Define __glibcxx_assert_fail for non-verbose build [PR115585]

When the library is configured with --disable-libstdcxx-verbose the
assertions just abort instead of calling __glibcxx_assert_fail, and so I
didn't export that function for the non-verbose build. However, that
option is documented to not change the library ABI, so we still need to
export the symbol from the library. It could be needed by programs
compiled against the headers from a verbose build.

The non-verbose definition can just call abort so that it doesn't pull
in I/O symbols, which are unwanted in a non-verbose build.

libstdc++-v3/ChangeLog:

PR libstdc++/115585
* src/c++11/assert_fail.cc (__glibcxx_assert_fail): Add
definition for non-verbose builds.

(cherry picked from commit 52370c839edd04df86d3ff2b71fcdca0c7376a7f)

Diff:
---
 libstdc++-v3/src/c++11/assert_fail.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++11/assert_fail.cc 
b/libstdc++-v3/src/c++11/assert_fail.cc
index 6d99c7958f3e..76c8a5a5c2f9 100644
--- a/libstdc++-v3/src/c++11/assert_fail.cc
+++ b/libstdc++-v3/src/c++11/assert_fail.cc
@@ -22,10 +22,10 @@
 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 // .
 
-#include   // for std::fprintf, stderr
 #include  // for std::abort
 
 #ifdef _GLIBCXX_VERBOSE_ASSERT
+#include   // for std::fprintf, stderr
 namespace std
 {
   [[__noreturn__]]
@@ -41,4 +41,12 @@ namespace std
 abort();
   }
 }
+#else
+namespace std
+{
+  [[__noreturn__]]
+  void
+  __glibcxx_assert_fail(const char*, int, const char*, const char*) noexcept
+  { abort(); }
+}
 #endif


[gcc r14-10401] libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

2024-07-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:c94c8ff5f5769ab59cbdd46c8669aa7e3ac83a44

commit r14-10401-gc94c8ff5f5769ab59cbdd46c8669aa7e3ac83a44
Author: Jonathan Wakely 
Date:   Sun Jul 7 12:22:42 2024 +0100

libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

The definition of the _Atomic(T) macro needs to refer to ::std::atomic,
not some other std::atomic relative to the current namespace.

libstdc++-v3/ChangeLog:

PR libstdc++/115807
* include/c_compatibility/stdatomic.h (_Atomic): Ensure it
refers to std::atomic in the global namespace.
* testsuite/29_atomics/headers/stdatomic.h/115807.cc: New test.

(cherry picked from commit 40d234dd6439e8c8cfbf3f375a61906aed35c80d)

Diff:
---
 libstdc++-v3/include/c_compatibility/stdatomic.h   |  2 +-
 .../testsuite/29_atomics/headers/stdatomic.h/115807.cc | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h 
b/libstdc++-v3/include/c_compatibility/stdatomic.h
index 52daa16c9b1e..7236260795e4 100644
--- a/libstdc++-v3/include/c_compatibility/stdatomic.h
+++ b/libstdc++-v3/include/c_compatibility/stdatomic.h
@@ -35,7 +35,7 @@
 #ifdef __cpp_lib_stdatomic_h // C++ >= 23
 #include 
 
-#define _Atomic(_Tp) std::atomic<_Tp>
+#define _Atomic(_Tp) ::std::atomic<_Tp>
 
 using std::memory_order;
 using std::memory_order_relaxed;
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc 
b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
new file mode 100644
index ..14f320fe8357
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
@@ -0,0 +1,14 @@
+// { dg-do compile { target c++23 } }
+#include 
+namespace other {
+  namespace std {
+int atomic = 0;
+  }
+  _Atomic(long) a{};
+}
+
+#include 
+
+namespace non::std {
+  static_assert( ::std::is_same_v<_Atomic(int), ::std::atomic> );
+}


[gcc r13-8899] libstdc++: Define __glibcxx_assert_fail for non-verbose build [PR115585]

2024-07-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:3cd410fe4f48ffd841fcd5442d1f2d6350666330

commit r13-8899-g3cd410fe4f48ffd841fcd5442d1f2d6350666330
Author: Jonathan Wakely 
Date:   Fri Jun 28 15:14:15 2024 +0100

libstdc++: Define __glibcxx_assert_fail for non-verbose build [PR115585]

When the library is configured with --disable-libstdcxx-verbose the
assertions just abort instead of calling __glibcxx_assert_fail, and so I
didn't export that function for the non-verbose build. However, that
option is documented to not change the library ABI, so we still need to
export the symbol from the library. It could be needed by programs
compiled against the headers from a verbose build.

The non-verbose definition can just call abort so that it doesn't pull
in I/O symbols, which are unwanted in a non-verbose build.

libstdc++-v3/ChangeLog:

PR libstdc++/115585
* src/c++11/assert_fail.cc (__glibcxx_assert_fail): Add
definition for non-verbose builds.

(cherry picked from commit 52370c839edd04df86d3ff2b71fcdca0c7376a7f)

Diff:
---
 libstdc++-v3/src/c++11/assert_fail.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++11/assert_fail.cc 
b/libstdc++-v3/src/c++11/assert_fail.cc
index 540e953da2e8..774ffa701189 100644
--- a/libstdc++-v3/src/c++11/assert_fail.cc
+++ b/libstdc++-v3/src/c++11/assert_fail.cc
@@ -22,10 +22,10 @@
 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 // .
 
-#include   // for std::fprintf, stderr
 #include  // for std::abort
 
 #ifdef _GLIBCXX_VERBOSE_ASSERT
+#include   // for std::fprintf, stderr
 namespace std
 {
   [[__noreturn__]]
@@ -41,4 +41,12 @@ namespace std
 abort();
   }
 }
+#else
+namespace std
+{
+  [[__noreturn__]]
+  void
+  __glibcxx_assert_fail(const char*, int, const char*, const char*) noexcept
+  { abort(); }
+}
 #endif


[gcc r13-8900] libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

2024-07-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:c36ef56fc1df456a2174d7cb42967a0e871597c2

commit r13-8900-gc36ef56fc1df456a2174d7cb42967a0e871597c2
Author: Jonathan Wakely 
Date:   Sun Jul 7 12:22:42 2024 +0100

libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

The definition of the _Atomic(T) macro needs to refer to ::std::atomic,
not some other std::atomic relative to the current namespace.

libstdc++-v3/ChangeLog:

PR libstdc++/115807
* include/c_compatibility/stdatomic.h (_Atomic): Ensure it
refers to std::atomic in the global namespace.
* testsuite/29_atomics/headers/stdatomic.h/115807.cc: New test.

(cherry picked from commit 40d234dd6439e8c8cfbf3f375a61906aed35c80d)

Diff:
---
 libstdc++-v3/include/c_compatibility/stdatomic.h   |  2 +-
 .../testsuite/29_atomics/headers/stdatomic.h/115807.cc | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h 
b/libstdc++-v3/include/c_compatibility/stdatomic.h
index b565a1c1ab18..2bb23decf8d1 100644
--- a/libstdc++-v3/include/c_compatibility/stdatomic.h
+++ b/libstdc++-v3/include/c_compatibility/stdatomic.h
@@ -34,7 +34,7 @@
 
 #define __cpp_lib_stdatomic_h 202011L
 
-#define _Atomic(_Tp) std::atomic<_Tp>
+#define _Atomic(_Tp) ::std::atomic<_Tp>
 
 using std::memory_order;
 using std::memory_order_relaxed;
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc 
b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
new file mode 100644
index ..14f320fe8357
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
@@ -0,0 +1,14 @@
+// { dg-do compile { target c++23 } }
+#include 
+namespace other {
+  namespace std {
+int atomic = 0;
+  }
+  _Atomic(long) a{};
+}
+
+#include 
+
+namespace non::std {
+  static_assert( ::std::is_same_v<_Atomic(int), ::std::atomic> );
+}


[gcc r15-1928] c: Fix ICE for incorrect code in comptypes_verify [PR115696]

2024-07-09 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:592a746533a278a5fd3e7b5dff004e1846ef26a4

commit r15-1928-g592a746533a278a5fd3e7b5dff004e1846ef26a4
Author: Martin Uecker 
Date:   Sat Jun 29 15:36:18 2024 +0200

c: Fix ICE for incorrect code in comptypes_verify [PR115696]

The new verification code produces an ICE for incorrect code.  Add the
same logic as already used in comptypes to bail out under certain
conditions.

PR c/115696

gcc/c/
* c-typeck.cc (comptypes_verify): Bail out for
identical, empty, and erroneous input types.

gcc/testsuite/
* gcc.dg/pr115696.c: New test.

Diff:
---
 gcc/c/c-typeck.cc   | 4 
 gcc/testsuite/gcc.dg/pr115696.c | 7 +++
 2 files changed, 11 insertions(+)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index ffcab7df4d3b..e486ac04f9cf 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -1175,6 +1175,10 @@ common_type (tree t1, tree t2)
 static bool
 comptypes_verify (tree type1, tree type2)
 {
+  if (type1 == type2 || !type1 || !type2
+  || TREE_CODE (type1) == ERROR_MARK || TREE_CODE (type2) == ERROR_MARK)
+return true;
+
   if (TYPE_CANONICAL (type1) != TYPE_CANONICAL (type2)
   && !TYPE_STRUCTURAL_EQUALITY_P (type1)
   && !TYPE_STRUCTURAL_EQUALITY_P (type2))
diff --git a/gcc/testsuite/gcc.dg/pr115696.c b/gcc/testsuite/gcc.dg/pr115696.c
new file mode 100644
index ..50b8ebc24f08
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115696.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-implicit-int" } */
+
+a();   /* { dg-warning "no type or storage" } */
+a; /* { dg-error "redeclared" } */
+   /* { dg-warning "no type or storage" "" { target *-*-* } .-1 } */
+a();   /* { dg-warning "no type or storage" } */


[gcc r15-1929] c: Fix ICE for redeclaration of structs with different alignment [PR114727]

2024-07-09 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:7825c07bbaf503c47ecedd87e3d64be003b24f2c

commit r15-1929-g7825c07bbaf503c47ecedd87e3d64be003b24f2c
Author: Martin Uecker 
Date:   Sat Jun 29 15:53:43 2024 +0200

c: Fix ICE for redeclaration of structs with different alignment [PR114727]

For redeclarations of struct in C23, if one has an alignment attribute
that makes the alignment different, we later get an ICE in verify_types.
This patch disallows such redeclarations by declaring such types to
be different.

PR c/114727

gcc/c/
* c-typeck.cc (tagged_types_tu_compatible_p): Add test.

gcc/testsuite/
* gcc.dg/pr114727.c: New test.

Diff:
---
 gcc/c/c-typeck.cc   | 3 +++
 gcc/testsuite/gcc.dg/pr114727.c | 6 ++
 2 files changed, 9 insertions(+)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index e486ac04f9cf..455dc374b481 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -1603,6 +1603,9 @@ tagged_types_tu_compatible_p (const_tree t1, const_tree 
t2,
  != TYPE_REVERSE_STORAGE_ORDER (t2)))
 return false;
 
+  if (TYPE_USER_ALIGN (t1) != TYPE_USER_ALIGN (t2))
+data->different_types_p = true;
+
   /* For types already being looked at in some active
  invocation of this function, assume compatibility.
  The cache is built as a linked list on the stack
diff --git a/gcc/testsuite/gcc.dg/pr114727.c b/gcc/testsuite/gcc.dg/pr114727.c
new file mode 100644
index ..12949590ce09
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114727.c
@@ -0,0 +1,6 @@
+/* { dg-do compile }
+ * { dg-options "-std=c23 -g" } */
+
+#define Y [[gnu::aligned(128)]]
+extern struct Y foo { int x; } x;
+struct foo { int x; }; /* { dg-error "redefinition" } */


[gcc r15-1930] Fix test errors after r15-1394 for sizeof(int)==sizeof(long) [PR115545]

2024-07-09 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:5b46f196cdb62af0e611315ea411938d756a0ad1

commit r15-1930-g5b46f196cdb62af0e611315ea411938d756a0ad1
Author: Martin Uecker 
Date:   Sun Jun 23 09:10:20 2024 +0200

Fix test errors after r15-1394 for sizeof(int)==sizeof(long) [PR115545]

Some tests added to test the type of redeclarations of enumerators
in r15-1394 fail on architectures where sizeof(long) == sizeof(int).
Adapt tests to use long long and/or accept that long long is selected
as type for the enumerator.

PR testsuite/115545

gcc/testsuite/

* gcc.dg/pr115109.c: Adapt test.
* gcc.dg/c23-tag-enum-6.c: Adapt test.
* gcc.dg/c23-tag-enum-7.c: Adapt test.

Diff:
---
 gcc/testsuite/gcc.dg/c23-tag-enum-6.c |  4 ++--
 gcc/testsuite/gcc.dg/c23-tag-enum-7.c | 12 ++--
 gcc/testsuite/gcc.dg/pr115109.c   |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/c23-tag-enum-6.c 
b/gcc/testsuite/gcc.dg/c23-tag-enum-6.c
index 29aef7ee3fdf..d8d304d9b3df 100644
--- a/gcc/testsuite/gcc.dg/c23-tag-enum-6.c
+++ b/gcc/testsuite/gcc.dg/c23-tag-enum-6.c
@@ -7,10 +7,10 @@ enum E : int { a = 1, b = 2 };
 enum E : int { b = _Generic(a, enum E: 2), a = 1 };
 
 enum H { x = 1 };
-enum H { x = 2UL + UINT_MAX }; /* { dg-error "outside the range" } */
+enum H { x = 2ULL + UINT_MAX };/* { dg-error "outside the range" } */
 
 enum K : int { z = 1 };
-enum K : int { z = 2UL + UINT_MAX };   /* { dg-error "outside the range" } */
+enum K : int { z = 2ULL + UINT_MAX };  /* { dg-error "outside the range" } */
 
 enum F { A = 0, B = UINT_MAX };
 enum F { B = UINT_MAX, A };/* { dg-error "outside the range" } */
diff --git a/gcc/testsuite/gcc.dg/c23-tag-enum-7.c 
b/gcc/testsuite/gcc.dg/c23-tag-enum-7.c
index d4c787c8f716..974735bf2ef4 100644
--- a/gcc/testsuite/gcc.dg/c23-tag-enum-7.c
+++ b/gcc/testsuite/gcc.dg/c23-tag-enum-7.c
@@ -4,23 +4,23 @@
 #include 
 
 // enumerators are all representable in int
-enum E { a = 1UL, b = _Generic(a, int: 2) };
+enum E { a = 1ULL, b = _Generic(a, int: 2) };
 static_assert(_Generic(a, int: 1));
 static_assert(_Generic(b, int: 1));
-enum E { a = 1UL, b = _Generic(a, int: 2) };
+enum E { a = 1ULL, b = _Generic(a, int: 2) };
 static_assert(_Generic(a, int: 1));
 static_assert(_Generic(b, int: 1));
 
 // enumerators are not representable in int
-enum H { c = 1UL << (UINT_WIDTH + 1), d = 2 };
+enum H { c = 1ULL << (UINT_WIDTH + 1), d = 2 };
 static_assert(_Generic(c, enum H: 1));
 static_assert(_Generic(d, enum H: 1));
-enum H { c = 1UL << (UINT_WIDTH + 1), d = _Generic(c, enum H: 2) };
+enum H { c = 1ULL << (UINT_WIDTH + 1), d = _Generic(c, enum H: 2) };
 static_assert(_Generic(c, enum H: 1));
 static_assert(_Generic(d, enum H: 1));
 
 // there is an overflow in the first declaration
-enum K { e = UINT_MAX, f, g = _Generic(e, unsigned int: 0) + _Generic(f, 
unsigned long: 1) };
+enum K { e = UINT_MAX, f, g = _Generic(e, unsigned int: 0) + _Generic(f, 
unsigned long: 1, unsigned long long: 1) };
 static_assert(_Generic(e, enum K: 1));
 static_assert(_Generic(f, enum K: 1));
 static_assert(_Generic(g, enum K: 1));
@@ -30,7 +30,7 @@ static_assert(_Generic(f, enum K: 1));
 static_assert(_Generic(g, enum K: 1));
 
 // there is an overflow in the first declaration
-enum U { k = INT_MAX, l, m = _Generic(k, int: 0) + _Generic(l, long: 1) };
+enum U { k = INT_MAX, l, m = _Generic(k, int: 0) + _Generic(l, long: 1, long 
long: 1) };
 static_assert(_Generic(k, enum U: 1));
 static_assert(_Generic(l, enum U: 1));
 static_assert(_Generic(m, enum U: 1));
diff --git a/gcc/testsuite/gcc.dg/pr115109.c b/gcc/testsuite/gcc.dg/pr115109.c
index 4baee0f34453..8245ff7fadb7 100644
--- a/gcc/testsuite/gcc.dg/pr115109.c
+++ b/gcc/testsuite/gcc.dg/pr115109.c
@@ -3,6 +3,6 @@
 
 #include 
 
-enum E { a = 1UL << (ULONG_WIDTH - 5), b = 2 };
-enum E { a = 1ULL << (ULONG_WIDTH - 5), b = _Generic(a, enum E: 2) };
+enum E { a = 1ULL << (ULLONG_WIDTH - 5), b = 2 };
+enum E { a = 1ULL << (ULLONG_WIDTH - 5), b = _Generic(a, enum E: 2) };


[gcc r12-10608] libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

2024-07-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:987e70f4290451abd98eda8b82e97b4ad25ee3c2

commit r12-10608-g987e70f4290451abd98eda8b82e97b4ad25ee3c2
Author: Jonathan Wakely 
Date:   Sun Jul 7 12:22:42 2024 +0100

libstdc++: Fix _Atomic(T) macro in <stdatomic.h> [PR115807]

The definition of the _Atomic(T) macro needs to refer to ::std::atomic,
not some other std::atomic relative to the current namespace.

libstdc++-v3/ChangeLog:

PR libstdc++/115807
* include/c_compatibility/stdatomic.h (_Atomic): Ensure it
refers to std::atomic in the global namespace.
* testsuite/29_atomics/headers/stdatomic.h/115807.cc: New test.

(cherry picked from commit 40d234dd6439e8c8cfbf3f375a61906aed35c80d)

Diff:
---
 libstdc++-v3/include/c_compatibility/stdatomic.h   |  2 +-
 .../testsuite/29_atomics/headers/stdatomic.h/115807.cc | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h 
b/libstdc++-v3/include/c_compatibility/stdatomic.h
index b565a1c1ab18..2bb23decf8d1 100644
--- a/libstdc++-v3/include/c_compatibility/stdatomic.h
+++ b/libstdc++-v3/include/c_compatibility/stdatomic.h
@@ -34,7 +34,7 @@
 
 #define __cpp_lib_stdatomic_h 202011L
 
-#define _Atomic(_Tp) std::atomic<_Tp>
+#define _Atomic(_Tp) ::std::atomic<_Tp>
 
 using std::memory_order;
 using std::memory_order_relaxed;
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc 
b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
new file mode 100644
index ..14f320fe8357
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/115807.cc
@@ -0,0 +1,14 @@
+// { dg-do compile { target c++23 } }
+#include <stdatomic.h>
+namespace other {
+  namespace std {
+int atomic = 0;
+  }
+  _Atomic(long) a{};
+}
+
+#include <type_traits>
+
+namespace non::std {
+  static_assert( ::std::is_same_v<_Atomic(int), ::std::atomic<int>> );
+}


[gcc r15-1932] RISC-V: fix zcmp popretz [PR113715]

2024-07-09 Thread Fei Gao via Gcc-cvs
https://gcc.gnu.org/g:7a345d0314f8cf0f15ca3664b1e4430d65764570

commit r15-1932-g7a345d0314f8cf0f15ca3664b1e4430d65764570
Author: Fei Gao 
Date:   Tue Jul 9 10:00:29 2024 +

RISC-V: fix zcmp popretz [PR113715]

No functional changes compared with V1, just space-to-tab conversion
in testcases to pass check-function-bodies.

V1 passed regression locally but surprisingly failed in pre-commit CI; after
picking the patch from patchwork, I realized tabs got converted to spaces
before sending the patch.

Root cause:

https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b27d323a368033f0b37e93c57a57a35fd9997864
Commit above tries in targetm.gen_epilogue () to detect if
there's li a0,0 insn at the end of insn chain, if so, cm.popret
is replaced by cm.popretz and li a0,0 insn is deleted.

Insertion of the generated epilogue sequence
into the insn chain doesn't happen at this moment.
If later shrink-wrap decides NOT to insert the epilogue sequence at the end
of insn chain, then the li a0,0 insn has already been mistakenly removed.

Fix this issue by removing generation of cm.popretz in epilogue,
leaving the assignment to a0 and use insn with cm.popret.

That's likely going to result in some kind of code size regression,
but not a correctness regression.

Optimization can be done in future.

Signed-off-by: Fei Gao 

gcc/ChangeLog:
PR target/113715

* config/riscv/riscv.cc (riscv_zcmp_can_use_popretz): Removed.
(riscv_gen_multi_pop_insn): Remove generation of cm.popretz.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rv32e_zcmp.c: Adapt TC.
* gcc.target/riscv/rv32i_zcmp.c: Likewise.

Diff:
---
 gcc/config/riscv/riscv.cc   | 53 -
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c |  3 +-
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c |  3 +-
 3 files changed, 4 insertions(+), 55 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 38ed773c222d..61fa74e93220 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8167,52 +8167,6 @@ riscv_adjust_libcall_cfi_epilogue ()
   return dwarf;
 }
 
-/* return true if popretz pattern can be matched.
-   set (reg 10 a0) (const_int 0)
-   use (reg 10 a0)
-   NOTE_INSN_EPILOGUE_BEG  */
-static rtx_insn *
-riscv_zcmp_can_use_popretz (void)
-{
-  rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
-
-  /* sequence stack for NOTE_INSN_EPILOGUE_BEG*/
-  struct sequence_stack *outer_seq = get_current_sequence ()->next;
-  if (!outer_seq)
-return NULL;
-  insn = outer_seq->first;
-  if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
-return NULL;
-
-  /* sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG*/
-  outer_seq = outer_seq->next;
-  if (outer_seq)
-insn = outer_seq->last;
-
-  /* skip notes  */
-  while (insn && NOTE_P (insn))
-{
-  insn = PREV_INSN (insn);
-}
-  use = insn;
-
-  /* match use (reg 10 a0)  */
-  if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
-  || !REG_P (XEXP (PATTERN (use), 0))
-  || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
-return NULL;
-
-  /* match set (reg 10 a0) (const_int 0 [0])  */
-  clear = PREV_INSN (use);
-  if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
-  && REG_P (SET_DEST (PATTERN (clear)))
-  && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
-  && SET_SRC (PATTERN (clear)) == const0_rtx)
-return clear;
-
-  return NULL;
-}
-
 static void
 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
  unsigned multipop_size)
@@ -8223,13 +8177,6 @@ riscv_gen_multi_pop_insn (bool use_multi_pop_normal, 
unsigned mask,
   if (!use_multi_pop_normal)
 insn = emit_insn (
   riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
-  else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
-{
-  delete_insn (NEXT_INSN (clear_a0_insn));
-  delete_insn (clear_a0_insn);
-  insn = emit_jump_insn (
-   riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
-}
   else
 insn = emit_jump_insn (
   riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
index 50e443573ad9..0af4d7199f68 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
@@ -259,7 +259,8 @@ foo (void)
 **test_popretz:
 ** cm.push {ra}, -16
 ** callf1
-** cm.popretz  {ra}, 16
+** li  a0,0
+** cm.popret   {ra}, 16
 */
 long
 test_popretz ()
diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
index 1e1a8be87054..723889f49df6 1

[gcc r14-10403] RISC-V: backport fix zcmp popretz [PR113715]

2024-07-09 Thread Fei Gao via Gcc-cvs
https://gcc.gnu.org/g:efa30f619361b043616e624e57366a50982e11df

commit r14-10403-gefa30f619361b043616e624e57366a50982e11df
Author: Fei Gao 
Date:   Tue Jul 9 10:00:29 2024 +

RISC-V: backport fix zcmp popretz [PR113715]

Root cause:

https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b27d323a368033f0b37e93c57a57a35fd9997864
Commit above tries in targetm.gen_epilogue () to detect if
there's li a0,0 insn at the end of insn chain, if so, cm.popret
is replaced by cm.popretz and li a0,0 insn is deleted.

Insertion of the generated epilogue sequence
into the insn chain doesn't happen at this moment.
If later shrink-wrap decides NOT to insert the epilogue sequence at the end
of insn chain, then the li a0,0 insn has already been mistakenly removed.

Fix this issue by removing generation of cm.popretz in epilogue,
leaving the assignment to a0 and use insn with cm.popret.

That's likely going to result in some kind of code size regression,
but not a correctness regression.

Optimization can be done in future.

Signed-off-by: Fei Gao 

gcc/ChangeLog:
PR target/113715

* config/riscv/riscv.cc (riscv_zcmp_can_use_popretz): Removed.
(riscv_gen_multi_pop_insn): Remove generation of cm.popretz.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rv32e_zcmp.c: Adapt TC.
* gcc.target/riscv/rv32i_zcmp.c: Likewise.

Diff:
---
 gcc/config/riscv/riscv.cc   | 53 -
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c |  3 +-
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c |  3 +-
 3 files changed, 4 insertions(+), 55 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 6fcb6fb4ad39..a599ced658c3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7761,52 +7761,6 @@ riscv_adjust_libcall_cfi_epilogue ()
   return dwarf;
 }
 
-/* return true if popretz pattern can be matched.
-   set (reg 10 a0) (const_int 0)
-   use (reg 10 a0)
-   NOTE_INSN_EPILOGUE_BEG  */
-static rtx_insn *
-riscv_zcmp_can_use_popretz (void)
-{
-  rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
-
-  /* sequence stack for NOTE_INSN_EPILOGUE_BEG*/
-  struct sequence_stack *outer_seq = get_current_sequence ()->next;
-  if (!outer_seq)
-return NULL;
-  insn = outer_seq->first;
-  if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
-return NULL;
-
-  /* sequence stack for the insn before NOTE_INSN_EPILOGUE_BEG*/
-  outer_seq = outer_seq->next;
-  if (outer_seq)
-insn = outer_seq->last;
-
-  /* skip notes  */
-  while (insn && NOTE_P (insn))
-{
-  insn = PREV_INSN (insn);
-}
-  use = insn;
-
-  /* match use (reg 10 a0)  */
-  if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
-  || !REG_P (XEXP (PATTERN (use), 0))
-  || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
-return NULL;
-
-  /* match set (reg 10 a0) (const_int 0 [0])  */
-  clear = PREV_INSN (use);
-  if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
-  && REG_P (SET_DEST (PATTERN (clear)))
-  && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
-  && SET_SRC (PATTERN (clear)) == const0_rtx)
-return clear;
-
-  return NULL;
-}
-
 static void
 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
  unsigned multipop_size)
@@ -7817,13 +7771,6 @@ riscv_gen_multi_pop_insn (bool use_multi_pop_normal, 
unsigned mask,
   if (!use_multi_pop_normal)
 insn = emit_insn (
   riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
-  else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
-{
-  delete_insn (NEXT_INSN (clear_a0_insn));
-  delete_insn (clear_a0_insn);
-  insn = emit_jump_insn (
-   riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
-}
   else
 insn = emit_jump_insn (
   riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
index 50e443573ad9..0af4d7199f68 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
@@ -259,7 +259,8 @@ foo (void)
 **test_popretz:
 ** cm.push {ra}, -16
 ** callf1
-** cm.popretz  {ra}, 16
+** li  a0,0
+** cm.popret   {ra}, 16
 */
 long
 test_popretz ()
diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
index 1e1a8be87054..723889f49df6 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
@@ -259,7 +259,8 @@ foo (void)
 **test_popretz:
 ** cm.push {ra}, -16
 ** callf1
-** cm.popretz  {ra}, 16
+** li  a0,0
+** cm.popret   {ra}, 16
 */
 long
 test_popretz (

[gcc r15-1933] Remove expanding complex EQ/NE inside a GIMPLE_RETURN [PR115721]

2024-07-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8f8bddb58ad7746b2a19c43e32641d161fa15caf

commit r15-1933-g8f8bddb58ad7746b2a19c43e32641d161fa15caf
Author: Andrew Pinski 
Date:   Tue Jul 9 14:00:34 2024 -0700

Remove expanding complex EQ/NE inside a GIMPLE_RETURN [PR115721]

This code has been dead at least since the move over to tuples
in 0-88576-g726a989a8b74bf, when gimple returns could only have
a simple expression in it. So let's remove it.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/115721
* tree-complex.cc (expand_complex_comparison): Remove
support for GIMPLE_RETURN.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-complex.cc | 8 
 1 file changed, 8 deletions(-)

diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index d2d9d76827db..dfb45b9d91cf 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -1622,14 +1622,6 @@ expand_complex_comparison (gimple_stmt_iterator *gsi, 
tree ar, tree ai,
 
   switch (gimple_code (stmt))
 {
-case GIMPLE_RETURN:
-  {
-   greturn *return_stmt = as_a <greturn *> (stmt);
-   type = TREE_TYPE (gimple_return_retval (return_stmt));
-   gimple_return_set_retval (return_stmt, fold_convert (type, cc));
-  }
-  break;
-
 case GIMPLE_ASSIGN:
   type = TREE_TYPE (gimple_assign_lhs (stmt));
   gimple_assign_set_rhs_from_tree (gsi, fold_convert (type, cc));


[gcc r15-1934] testsuite: Allow matching `{_1, { 0, 0, 0, 0 }}` for vect/slp-gap-1.c

2024-07-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6fce4664d4a2e44843bd1464930696c819906d0f

commit r15-1934-g6fce4664d4a2e44843bd1464930696c819906d0f
Author: Andrew Pinski 
Date:   Tue Jul 9 17:13:24 2024 -0700

testsuite: Allow matching `{_1, { 0,0,0,0 }}` for vect/slp-gap-1.c

While working on adding V4QI support to the aarch64 backend,
vect/slp-gap-1.c started to fail but only because the regex
was failing. Before, it was loading using SI (int) and afterwards,
we started to use V4QI. The generated code was the same and the
generated gimple was almost the same. The regex was searching
for the `zero-padding trick` and it was still doing that but instead
of directly 0, it was V4QI 0 (or rather `{ 0, 0, 0, 0 }`).
This extends the regex to support both.

Tested on x86_64-linux-gnu and aarch64-linux-gnu (with the support added).

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-gap-1.c: Support matching `{_1, { 0, 0, 0, 0 }}`
in addition to `{_1, 0}`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-gap-1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-gap-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
index 9856da7a7f48..74bde8219d9a 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
@@ -14,5 +14,7 @@ void pixel_sub_wxh(int16_t * __restrict diff, uint8_t *pix1, 
uint8_t *pix2) {
 
 /* We can vectorize this without peeling for gaps and thus without epilogue,
but the only thing we can reliably scan is the zero-padding trick for the
-   partial loads.  */
-/* { dg-final { scan-tree-dump-times "\{_\[0-9\]\+, 0" 6 "vect" { target { 
vect64 && { vect_unpack && vect_perm } } } } } */
+   partial loads.  
+   Note this will match `{_1, 0}` or `{_1, {0, 0, 0, 0}}`. Both are the same
+   in the end, the difference is the load is either via SI or V4QI. */
+/* { dg-final { scan-tree-dump-times "\{_\[0-9\]\+, (?:0\|{ 0(?:, 0)\+ )}" 6 
"vect" { target { vect64 && { vect_unpack && vect_perm } } } } } */