date:20241212

[PATCH v1 3/4] Match: Refactor the signed SAT_* match for saturated value [NFC]

2024-12-12 Thread pan2 . li

From: Pan Li 

This patch refactors all the signed SAT_* patterns by extracting the
match for the saturated value, i.e. saturate to INT_MAX when X >= 0 and
to INT_MIN when X < 0.  Thus, we can remove several duplicated
expressions from the different patterns.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Extract saturated value match for signed SAT_*.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 38 +-
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5b30a1e9990..18098920007 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3314,6 +3314,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 }
 (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2)))
 
+(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
+ /* SAT_VAL = (-(T)(X < 0) ^ MAX)  */
+ (match (signed_integer_sat_val @0)
+  (bit_xor:c (nop_convert? (negate
+   (nop_convert? (convert (lt @0 integer_zerop)
+max_value)))
+
 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
  (match (signed_integer_sat_add @0 @1)
   /* T SUM = (T)((UT)X + (UT)Y)
@@ -3322,7 +3329,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (nop_convert @1
(bit_not (bit_xor:c @0 @1)))
 integer_zerop)
-(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+(signed_integer_sat_val @0)
 @2))
  (match (signed_integer_sat_add @0 @1)
   /* T SUM = (T)((UT)X + (UT)Y)
@@ -3340,17 +3347,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (nop_convert @1
integer_zerop)
(ge (bit_xor:c @0 @1) integer_zerop))
-(bit_xor:c (nop_convert (negate (nop_convert (convert
- (lt @0 integer_zerop)
-   max_value)
+(signed_integer_sat_val @0)
 @2))
  (match (signed_integer_sat_add @0 @1)
/* SUM = .ADD_OVERFLOW (X, Y)
   SAT_S_ADD = IMAGPART_EXPR (SUM) != 0 ? (-(T)(X < 0) ^ MAX) : SUM  */
   (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
-(bit_xor:c (nop_convert?
-(negate (nop_convert? (convert (lt @0 integer_zerop)
-   max_value)
+(signed_integer_sat_val @0)
 (realpart @2)))
  (match (signed_integer_sat_add @0 @1)
   /* T SUM = (T)((UT)X + (UT)Y)
@@ -3359,9 +3362,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (nop_convert @1
integer_zerop)
(bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
-(bit_xor:c (nop_convert (negate (nop_convert (convert
-  (lt @0 integer_zerop)
-   max_value)
+(signed_integer_sat_val @0)
 @2))
  (match (signed_integer_sat_add @0 @1)
   /* T SUM = (T)((UT)X + (UT)IMM);
@@ -3370,10 +3371,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
   INTEGER_CST@1)))
(bit_xor:c @0 INTEGER_CST@3)) integer_zerop)
-(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+(signed_integer_sat_val @0)
 @2)
-  (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0)))
-)
+  (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0
 
 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
  (match (signed_integer_sat_sub @0 @1)
@@ -3383,7 +3383,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
  (nop_convert @1)
 integer_zerop)
-(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+(signed_integer_sat_val @0)
 @2))
  (match (signed_integer_sat_sub @0 @1)
   /* T Z = (T)((UT)X - (UT)Y);
@@ -3393,7 +3393,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (nop_convert @1)
 integer_zerop)
 @2
-(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)))
+(signed_integer_sat_val @0)))
  (match (signed_integer_sat_sub @0 @1)
   /* T Z = (T)((UT)X - (UT)Y);
  SAT_S_SUB = (X ^ Y) < 0 & (X ^ Z) < 0 ? (-(T)(X < 0) ^ MAX) : Z  */
@@ -3401,17 +3401,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (nop_convert @1
integer_zerop)
(lt (bit_xor:c @0 @1) integer_zerop))
-(bit_xor:c (nop_convert (negate (nop_convert (con

[PATCH v1 1/4] Match: Refactor the signed SAT_SUB match patterns [NFC]

2024-12-12 Thread pan2 . li

From: Pan Li 

This patch would like to refactor all the signed SAT_SUB patterns,
aka:
* Extract type check outside.
* Re-arrange the related match pattern forms together.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Refactor sorts of signed SAT_SUB match patterns.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 98 +---
 1 file changed, 40 insertions(+), 58 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index dd5302015c7..1ef504f141f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3375,6 +3375,46 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0)))
 )
 
+(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
+ (match (signed_integer_sat_sub @0 @1)
+  /* T Z = (T)((UT)X - (UT)Y);
+ SAT_S_SUB = (X ^ Y) & (X ^ Z) < 0 ? (-(T)(X < 0) ^ MAX) : Z  */
+  (cond^ (lt (bit_and:c (bit_xor:c @0 @1)
+   (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+ (nop_convert @1)
+integer_zerop)
+(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+@2))
+ (match (signed_integer_sat_sub @0 @1)
+  /* T Z = (T)((UT)X - (UT)Y);
+ SAT_S_SUB = (X ^ Y) & (X ^ Z) >= 0 ? Z : (-(T)(X < 0) ^ MAX)  */
+  (cond^ (ge (bit_and:c (bit_xor:c @0 @1)
+   (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+ (nop_convert @1)
+integer_zerop)
+@2
+(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)))
+ (match (signed_integer_sat_sub @0 @1)
+  /* T Z = (T)((UT)X - (UT)Y);
+ SAT_S_SUB = (X ^ Y) < 0 & (X ^ Z) < 0 ? (-(T)(X < 0) ^ MAX) : Z  */
+  (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+ (nop_convert @1
+   integer_zerop)
+   (lt (bit_xor:c @0 @1) integer_zerop))
+(bit_xor:c (nop_convert (negate (nop_convert (convert
+  (lt @0 integer_zerop)
+   max_value)
+@2))
+ (match (signed_integer_sat_sub @0 @1)
+  /* Z = .SUB_OVERFLOW (X, Y)
+ SAT_S_SUB = IMAGPART (Z) != 0 ? (-(T)(X < 0) ^ MAX) : REALPART (Z)  */
+  (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
+(bit_xor:c (nop_convert?
+   (negate (nop_convert? (convert (lt @0 integer_zerop)
+   max_value)
+(realpart @2))
+  (if (types_match (type, @0, @1)
+
 /* The boundary condition for case 10: IMM = 1:
SAT_U_SUB = X >= IMM ? (X - IMM) : 0.
simplify (X != 0 ? X + ~0 : 0) to X - (X != 0).  */
@@ -3386,64 +3426,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(with { tree itype = TREE_TYPE (@2); }
 (convert (minus @2 (convert:itype @1))
 
-/* Signed saturation sub, case 1:
-   T minus = (T)((UT)X - (UT)Y);
-   SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
-
-   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
-(match (signed_integer_sat_sub @0 @1)
- (cond^ (lt (bit_and:c (bit_xor:c @0 @1)
-  (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
-(nop_convert @1)
-   integer_zerop)
-   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
-   @2)
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
-
-/* Signed saturation sub, case 2:
-   T minus = (T)((UT)X - (UT)Y);
-   SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
-
-   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
-(match (signed_integer_sat_sub @0 @1)
- (cond^ (ge (bit_and:c (bit_xor:c @0 @1)
-  (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
-(nop_convert @1)
-   integer_zerop)
-   @2
-   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
-
-/* Signed saturation sub, case 3:
-   Z = .SUB_OVERFLOW (X, Y)
-   SAT_S_SUB = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : REALPART_EXPR 
(Z);
-
-   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
-(match (signed_integer_sat_sub @0 @1)
- (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
-   (bit_xor:c (nop_convert?
-   (negate (nop_convert? (convert (lt @0 integer_zerop)
-  max_value)
-   (realpart @2))
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
-  && types_match (type, @0, @1
-
-/* Signed saturation sub, case 4:
-   T minus = (T)((UT)X - (UT)Y);
-   SAT_S_SUB = (X ^ Y) < 0 & (X ^ minus) < 0 ? (-(T)(X < 0) ^

[PATCH v1 2/4] Match: Refactor the signed SAT_TRUNC match patterns [NFC]

2024-12-12 Thread pan2 . li

From: Pan Li 

This patch would like to refactor all the signed SAT_TRUNC patterns,
aka:
* Extract type check outside.
* Re-arrange the related match pattern forms together.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Refactor sorts of signed SAT_TRUNC match patterns

Signed-off-by: Pan Li 
---
 gcc/match.pd | 65 ++--
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 1ef504f141f..5b30a1e9990 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3415,6 +3415,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (realpart @2))
   (if (types_match (type, @0, @1)
 
+(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
+ (match (signed_integer_sat_trunc @0)
+  /* SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1  > Unsigned_MAX ? (NT)X  */
+  (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2)
+(bit_xor:c (nop_convert?
+(negate (nop_convert? (convert (lt @0 integer_zerop)
+   INTEGER_CST@3)
+(convert @0))
+  (if (!TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4)))
+   (with
+{
+ unsigned itype_prec = TYPE_PRECISION (TREE_TYPE (@0));
+ unsigned otype_prec = TYPE_PRECISION (type);
+ wide_int offset = wi::uhwi (HOST_WIDE_INT_1U << (otype_prec - 1),
+itype_prec); // Aka 128 for int8_t
+ wide_int limit_0 = wi::mask (otype_prec, false, itype_prec); // Aka 255
+ wide_int limit_1 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 3,
+ itype_prec); // Aka 253
+ wide_int limit_2 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 2,
+ itype_prec); // Aka 254
+ wide_int otype_max = wi::mask (otype_prec - 1, false, otype_prec);
+ wide_int itype_max = wi::mask (otype_prec - 1, false, itype_prec);
+ wide_int int_cst_1 = wi::to_wide (@1);
+ wide_int int_cst_2 = wi::to_wide (@2);
+ wide_int int_cst_3 = wi::to_wide (@3);
+}
+(if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0))
+|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2))
+|| (wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_2))
+|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1)))
+&& wi::eq_p (int_cst_3, otype_max)))
+
 /* The boundary condition for case 10: IMM = 1:
SAT_U_SUB = X >= IMM ? (X - IMM) : 0.
simplify (X != 0 ? X + ~0 : 0) to X - (X != 0).  */
@@ -3426,39 +3458,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(with { tree itype = TREE_TYPE (@2); }
 (convert (minus @2 (convert:itype @1))
 
-/* Signed saturation truncate, case 1 and case 2, sizeof (WT) > sizeof (NT).
-   SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1  > Unsigned_MAX ? (NT)X.  */
-(match (signed_integer_sat_trunc @0)
- (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2)
-   (bit_xor:c (nop_convert?
-   (negate (nop_convert? (convert (lt @0 integer_zerop)
-  INTEGER_CST@3)
-   (convert @0))
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
-  && !TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4)))
- (with
-  {
-   unsigned itype_prec = TYPE_PRECISION (TREE_TYPE (@0));
-   unsigned otype_prec = TYPE_PRECISION (type);
-   wide_int offset = wi::uhwi (HOST_WIDE_INT_1U << (otype_prec - 1),
-  itype_prec); // Aka 128 for int8_t
-   wide_int limit_0 = wi::mask (otype_prec, false, itype_prec); // Aka 255
-   wide_int limit_1 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 3,
-   itype_prec); // Aka 253
-   wide_int limit_2 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 2,
-   itype_prec); // Aka 254
-   wide_int otype_max = wi::mask (otype_prec - 1, false, otype_prec);
-   wide_int itype_max = wi::mask (otype_prec - 1, false, itype_prec);
-   wide_int int_cst_1 = wi::to_wide (@1);
-   wide_int int_cst_2 = wi::to_wide (@2);
-   wide_int int_cst_3 = wi::to_wide (@3);
-  }
-  (if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0))
-|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2))
-|| (wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_2))
-|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1)))
-   && wi::eq_p (int_cst_3, otype_max))
-
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.43.0

[PATCH v1 4/4] Match: Update the comments for indicating SAT_* pattern

2024-12-12 Thread pan2 . li

From: Pan Li 

Given that the SAT_* patterns are grouped by operation and signedness,
add leading comments to indicate the beginning of each group.

gcc/ChangeLog:

* match.pd: Update comments for sat_* pattern.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 18098920007..aa006b9e282 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3099,6 +3099,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
   && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
 
+/* Saturation add for unsigned integer.  */
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
  (match (usadd_overflow_mask @0 @1)
   /* SAT_U_ADD = (X + Y) | -(X > (X + Y)).
@@ -3173,6 +3174,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 integer_minus_onep (realpart @2))
   (if (types_match (type, @0) && int_fits_type_p (@1, type)
 
+/* Saturation sub for unsigned integer.  */
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
  (match (unsigned_integer_sat_sub @0 @1)
   /* SAT_U_SUB = X > Y ? X - Y : 0  */
@@ -3262,6 +3264,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 }
 (if (wi::eq_p (sum, wi::uhwi (0, precision
 
+/* Saturation truncate for unsigned integer.  */
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
  (match (unsigned_integer_sat_trunc @0)
   /* SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1)))  */
@@ -3321,6 +3324,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(nop_convert? (convert (lt @0 integer_zerop)
 max_value)))
 
+/* Saturation add for signed integer.  */
 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
  (match (signed_integer_sat_add @0 @1)
   /* T SUM = (T)((UT)X + (UT)Y)
@@ -3375,6 +3379,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 @2)
   (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0
 
+/* Saturation sub for signed integer.  */
 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
  (match (signed_integer_sat_sub @0 @1)
   /* T Z = (T)((UT)X - (UT)Y);
@@ -3411,6 +3416,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (realpart @2))
   (if (types_match (type, @0, @1)
 
+/* Saturation truncate for signed integer.  */
 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))
  (match (signed_integer_sat_trunc @0)
   /* SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1  > Unsigned_MAX ? (NT)X  */
-- 
2.43.0

Re: [PATCH 00/15] arm: [MVE intrinsics] Rework store_scatter and load_gather intrinsics

2024-12-12 Thread Christophe Lyon

On Wed, 11 Dec 2024 at 17:54, Richard Earnshaw (lists)
 wrote:
>
> On 07/11/2024 09:18, Christophe Lyon wrote:
> > This patch series re-implements the store_scatter and load_gather
> > intrinsics using the new framework, similarly to previous series.
> >
> > A few points worth mentioning:
> >
> > - unlike other intrinsics, these ones have the predicate after the
> >mode in their names, hence the need for patch #1
> >
> > - when checking the 'offset' argument of the *_base_* intrinsics, we
> >need ranges with negative lower bounds, unlike what we needed so far
> >(SVE does not have such negative bounds AFAIK), hence the need for
> >patch #5 and the use of 'ss64' instead of 'su64' in signatures.
> >
> > - because of some peculiarities in ACLE expected output wrt data type
> >suffix (.16 vs .u16 vs .f16 for instance), I chose to update a few
> >tests in patches #12 and #13, and to introduce a dedicated iterator
> >in other cases (patch#10, using and fixing an existing iterator
> >would have impact on Neon tests).  I chose the approach which seemed
> >the least invasive, but maybe we should aim at more consistency and
> >update ACLE instead?
> >
>
> Thanks for this, it's a nice cleanup.
>
> This patch series is OK.  In fact, I think you should consider further
> changes to move things from arm_mve.h inside the pragma as pre-approved,
> once stage 1 re-opens, and provided they don't need to go significantly
> beyond the type of changes needed here.

Thanks, I'll push this series soon.

What about v2 of the latest MVE intrinsics series I posted recently (I
sent v1 before the end of the previous stage 1)?
https://gcc.gnu.org/pipermail/gcc-patches/2024-December/671219.html

>
> One thing we should perhaps consider in future (ie not right now) is
> whether we really need poly types in the Arm back-end code.  Patch 8
> contains:
>
>
> +  machine_mode memory_vector_mode (const function_instance &fi) const
> override
> +  {
> +poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
> +return arm_mve_data_mode (m_to_int_mode, nunits).require ();
> +  }
>
> But since poly types on Arm should degenerate into simple host wide
> ints, this feels a bit like overkill.
>
Indeed, there are various cleanup / improvements to consider.

Thanks,

Christophe

> R.
>
> > Thanks,
> >
> > Christophe
> >
> > Christophe Lyon (15):
> >arm: [MVE intrinsics] add mode_after_pred helper in function_shape
> >arm: [MVE intrinsics] add store_scatter_offset shape
> >arm: [MVE intrinsics] rework vstr?q_scatter_offset
> >arm: [MVE intrinsics] rework vstr_scatter_shifted_offset
> >arm: [MVE intrinsics] Check immediate is a multiple in a range
> >arm: [MVE intrinsics] Add store_scatter_base shape
> >arm: [MVE intrinsics] rework vstr scatter_base
> >arm: [MVE intrinsics] rework vstr scatter_base_wb
> >arm: [MVE intrinsics] add load_ext_gather_offset shape
> >arm: [MVE intrinsics] rework vldr gather_offset
> >arm: [MVE intrinsics] rework vldr gather_shifted_offset
> >arm: [MVE intrinsics] add load_gather_base shape
> >arm: [MVE intrinsics] rework vldr gather_base
> >arm: [MVE intrinsics] rework vldr gather_base_wb
> >arm: [MVE intrinsics] remove useless call_properties implementations.
> >
> >   gcc/config/arm/arm-builtins.cc|  146 -
> >   gcc/config/arm/arm-mve-builtins-base.cc   |  279 +-
> >   gcc/config/arm/arm-mve-builtins-base.def  |   28 +
> >   gcc/config/arm/arm-mve-builtins-base.h|   18 +
> >   gcc/config/arm/arm-mve-builtins-shapes.cc |  249 ++
> >   gcc/config/arm/arm-mve-builtins-shapes.h  |4 +
> >   gcc/config/arm/arm-mve-builtins.cc|   99 +-
> >   gcc/config/arm/arm-mve-builtins.h |5 +
> >   gcc/config/arm/arm_mve.h  | 3096 ++---
> >   gcc/config/arm/arm_mve_builtins.def   |  122 -
> >   gcc/config/arm/iterators.md   |   63 +-
> >   gcc/config/arm/mve.md | 2150 ++--
> >   gcc/config/arm/unspecs.md |   78 +-
> >   .../mve/intrinsics/vldrdq_gather_base_s64.c   |4 +-
> >   .../mve/intrinsics/vldrdq_gather_base_u64.c   |4 +-
> >   .../intrinsics/vldrdq_gather_base_wb_s64.c|4 +-
> >   .../intrinsics/vldrdq_gather_base_wb_u64.c|4 +-
> >   17 files changed, 1348 insertions(+), 5005 deletions(-)
> >
>

[PATCH] c++, gimplify: Clear zero padding in empty types [PR118002]

2024-12-12 Thread Jakub Jelinek

Hi!

I believe we need to clear padding bits even in empty types when using
zero initialization,
https://eel.is/c++draft/dcl.init.general#6.2
doesn't have an exception for empty types.
I came to this when playing with an optimization for PR116416 to improve
tree-ssa/pr78687.C testcase back.

Initially I had in the patch also
--- gcc/cp/cp-gimplify.cc.jj2024-12-11 12:46:32.958466985 +0100
+++ gcc/cp/cp-gimplify.cc   2024-12-11 16:23:11.598860505 +0100
@@ -674,7 +674,10 @@ cp_gimplify_expr (tree *expr_p, gimple_s
  TREE_OPERAND (*expr_p, 1) = build1 (VIEW_CONVERT_EXPR,
  TREE_TYPE (op0), op1);

-   else if (simple_empty_class_p (TREE_TYPE (op0), op1, code))
+   else if (simple_empty_class_p (TREE_TYPE (op0), op1, code)
+&& (TREE_CODE (*expr_p) != INIT_EXPR
+|| TREE_CODE (op1) != CONSTRUCTOR
+|| !CONSTRUCTOR_ZERO_PADDING_BITS (op1)))
  {
while (TREE_CODE (op1) == TARGET_EXPR)
  /* We're disconnecting the initializer from its target,
hunk but that regressed the g++.dg/init/empty1.C testcase, where
the empty bases are overlaid with other data members and the test
wants to ensure that the non-static data members aren't overwritten
when initializing the base padding.
On the other side, with this patch and the cp-gimplify.cc hunk plus
the optimization I'm going to post we change
-  D.10177 = {};
+  D.10177._storage.D.9582.D.9163._tail.D.9221._tail.D.9280._head = {};
in the gimple dump (option_2 is zero initialized there), while with
just this patch and the optimization and no cp-gimplify.cc hunk
  D.10177 = {};
is simply removed altogether and no clearing done.
So, I'm not 100% sure if what the patch below does is 100% safe not to
overwrite the overlaid stuff, but at least testsuite doesn't reveal
anything further, and on the other side clears padding in everything it
should.

Earlier version of this patch (with the cp-gimplify.cc hunk and
without the TYPE_SIZE/integer_zerop subconditions) has been bootstrapped
and regtested on x86_64-linux and i686-linux, this version just tested
on the set of tests which regressed.

2024-12-12  Jakub Jelinek  

PR c++/118002
gcc/
* gimplify.cc (gimplify_init_constructor, gimplify_modify_expr):
Don't optimize away INIT_EXPRs of empty classes with rhs CONSTRUCTOR
with CONSTRUCTOR_ZERO_PADDING_BITS.
gcc/testsuite/
* g++.dg/cpp0x/zero-init2.C: New test.

--- gcc/gimplify.cc.jj  2024-12-07 11:35:49.475439705 +0100
+++ gcc/gimplify.cc 2024-12-12 09:38:03.865543272 +0100
@@ -6094,8 +6094,15 @@ gimplify_init_constructor (tree *expr_p,
  not emitted an assignment, do so now.   */
   if (*expr_p
   /* If the type is an empty type, we don't need to emit the
-assignment. */
-  && !is_empty_type (TREE_TYPE (TREE_OPERAND (*expr_p, 0
+assignment.  Except when rhs is a CONSTRUCTOR with
+CONSTRUCTOR_ZERO_PADDING_BITS.  */
+  && (!is_empty_type (TREE_TYPE (TREE_OPERAND (*expr_p, 0)))
+ || (is_init_expr
+ && TREE_CODE (TREE_OPERAND (*expr_p, 1)) == CONSTRUCTOR
+ && CONSTRUCTOR_ZERO_PADDING_BITS (TREE_OPERAND (*expr_p, 1))
+ && TYPE_SIZE (TREE_TYPE (TREE_OPERAND (*expr_p, 0)))
+ && !integer_zerop (TYPE_SIZE (TREE_TYPE (TREE_OPERAND (*expr_p,
+0)))
 {
   tree lhs = TREE_OPERAND (*expr_p, 0);
   tree rhs = TREE_OPERAND (*expr_p, 1);
@@ -6685,7 +6692,14 @@ gimplify_modify_expr (tree *expr_p, gimp
   /* Don't do this for calls that return addressable types, expand_call
 relies on those having a lhs.  */
   && !(TREE_ADDRESSABLE (TREE_TYPE (*from_p))
-  && TREE_CODE (*from_p) == CALL_EXPR))
+  && TREE_CODE (*from_p) == CALL_EXPR)
+  /* And similarly don't do that for rhs being CONSTRUCTOR with
+CONSTRUCTOR_ZERO_PADDING_BITS set.  */
+  && !(TREE_CODE (*expr_p) == INIT_EXPR
+  && TREE_CODE (*to_p) == CONSTRUCTOR
+  && CONSTRUCTOR_ZERO_PADDING_BITS (*to_p)
+  && TYPE_SIZE (TREE_TYPE (*from_p))
+  && !integer_zerop (TYPE_SIZE (TREE_TYPE (*from_p)
 {
   gimplify_stmt (from_p, pre_p);
   gimplify_stmt (to_p, pre_p);
--- gcc/testsuite/g++.dg/cpp0x/zero-init2.C.jj  2024-12-11 16:50:26.513845473 
+0100
+++ gcc/testsuite/g++.dg/cpp0x/zero-init2.C 2024-12-11 16:50:45.879572789 
+0100
@@ -0,0 +1,37 @@
+// PR c++/118002
+// { dg-do run { target c++11 } }
+// { dg-options "-O0" }
+
+struct S {};
+struct T { S a, b, c, d, e, f, g, h; };
+struct U { T i, j, k, l, m, n, o, p; };
+
+[[gnu::noipa]] void
+foo (struct U *)
+{
+}
+
+[[gnu::noipa]] void
+bar ()
+{
+  U u[4];
+  __builtin_memset (&u, -1, sizeof (U) * 4);
+  foo (&u[0]);
+}
+
+[[gnu::noipa]] void
+baz ()
+{
+  U u = U ();
+  foo (&u);
+  for (int i = 0; i < sizeof (U); ++i)

[COMMITTED 03/30] ada: Add SIGPROT handler for CheriBSD

2024-12-12 Thread Marc Poulhiès

From: Daniel King 

gcc/ada/ChangeLog:

* init.c (__gnat_error_handler): Handle SIGPROT
(__gnat_install_handler): Install SIGPROT handler

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/init.c | 79 ++
 1 file changed, 79 insertions(+)

diff --git a/gcc/ada/init.c b/gcc/ada/init.c
index 8019c09f80e..d0ee6553981 100644
--- a/gcc/ada/init.c
+++ b/gcc/ada/init.c
@@ -88,6 +88,14 @@ extern struct Exception_Data numeric_error;
 extern struct Exception_Data program_error;
 extern struct Exception_Data storage_error;
 
+/* Exception IDs for CHERI Ada exceptions (see Interfaces.CHERI.Exceptions) */
+#ifdef __CHERI__
+extern Exception_Id capability_bound_error_id;
+extern Exception_Id capability_permission_error_id;
+extern Exception_Id capability_sealed_error_id;
+extern Exception_Id capability_tag_error_id;
+#endif
+
 /* For the Cert run time we use the regular raise exception routine because
__gnat_raise_from_signal_handler is not available.  */
 #ifdef CERT
@@ -1678,10 +1686,16 @@ __gnat_is_vms_v7 (void)
 #include 
 #include 
 
+#ifdef __CHERI__
 static void
+__gnat_error_handler (int sig,
+ siginfo_t *si,
+ void *ucontext ATTRIBUTE_UNUSED)
+#else
 __gnat_error_handler (int sig,
  siginfo_t *si ATTRIBUTE_UNUSED,
  void *ucontext ATTRIBUTE_UNUSED)
+#endif /* __CHERI__ */
 {
   struct Exception_Data *exception;
   const char *msg;
@@ -1708,6 +1722,67 @@ __gnat_error_handler (int sig,
   msg = "SIGBUS: possible stack overflow";
   break;
 
+#ifdef __CHERI__
+case SIGPROT:
+  switch (si->si_code)
+{
+case PROT_CHERI_TAG:
+  exception = capability_tag_error_id;
+  msg = "Capability tag fault";
+  break;
+
+case PROT_CHERI_SEALED:
+  exception = capability_sealed_error_id;
+  msg = "Capability sealed fault";
+  break;
+
+case PROT_CHERI_UNALIGNED_BASE:
+  exception = &storage_error;
+  msg = "SIGPROT: unaligned base address";
+  break;
+
+case PROT_CHERI_BOUNDS:
+  exception = capability_bound_error_id;
+  msg = "Capability bounds fault";
+  break;
+
+case PROT_CHERI_IMPRECISE:
+  exception = capability_bound_error_id;
+  msg = "Imprecise capability bounds fault";
+  break;
+
+case PROT_CHERI_TYPE:
+  exception = capability_permission_error_id;
+  msg = "Capability type mismatch fault";
+  break;
+
+case PROT_CHERI_PERM:
+  exception = capability_permission_error_id;
+  msg = "Capability permission fault";
+  break;
+
+case PROT_CHERI_STORELOCAL:
+  exception = capability_permission_error_id;
+  msg = "Capability store-local fault";
+  break;
+
+case PROT_CHERI_CINVOKE:
+  exception = capability_permission_error_id;
+  msg = "CInvoke fault";
+  break;
+
+case PROT_CHERI_SYSREG:
+  exception = capability_permission_error_id;
+  msg = "Capability system register fault";
+  break;
+
+default:
+  exception = &program_error;
+  msg = "SIGPROT: unhandled signal code";
+}
+  break;
+#endif /* __CHERI__ */
+
 default:
   exception = &program_error;
   msg = "unhandled signal";
@@ -1735,6 +1810,10 @@ __gnat_install_handler (void)
   (void) sigaction (SIGSEGV, &act, NULL);
   (void) sigaction (SIGBUS,  &act, NULL);
 
+#ifdef __CHERI__
+  (void) sigaction (SIGPROT, &act, NULL);
+#endif
+
   __gnat_handler_installed = 1;
 }
 
-- 
2.43.0

[COMMITTED 08/30] ada: Update documentation for External_Initialization

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

This fixes an omission in the recent change that was made to file lookup
for External_Initialization.

gcc/ada/ChangeLog:

* doc/gnat_rm/gnat_language_extensions.rst: Update
External_Initialization section.
* gnat_rm.texi: Regenerate.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/doc/gnat_rm/gnat_language_extensions.rst | 2 +-
 gcc/ada/gnat_rm.texi | 4 ++--
 gcc/ada/gnat_ugn.texi| 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/doc/gnat_rm/gnat_language_extensions.rst 
b/gcc/ada/doc/gnat_rm/gnat_language_extensions.rst
index 9b3de825aca..32fa6fb8e8b 100644
--- a/gcc/ada/doc/gnat_rm/gnat_language_extensions.rst
+++ b/gcc/ada/doc/gnat_rm/gnat_language_extensions.rst
@@ -1720,6 +1720,6 @@ Example:
 
 - mandatory ``Path``: the path the compiler uses to access the binary resource.
 
-``Path`` is resolved according to the same rules the compiler uses for loading 
the source files.
+If ``Path`` is a relative path, it is interpreted relatively to the directory 
of the file that contains the aspect specification.
 
 .. attention:: The maximum size of loaded files is limited to 2\ :sup:`31` 
bytes.
diff --git a/gcc/ada/gnat_rm.texi b/gcc/ada/gnat_rm.texi
index 849404c8e87..ee22978b27c 100644
--- a/gcc/ada/gnat_rm.texi
+++ b/gcc/ada/gnat_rm.texi
@@ -19,7 +19,7 @@
 
 @copying
 @quotation
-GNAT Reference Manual , Nov 18, 2024
+GNAT Reference Manual , Dec 12, 2024
 
 AdaCore
 
@@ -30979,7 +30979,7 @@ end P;
 mandatory @code{Path}: the path the compiler uses to access the binary 
resource.
 @end itemize
 
-@code{Path} is resolved according to the same rules the compiler uses for 
loading the source files.
+If @code{Path} is a relative path, it is interpreted relatively to the 
directory of the file that contains the aspect specification.
 
 @cartouche
 @quotation Attention 
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index a03dc7cae4b..c856dddba90 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -19,7 +19,7 @@
 
 @copying
 @quotation
-GNAT User's Guide for Native Platforms , Nov 26, 2024
+GNAT User's Guide for Native Platforms , Dec 12, 2024
 
 AdaCore
 
@@ -29839,8 +29839,8 @@ to permit their use in free software.
 
 @printindex ge
 
-@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 @anchor{d2}@w{  }
+@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 
 @c %**end of body
 @bye
-- 
2.43.0

[COMMITTED 01/30] ada: Ensure minimum stack size for preallocated task stacks

2024-12-12 Thread Marc Poulhiès

From: Johannes Kliemann 

On targets with preallocated task stacks the minimum stack size is
defined as a constant in System.Parameters. When adding preallocated
tasks to the expanded code the compiler does not have direct access to
that value. Instead generate the expression
Max (Task_Size, Minimum_Task_Size) in the expanded tree and let it be
resolved later in the compilation process.

gcc/ada/ChangeLog:

* exp_ch9.adb (Expand_N_Task_Type_Declaration): Take
Minimum_Stack_Size into account when preallocating task stacks.
* rtsfind.ads (RE_Id, RE_Unit_Table): Add RE_Minimum_Stack_Size.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch9.adb | 29 +
 gcc/ada/rtsfind.ads |  2 ++
 2 files changed, 31 insertions(+)

diff --git a/gcc/ada/exp_ch9.adb b/gcc/ada/exp_ch9.adb
index cf4d4d82256..df51856f9c9 100644
--- a/gcc/ada/exp_ch9.adb
+++ b/gcc/ada/exp_ch9.adb
@@ -11945,6 +11945,35 @@ package body Exp_Ch9 is
else
   Task_Size := New_Copy_Tree (Expr_N);
end if;
+
+   --  On targets with a preallocated task stack the minimum stack
+   --  size is defined in System.Parameters. Since we do not have
+   --  access to the value of that definition here we replace the
+   --  static task size with the static expression
+   --  Size_Type'Max (Task_Size, Minimum_Stack_Size).
+   --  The compiler will evaluate this expression and replace the
+   --  task size with the Minimum_Stack_Size if needed. It is
+   --  important for this expression to be static to avoid
+   --  introducing implicit heap allocations that would break code
+   --  with the No_Implicit_Heap_Allocations restriction.
+   --  On some runtimes the allocation of the minimum stack size is
+   --  ensured by a call to Adjust_Storage_Size. We cannot use this
+   --  function here as it is not static and evaluated at runtime.
+   --  Note: This expression may not appear in the expanded code
+   --  as the compiler evaluates this expression before code
+   --  generation.
+
+   Task_Size :=
+ Convert_To
+   (RTE (RE_Storage_Offset),
+Make_Attribute_Reference (Loc,
+  Attribute_Name => Name_Max,
+  Prefix =>
+New_Occurrence_Of
+  (RTE (RE_Size_Type), Loc), Expressions => New_List (
+ Convert_To (RTE (RE_Size_Type), Task_Size),
+ New_Occurrence_Of (RTE (RE_Minimum_Stack_Size),
+   Loc;
 end;
 
  else
diff --git a/gcc/ada/rtsfind.ads b/gcc/ada/rtsfind.ads
index 9cfd2ed4c48..16c817dc37e 100644
--- a/gcc/ada/rtsfind.ads
+++ b/gcc/ada/rtsfind.ads
@@ -1618,6 +1618,7 @@ package Rtsfind is
 
  RE_Adjust_Storage_Size, -- System.Parameters
  RE_Default_Stack_Size,  -- System.Parameters
+ RE_Minimum_Stack_Size,  -- System.Parameters
  RE_Size_Type,   -- System.Parameters
  RE_Unspecified_Size,-- System.Parameters
 
@@ -3274,6 +3275,7 @@ package Rtsfind is
 
  RE_Adjust_Storage_Size  => System_Parameters,
  RE_Default_Stack_Size   => System_Parameters,
+ RE_Minimum_Stack_Size   => System_Parameters,
  RE_Size_Type=> System_Parameters,
  RE_Unspecified_Size => System_Parameters,
 
-- 
2.43.0

[COMMITTED 16/30] ada: Minor refactoring in expansion of array aggregates

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

This just moves a couple of checks done in conjunction with the predicate
Aggr_Assignment_OK_For_Backend into its body and adds a couple of comments.

No functional changes.

gcc/ada/ChangeLog:

* exp_aggr.adb (Aggr_Assignment_OK_For_Backend): Add Target formal
parameter and check that it is not a bit-aligned component or slice.
Return False in CodePeer mode as well.
(Build_Array_Aggr_Code): Remove redundant tests done in conjunction
with a call to Aggr_Assignment_OK_For_Backend.
(Expand_Array_Aggregate): Likewise.  Add a couple of comments and
improve formatting.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 104 +--
 1 file changed, 60 insertions(+), 44 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index d0ccaa4f3ff..37c21ac5762 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -250,8 +250,12 @@ package body Exp_Aggr is
-- Local Subprograms for Array Aggregate Expansion --
-
 
-   function Aggr_Assignment_OK_For_Backend (N : Node_Id) return Boolean;
-   --  Returns true if an aggregate assignment can be done by the back end
+   function Aggr_Assignment_OK_For_Backend
+ (N  : Node_Id;
+  Target : Node_Id := Empty) return Boolean;
+   --  Returns true if assignment of aggregate N can be done by the back end.
+   --  If Target is present, it is the left-hand side of the assignment; if it
+   --  is not, the assignment is the initialization of an object or allocator.
 
function Aggr_Size_OK (N : Node_Id) return Boolean;
--  Very large static aggregates present problems to the back-end, and are
@@ -371,8 +375,10 @@ package body Exp_Aggr is
--  The ultimate goal is to generate a call to a fast memset routine
--  specifically optimized for the target.
 
-   function Aggr_Assignment_OK_For_Backend (N : Node_Id) return Boolean is
-
+   function Aggr_Assignment_OK_For_Backend
+ (N  : Node_Id;
+  Target : Node_Id := Empty) return Boolean
+   is
   function Is_OK_Aggregate (Aggr : Node_Id) return Boolean;
   --  Return true if Aggr is suitable for back-end assignment
 
@@ -422,12 +428,27 @@ package body Exp_Aggr is
--  Start of processing for Aggr_Assignment_OK_For_Backend
 
begin
+  --  CodePeer does not support this
+
+  if CodePeer_Mode then
+ return False;
+  end if;
+
   --  Back end doesn't know about <>
 
   if Has_Default_Init_Comps (N) then
  return False;
   end if;
 
+  --  Assignments to bit-aligned components or slices are not OK
+
+  if Present (Target)
+and then (Possible_Bit_Aligned_Component (Target)
+   or else Is_Possibly_Unaligned_Slice (Target))
+  then
+ return False;
+  end if;
+
   --  Recurse as far as possible to find the innermost component type
 
   Ctyp := Etype (N);
@@ -1922,10 +1943,7 @@ package body Exp_Aggr is
   --  into an assignment statement.
 
   if Present (Etype (N))
-and then Aggr_Assignment_OK_For_Backend (N)
-and then not Possible_Bit_Aligned_Component (Into)
-and then not Is_Possibly_Unaligned_Slice (Into)
-and then not CodePeer_Mode
+and then Aggr_Assignment_OK_For_Backend (N, Into)
   then
  declare
 New_Aggr : constant Node_Id := Relocate_Node (N);
@@ -6132,52 +6150,50 @@ package body Exp_Aggr is
  or else (Parent_Kind in N_Aggregate | N_Extension_Aggregate
and then not Is_Container_Aggregate (Parent_Node))
 
- --  Allocator (see Convert_Aggr_In_Allocator)
+ --  Allocator (see Convert_Aggr_In_Allocator). Bit-packed array types
+ --  need specific processing and sliding cannot be done in place for
+ --  the time being.
 
  or else (Nkind (Parent_Node) = N_Allocator
-   and then (Aggr_Assignment_OK_For_Backend (N)
-  or else Is_Limited_Type (Typ)
-  or else Needs_Finalization (Typ)
-  or else (not Is_Bit_Packed_Array (Typ)
-and then not
-  Must_Slide
-(N,
- Designated_Type
-   (Etype (Parent_Node)),
- Typ
-
- --  Object declaration (see Convert_Aggr_In_Object_Decl)
+   and then
+ (Aggr_Assignment_OK_For_Backend (N)
+   or else Is_Limited_Type (Typ)
+   or else Needs_Finalization (Typ)
+   or else (not Is_Bit_Packed_Array (Typ)
+ and then not
+

[COMMITTED 30/30] ada: Fix reference to Ada 2020 in comment

2024-12-12 Thread Marc Poulhiès

From: Piotr Trojanek 

Code cleanup.

gcc/ada/ChangeLog:

* par-ch5.adb (Test_Statement_Required): Fix comment.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/par-ch5.adb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/par-ch5.adb b/gcc/ada/par-ch5.adb
index 34c845f838c..6e6690395f1 100644
--- a/gcc/ada/par-ch5.adb
+++ b/gcc/ada/par-ch5.adb
@@ -204,7 +204,7 @@ package body Ch5 is
null;
 
 --  If not Ada 2012, or not special case above, and no declaration
---  seen (as allowed in Ada 2020), give error message.
+--  seen (as allowed in Ada 2022), give error message.
 
 elsif No (Decl_Loc) then
Error_Msg_BC -- CODEFIX
-- 
2.43.0

[COMMITTED 19/30] ada: Fix reference manual clauses

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

The clauses in section 3.5 of the reference manual were moved around
across the different Ada versions, which caused some comments in our
source code to go out of date. This patch updates the references in
those comments.

gcc/ada/ChangeLog:

* libgnat/a-tifiio.adb: Fix comment.
* libgnat/a-tifiio__128.adb: Likewise.
* libgnat/s-imaged.ads (Image_Decimal): Likewise.
* libgnat/s-imagef.ads (Image_Fixed): Likewise.
* libgnat/s-imager.ads (Image_Fixed_Point): Likewise.
* libgnat/s-imde32.ads (Image_Decimal32): Likewise.
* libgnat/s-imfi64.ads (Image_Fixed64): Likewise.
* libgnat/s-imgcha.adb (Image_Character): Likewise.
* libgnat/s-valuer.adb (Scan_Raw_Real): Likewise.
* sem_attr.adb (Eval_Attribute): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/a-tifiio.adb  | 2 +-
 gcc/ada/libgnat/a-tifiio__128.adb | 2 +-
 gcc/ada/libgnat/s-imaged.ads  | 2 +-
 gcc/ada/libgnat/s-imagef.ads  | 2 +-
 gcc/ada/libgnat/s-imager.ads  | 8 
 gcc/ada/libgnat/s-imde32.ads  | 2 +-
 gcc/ada/libgnat/s-imfi64.ads  | 2 +-
 gcc/ada/libgnat/s-imgcha.adb  | 5 +++--
 gcc/ada/libgnat/s-valuer.adb  | 2 +-
 gcc/ada/sem_attr.adb  | 5 +++--
 10 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/gcc/ada/libgnat/a-tifiio.adb b/gcc/ada/libgnat/a-tifiio.adb
index c44b2ba36f7..7358d123313 100644
--- a/gcc/ada/libgnat/a-tifiio.adb
+++ b/gcc/ada/libgnat/a-tifiio.adb
@@ -69,7 +69,7 @@
 --  Operations
 --  --
 
---  [Wide_[Wide_]]Image attribute (see RM 3.5(27.1/2))
+--  [Wide_[Wide_]]Image attribute (see RM 4.10(30))
 
 --  These attributes return a decimal real literal best approximating
 --  the value (rounded away from zero if halfway between) with a
diff --git a/gcc/ada/libgnat/a-tifiio__128.adb 
b/gcc/ada/libgnat/a-tifiio__128.adb
index 51b4b219ff7..59ce81cc706 100644
--- a/gcc/ada/libgnat/a-tifiio__128.adb
+++ b/gcc/ada/libgnat/a-tifiio__128.adb
@@ -69,7 +69,7 @@
 --  Operations
 --  --
 
---  [Wide_[Wide_]]Image attribute (see RM 3.5(27.1/2))
+--  [Wide_[Wide_]]Image attribute (see RM 4.10(30))
 
 --  These attributes return a decimal real literal best approximating
 --  the value (rounded away from zero if halfway between) with a
diff --git a/gcc/ada/libgnat/s-imaged.ads b/gcc/ada/libgnat/s-imaged.ads
index c9017d11e31..d9debde3c27 100644
--- a/gcc/ada/libgnat/s-imaged.ads
+++ b/gcc/ada/libgnat/s-imaged.ads
@@ -48,7 +48,7 @@ package System.Image_D is
--  Computes fixed_type'Image (V), where V is the integer value (in units of
--  delta) of a decimal type whose Scale is as given and stores the result
--  S (1 .. P), updating P on return. The result is computed according to
-   --  the rules for image for fixed-point types (RM 3.5(34)). The caller
+   --  the rules for image for fixed-point types (RM 4.10(14)). The caller
--  guarantees that S is long enough to hold the result and has a lower
--  bound of 1.
 
diff --git a/gcc/ada/libgnat/s-imagef.ads b/gcc/ada/libgnat/s-imagef.ads
index bcb3aee15fa..277559dcb9d 100644
--- a/gcc/ada/libgnat/s-imagef.ads
+++ b/gcc/ada/libgnat/s-imagef.ads
@@ -56,7 +56,7 @@ package System.Image_F is
--  Computes fixed_type'Image (V), where V is the integer value (in units of
--  small) of an ordinary fixed point type with small Num/Den, and stores
--  the result in S (1 .. P), updating P on return. The result is computed
-   --  according to the rules for image for fixed-point types (RM 3.5(34)).
+   --  according to the rules for image for fixed-point types (RM 4.10(14)).
--  For0 and Aft0 are the values of the Fore and Aft attributes for the
--  fixed point type whose mantissa type is Int and whose small is Num/Den.
--  This function is used only for fixed point whose Small is the ratio of
diff --git a/gcc/ada/libgnat/s-imager.ads b/gcc/ada/libgnat/s-imager.ads
index 700582bfda5..826815588ad 100644
--- a/gcc/ada/libgnat/s-imager.ads
+++ b/gcc/ada/libgnat/s-imager.ads
@@ -56,7 +56,7 @@ package System.Image_R is
   Aft : Natural);
--  Computes fixed_type'Image (V) and returns the result in S (1 .. P)
--  updating P on return. The result is computed according to the rules for
-   --  image for fixed-point types (RM 3.5(34)), where Aft is the value of the
+   --  image for fixed-point types (RM 4.10(14)), where Aft is the value of the
--  Aft attribute for the fixed-point type. The caller guarantees that S is
--  long enough to hold the result and has a lower bound of 1.
--
@@ -69,9 +69,9 @@ package System.Image_R is
   Digs : Natural);
--  Computes Uns'Image (V) and returns the result in S (1 .. P) updating P
--  on return. The result is computed according to the rules for image for
-   --  floating-point types (RM 3.5(33)), where Digs is the value of the Digits
-   --  attribute for the f

[COMMITTED 29/30] ada: Elide the copy for bit-packed aggregates in object declarations

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

The in-place expansion has been historically disabled for them, but there
does not seem to be any good reason left for this.  However, this requires
a small trick in order for the expanded code not to be flagged as using the
object uninitialized by the code generator.

gcc/ada/ChangeLog:

* exp_aggr.adb (Convert_Aggr_In_Object_Decl): Clear the component
referenced on the right-hand side of the first assignment generated
for a bit-packed array, if any.
(Expand_Array_Aggregate): Do not exclude aggregates of bit-packed
array types in object declarations from in-place expansion.
* sem_eval.adb (Eval_Indexed_Component): Do not attempt a constant
evaluation for a bit-packed array type.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 36 +---
 gcc/ada/sem_eval.adb |  8 ++--
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 364af228359..c01011cc1fb 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -3811,6 +3811,31 @@ package body Exp_Aggr is
  end loop;
   end if;
 
+  --  If Typ is a bit-packed array and the first statement generated for
+  --  the aggregate initialization is an assignment of the form:
+
+  --Obj (j) := (Obj (j) [and Mask]) or Val
+
+  --  then we initialize Obj (j) right before the assignment, in order to
+  --  avoid a spurious warning about Obj being used uninitialized.
+
+  if Is_Bit_Packed_Array (Typ) then
+ Stmt := Next (N);
+
+ if Stmt /= Marker
+   and then Nkind (Stmt) = N_Assignment_Statement
+   and then Nkind (Expression (Stmt)) in N_Op_And | N_Op_Or
+   and then Nkind (Name (Stmt)) = N_Indexed_Component
+   and then Is_Entity_Name (Prefix (Name (Stmt)))
+   and then Entity (Prefix (Name (Stmt))) = Obj
+ then
+Insert_Action (Stmt,
+  Make_Assignment_Statement (Loc,
+Name   => New_Copy_Tree (Name (Stmt)),
+Expression => Make_Integer_Literal (Loc, Uint_0)));
+ end if;
+  end if;
+
   --  After expansion the expression can be removed from the declaration
   --  except if the object is class-wide, in which case the aggregate
   --  provides the actual type.
@@ -6163,9 +6188,8 @@ package body Exp_Aggr is
   Designated_Type (Etype (Parent_Node)),
   Typ)))
 
- --  Object declaration (see Convert_Aggr_In_Object_Decl). Bit-packed
- --  array types need specific processing and sliding cannot be done
- --  in place for the time being.
+ --  Object declaration (see Convert_Aggr_In_Object_Decl). Sliding
+ --  cannot be done in place for the time being.
 
  or else (Parent_Kind = N_Object_Declaration
and then
@@ -6174,13 +6198,11 @@ package body Exp_Aggr is
or else Needs_Finalization (Typ)
or else Is_Special_Return_Object
  (Defining_Identifier (Parent_Node))
-   or else (not Is_Bit_Packed_Array (Typ)
- and then not
-   Must_Slide
+   or else not Must_Slide
  (N,
   Etype
 (Defining_Identifier (Parent_Node)),
-  Typ
+  Typ)))
 
  --  Safe assignment (see Convert_Aggr_In_Assignment). So far only the
  --  assignments in init procs are taken into account, as well those
diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb
index f0f83d29c38..c55e4d3bb24 100644
--- a/gcc/ada/sem_eval.adb
+++ b/gcc/ada/sem_eval.adb
@@ -2696,9 +2696,13 @@ package body Sem_Eval is
 
 --  If we have an array type (we should have but perhaps there are
 --  error cases where this is not the case), then see if we can do
---  a constant evaluation of the array reference.
+--  a constant evaluation of the array reference, although specific
+--  processing would be required if the array type is bit-packed.
 
-if Is_Array_Type (Atyp) and then Atyp /= Any_Composite then
+if Is_Array_Type (Atyp)
+  and then not Is_Bit_Packed_Array (Atyp)
+  and then Atyp /= Any_Composite
+then
if Ekind (Atyp) = E_String_Literal_Subtype then
   Lbd := String_Literal_Low_Bound (Atyp);
else
-- 
2.43.0

[COMMITTED 23/30] ada: Fix internal error on loop parameter specifications

2024-12-12 Thread Marc Poulhiès

From: Piotr Trojanek 

Originally loop parameter specification only occurred in loops, but now
it also occurs in quantified expressions. This patch guards against
flagging non-loop nodes as null loop statements. This was causing
internal compiler errors that were only visible with switch -gnatdk,
which happens to be the default in the GNATprove testsuite.

gcc/ada/ChangeLog:

* sem_ch5.adb (Analyze_Loop_Parameter_Specification): Only set
flag Is_Null_Loop when loop parameter specification comes from
a loop and not from a quantified expression.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch5.adb | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index 131195a78c7..4c82e274ddb 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -3303,7 +3303,10 @@ package body Sem_Ch5 is
   --  set the appropriate flag to remove the loop entirely
   --  during expansion.
 
-  Set_Is_Null_Loop (Loop_Nod);
+  if Nkind (Loop_Nod) = N_Loop_Statement then
+ Set_Is_Null_Loop (Loop_Nod);
+  end if;
+
   Null_Range := True;
end if;
 
@@ -3339,7 +3342,9 @@ package body Sem_Ch5 is
--  since it is likely that these warnings will be inappropriate
--  if the loop never actually executes, which is likely.
 
-   Set_Suppress_Loop_Warnings (Loop_Nod);
+   if Nkind (Loop_Nod) = N_Loop_Statement then
+  Set_Suppress_Loop_Warnings (Loop_Nod);
+   end if;
 
--  The other case for a warning is a reverse loop where the
--  upper bound is the integer literal zero or one, and the
@@ -3441,12 +3446,13 @@ package body Sem_Ch5 is
   Subtype_Mark (DS));
  end if;
 
- Set_Is_Null_Loop (Loop_Nod);
- Null_Range := True;
+ if Nkind (Loop_Nod) = N_Loop_Statement then
+Set_Is_Null_Loop (Loop_Nod);
 
- --  Suppress other warnings about the body of the loop, as
- --  it will never execute.
- Set_Suppress_Loop_Warnings (Loop_Nod);
+--  Suppress other warnings about the body of the loop,
+--  as it will never execute.
+Set_Suppress_Loop_Warnings (Loop_Nod);
+ end if;
   end if;
end;
 end if;
-- 
2.43.0

[COMMITTED 24/30] ada: Refactor warning about null loops

2024-12-12 Thread Marc Poulhiès

From: Piotr Trojanek 

Code cleanup; semantics is unaffected.

gcc/ada/ChangeLog:

* sem_ch5.adb (Analyze_Loop_Parameter_Specification): Move call
to Comes_From_Source to the outer if-statement.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch5.adb | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/sem_ch5.adb b/gcc/ada/sem_ch5.adb
index 4c82e274ddb..2a5c8dcdbe5 100644
--- a/gcc/ada/sem_ch5.adb
+++ b/gcc/ada/sem_ch5.adb
@@ -3314,24 +3314,25 @@ package body Sem_Ch5 is
--  instance, since in practice they tend to be dubious in these
--  cases since they can result from intended parameterization.
 
-   if not Inside_A_Generic and then not In_Instance then
+   if Comes_From_Source (N)
+ and then not Inside_A_Generic
+ and then not In_Instance
+   then
 
   --  Specialize msg if invalid values could make the loop
   --  non-null after all.
 
   if Null_Range then
- if Comes_From_Source (N) then
-Error_Msg_N
-  ("??loop range is null, loop will not execute", DS);
- end if;
+ Error_Msg_N
+   ("??loop range is null, loop will not execute", DS);
 
   --  Here is where the loop could execute because of
   --  invalid values, so issue appropriate message.
 
-  elsif Comes_From_Source (N) then
+  else
  Error_Msg_N
-   ("??loop range may be null, loop may not execute",
-DS);
+   ("??loop range may be null, loop may not execute", DS);
+
  Error_Msg_N
("??can only execute if invalid values are present",
 DS);
-- 
2.43.0

[COMMITTED 17/30] ada: Refactor code of Check_Ambiguous_Call and Valid_Conversion

2024-12-12 Thread Marc Poulhiès

From: Javier Miranda 

gcc/ada/ChangeLog:

* sem_res.adb (Is_Ambiguous_Operand): Add missing decoration of
the operand when it is labeled overloaded but has just one
interpretation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index 5ca20613529..cd75508021c 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -13785,10 +13785,9 @@ package body Sem_Res is
 
 return True;
  end if;
-
- Set_Etype (Operand, It1.Typ);
   end if;
 
+  Set_Etype (Operand, It1.Typ);
   return False;
end Is_Ambiguous_Operand;
 
-- 
2.43.0

[COMMITTED 25/30] ada: Improve task entry context detection

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

Access parameters are not allowed in specifications of task entries.
Before this patch, the compiler failed to detect that case in accept
statements that were not directly in their task body's scopes. This
patch fixes this issue.

gcc/ada/ChangeLog:

* sem_ch3.adb (Access_Definition): Remove test for task entry context.
* sem_ch6.adb (Process_Formals): Add improved test for task entry
context.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb |  7 ---
 gcc/ada/sem_ch6.adb | 10 ++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index a5d69c33b15..11f69db21dc 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -761,13 +761,6 @@ package body Sem_Ch3 is
   Enclosing_Prot_Type : Entity_Id := Empty;
 
begin
-  if Is_Entry (Current_Scope)
-and then Is_Task_Type (Etype (Scope (Current_Scope)))
-  then
- Error_Msg_N ("task entries cannot have access parameters", N);
- return Empty;
-  end if;
-
   --  Ada 2005: For an object declaration the corresponding anonymous
   --  type is declared in the current scope.
 
diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
index 16f296523f4..1ac76bbd0db 100644
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -13144,6 +13144,16 @@ package body Sem_Ch6 is
  --  An access formal type
 
  else
+if Nkind (Parent (T)) = N_Accept_Statement
+  or else (Nkind (Parent (T)) = N_Entry_Declaration
+   and then Nkind (Context) = N_Task_Definition)
+then
+   Error_Msg_N
+ ("task entries cannot have access parameters",
+  Parameter_Type (Param_Spec));
+   return;
+end if;
+
 Formal_Type :=
   Access_Definition (Related_Nod, Parameter_Type (Param_Spec));
 
-- 
2.43.0

[COMMITTED 18/30] ada: Fix pragma Compile_Time_Error for sizes of nonstatic array types

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

The pragma is consistently rejected for the sizes of nonstatic array types
because Eval_Attribute does not evaluate it even if it is known.

gcc/ada/ChangeLog:

* sem_attr.adb (Eval_Attribute): Treat the various size attributes
like Component_Size for nonstatic array types.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_attr.adb | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
index 4e06ec54978..2315d515ac4 100644
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -8824,6 +8824,8 @@ package body Sem_Attr is
   --  unconstrained arrays. Furthermore, it is essential to fold this
   --  in the packed case, since otherwise the value will be incorrect.
   --  Moreover, the exact same reasoning can be applied to Alignment.
+  --  Likewise for the various size attributes, although folding will
+  --  never succeed for them with unconstrained arrays.
 
   elsif Id = Attribute_Atomic_Always_Lock_Free  or else
 Id = Attribute_Definite or else
@@ -8835,7 +8837,12 @@ package body Sem_Attr is
 Id = Attribute_Type_Class   or else
 Id = Attribute_Unconstrained_Array  or else
 Id = Attribute_Component_Size   or else
-Id = Attribute_Alignment
+Id = Attribute_Alignmentor else
+Id = Attribute_Machine_Size or else
+Id = Attribute_Object_Size  or else
+Id = Attribute_Size or else
+Id = Attribute_VADS_Sizeor else
+Id = Attribute_Value_Size
   then
  Static := False;
  Set_Is_Static_Expression (N, False);
-- 
2.43.0

[PATCH] match.pd: Defer some CTZ/CLZ foldings until after ubsan pass for -fsanitize=builtin [PR115127]

2024-12-12 Thread Jakub Jelinek

Hi!

As the following testcase shows, -fsanitize=builtin instruments the
builtins in the ubsan pass which is done shortly after going into
SSA, but if optimizations optimize the builtins away before that,
nothing is instrumented.  Now, I think it is just fine if the
result of the builtins isn't used in any way and we just DCE them,
but in the following optimizations the result is used.
So, for -fsanitize=builtin only, the following patch defers the
optimizations that might fold away single argument CLZ/CTZ (i.e. those
undefined at zero) until the ubsan pass is done.
Now, we don't have PROP_ubsan and I am not sure it is worth adding it,
there is PROP_ssa set by the ssa pass which is 3 passes before
ubsan, but there are only 2 warning passes in between, so PROP_ssa
looked good enough to me.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-12-12  Jakub Jelinek  

PR sanitizer/115127
* match.pd (clz (X) == C, ctz (X) == C, ctz (X) >= C): Don't
optimize if -fsanitize=builtin and not yet in SSA form.

* c-c++-common/ubsan/builtin-2.c: New test.

--- gcc/match.pd.jj 2024-12-06 11:00:27.937579733 +0100
+++ gcc/match.pd2024-12-11 19:55:08.978334222 +0100
@@ -9636,13 +9636,17 @@ (define_operator_list SYNC_FETCH_AND_AND
   cmp (lt ge)
   (simplify
(op (clz:s@2 @0) INTEGER_CST@1)
-   (if (integer_zerop (@1) && single_use (@2))
-/* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0.  */
-(with { tree stype = signed_type_for (TREE_TYPE (@0)); }
- (cmp (convert:stype @0) { build_zero_cst (stype); }))
-/* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1.  */
-(if (wi::to_wide (@1) == TYPE_PRECISION (TREE_TYPE (@0)) - 1)
- (op @0 { build_one_cst (TREE_TYPE (@0)); }))
+   (if (!sanitize_flags_p (SANITIZE_BUILTIN)
+   /* For -fsanitize=builtin give ubsan pass a chance
+  to instrument it first.  */
+   || (cfun && (cfun->curr_properties & PROP_ssa) != 0))
+(if (integer_zerop (@1) && single_use (@2))
+ /* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0.  */
+ (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+  (cmp (convert:stype @0) { build_zero_cst (stype); }))
+ /* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1.  */
+ (if (wi::to_wide (@1) == TYPE_PRECISION (TREE_TYPE (@0)) - 1)
+  (op @0 { build_one_cst (TREE_TYPE (@0)); })))
 (for op (eq ne)
  cmp (lt ge)
  (simplify
@@ -9682,7 +9686,13 @@ (define_operator_list SYNC_FETCH_AND_AND
(op (ctz:s @0) INTEGER_CST@1)
 (with { bool ok = true;
HOST_WIDE_INT val = 0;
-   if (!tree_fits_shwi_p (@1))
+   if (sanitize_flags_p (SANITIZE_BUILTIN)
+   /* For -fsanitize=builtin give ubsan pass a chance
+  to instrument it first.  */
+   && (!cfun
+   || (cfun->curr_properties & PROP_ssa) == 0))
+ ok = false;
+   else if (!tree_fits_shwi_p (@1))
  ok = false;
else
  {
@@ -9713,8 +9723,15 @@ (define_operator_list SYNC_FETCH_AND_AND
(op (ctz:s @0) INTEGER_CST@1)
 (with { tree type0 = TREE_TYPE (@0);
int prec = TYPE_PRECISION (type0);
+   bool ok = true;
+   if (sanitize_flags_p (SANITIZE_BUILTIN)
+   /* For -fsanitize=builtin give ubsan pass a chance
+  to instrument it first.  */
+   && (!cfun
+   || (cfun->curr_properties & PROP_ssa) == 0))
+ ok = false;
  }
- (if (prec <= MAX_FIXED_MODE_SIZE)
+ (if (ok && prec <= MAX_FIXED_MODE_SIZE)
   (if (tree_int_cst_sgn (@1) < 0 || wi::to_widest (@1) >= prec)
{ constant_boolean_node (op == EQ_EXPR ? false : true, type); }
(op (bit_and @0 { wide_int_to_tree (type0,
@@ -9815,7 +9832,13 @@ (define_operator_list SYNC_FETCH_AND_AND
   else if (TYPE_PRECISION (type0)
== TYPE_PRECISION (long_long_unsigned_type_node))
 cfn = CFN_BUILT_IN_CTZLL;
-} }
+}
+  if (sanitize_flags_p (SANITIZE_BUILTIN)
+  /* For -fsanitize=builtin give ubsan pass a chance
+ to instrument it first.  */
+  && (!cfun
+  || (cfun->curr_properties & PROP_ssa) == 0))
+   cfn = CFN_LAST; }
 (if (cfn == CFN_CTZ)
  (IFN_CTZ (convert:type0 @0))
  (if (cfn == CFN_BUILT_IN_CTZ)
--- gcc/testsuite/c-c++-common/ubsan/builtin-2.c.jj 2024-12-11 
19:49:42.072941749 +0100
+++ gcc/testsuite/c-c++-common/ubsan/builtin-2.c2024-12-11 
19:51:21.503540338 +0100
@@ -0,0 +1,89 @@
+/* PR sanitizer/115127 */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=undefined" } */
+
+#include 
+
+__attribute__((noipa)) int
+f1 (unsigned a)
+{
+  return __builtin_clz (a) == 0;
+}
+
+__attribute__((noipa)) int
+f2 (unsigned long a)
+{
+  return __builtin_clzl (a) != 0;
+}
+
+__a

[COMMITTED] i386: regenerate i386.opt.urls

2024-12-12 Thread Sam James

r15-6128-gfa878dc8c45fa3 missed the regeneration of the URL doc map, so
regenerate it here to make the buildbots happy.

gcc/ChangeLog:

* config/i386/i386.opt.urls: Regenerate.
---
Committed as obvious, as the buildbots kept emailing and I thought it better
to get it out of the way.

 gcc/config/i386/i386.opt.urls | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 1ff0c9c2256e..2f8ae60b1d06 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -446,7 +446,8 @@ 
UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-reg-3)
 mstack-protector-guard-offset=
 UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-offset-4)
 
-; skipping UrlSuffix for 'mstack-protector-guard-symbol=' due to finding no 
URLs
+mstack-protector-guard-symbol=
+UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-symbol)
 
 mgeneral-regs-only
 UrlSuffix(gcc/x86-Options.html#index-mgeneral-regs-only-2)
-- 
2.47.1

Re: [PATCH] testsuite: arm: Update expected assembler for pr43920-2.c test

2024-12-12 Thread Richard Earnshaw (lists)


On 10/11/2024 10:02, Torbjörn SVENSSON wrote:

Ok for trunk, releases/gcc-12, releases/gcc-13 and releases/gcc-14?

--

In version 6-2017-q1-update of the "GNU Arm Embedded Toolchain" build,
there are 2 pop instructions. In version 7-2018-q2-update, the next
version that still has a binary build available on launchpad, there is
only a single pop instruction.
When I try to build vanilla GCC in the same version range, I always end
up with a single pop instruction.

Since r12-5301-g04520645038, the generated assembler contains one more
register move, and it's requested in PR103298 to allow it.


Is that reference correct?  Which comment are you referring to if so? 
That PR is about switch table optimizations.


R.



gcc/testsuite/ChangeLog:

PR testsuite/103298
* gcc.target/arm/pr43920-2.c: Increase allowed text size and
lower number of expected pop instructions.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/pr43920-2.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr43920-2.c 
b/gcc/testsuite/gcc.target/arm/pr43920-2.c
index c367d6bc15d..80cc0b7d260 100644
--- a/gcc/testsuite/gcc.target/arm/pr43920-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr43920-2.c
@@ -27,6 +27,6 @@ int getFileStartAndLength (int fd, int *start_, size_t 
*length_)
return 0;
  }
  
-/* { dg-final { scan-assembler-times "pop" 2 } } */

+/* { dg-final { scan-assembler-times "pop" 1 } } */
  /* { dg-final { scan-assembler-times "beq" 3 } } */
-/* { dg-final { object-size text <= 54 { target { ! arm*-*-uclinuxfdpiceabi } 
} } } */
+/* { dg-final { object-size text <= 56 { target { ! arm*-*-uclinuxfdpiceabi } 
} } } */

[PATCH] docs: Fix [us]abd pattern name.

2024-12-12 Thread Robin Dapp

Hi,

the uabd and sabd optab names in the docs are missing a 3 suffix (for
their three arguments).  This patch adds it.

This should be obvious enough, so I am going to push it in a few days
unless there are complaints.

Regards
 Robin

gcc/ChangeLog:

* doc/md.texi: Add "3" suffix.
---
 gcc/doc/md.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f0b63a144ad..523ce9bce17 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6208,8 +6208,8 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
-@cindex @code{uabd@var{m}} instruction pattern
-@cindex @code{sabd@var{m}} instruction pattern
+@cindex @code{uabd@var{m}3} instruction pattern
+@cindex @code{sabd@var{m}3} instruction pattern
 @item @samp{uabd@var{m}}, @samp{sabd@var{m}}
 Signed and unsigned absolute difference instructions.  These
 instructions find the difference between operands 1 and 2
-- 
2.47.1

[PATCH v2 5/5] RISC-V: Add new constraint R for register even-odd pairs

2024-12-12 Thread Kito Cheng

Although this constraint is not currently used for any instructions, it is very
useful for custom instructions. Additionally, some new standard extensions
(not yet upstream), such as `Zilsd` and `Zclsd`, are potential users of this
constraint. Therefore, I believe there is sufficient justification to add it
now.

gcc/ChangeLog:

* config/riscv/constraints.md (R): New constraint.
* doc/md.texi: Document new constraint `R`.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/constraint-R.c: New.
---
 gcc/config/riscv/constraints.md   |  4 
 gcc/doc/md.texi   |  3 +++
 gcc/testsuite/gcc.target/riscv/constraint-R.c | 23 +++
 3 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-R.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 2dce9832219..ebb71000d12 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -28,6 +28,10 @@ (define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS 
:
 (define_register_constraint "j" "SIBCALL_REGS"
   "@internal")
 
+(define_register_constraint "R" "GR_REGS"
+  "Even-odd general purpose register pair."
+  "regno % 2 == 0")
+
 ;; Avoid using register t0 for JALR's argument, because for some
 ;; microarchitectures that is a return-address stack hint.
 (define_register_constraint "l" "JALR_REGS"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 494a9e14c2a..ae5e31438d6 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3667,6 +3667,9 @@ RVC general purpose register (x8-x15).
 RVC floating-point registers (f8-f15), if available, reuse GPR as FPR when use
 zfinx.
 
+@item R
+Even-odd general purpose register pair.
+
 @end table
 
 @item RX---@file{config/rx/constraints.md}
diff --git a/gcc/testsuite/gcc.target/riscv/constraint-R.c 
b/gcc/testsuite/gcc.target/riscv/constraint-R.c
new file mode 100644
index 000..cb13d8a1f38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/constraint-R.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-additional-options "-std=gnu99" } */
+
+void foo(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int 
m0, int m1) {
+/*
+** foo:
+**   ...
+**   addi t1, (a[0246]|s[02468]|t[02]), 1
+**   ...
+*/
+__asm__ volatile("addi t1, %0, 1" : : "R" (a1) : "memory");
+}
+void foo2(int a0, long long a1a2) {
+/*
+** foo2:
+**   ...
+**   addi t1, (a[0246]|s[02468]|t[02]), 1
+**   ...
+*/
+__asm__ volatile("addi t1, %0, 1" : : "R" (a1a2) : "memory");
+}
-- 
2.34.1

Re: [PATCH 1/2] Refactor final_value_replacement_loop [PR90594]

2024-12-12 Thread Feng Xue OS

Updated the patch according to comments. OK for trunk?

Thanks,
Feng
---
gcc/
PR tree-optimization/90594
* tree-scalar-evolution.cc (get_scev_final_value): New function.
(apply_scev_final_value_replacement): Likewise.
(final_value_replacement_loop): Call new functions.
* tree-scalar-evolution.h (get_scev_final_value): New function
declaration.
(apply_scev_final_value_replacement): Likewise.
(scev_const_prop): Remove unused declaration.
---
 gcc/tree-scalar-evolution.cc | 294 +--
 gcc/tree-scalar-evolution.h  |   3 +-
 2 files changed, 179 insertions(+), 118 deletions(-)

diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index abb2bad7773..3c3719e8e64 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3775,6 +3775,170 @@ analyze_and_compute_bitop_with_inv_effect (class loop* 
loop, tree phidef,
   return fold_build2 (code1, type, inv, match_op[0]);
 }

+/* For induction VALUE of LOOP, return its final value at loop exit if it could
+   be directly calculated based on the initial value and loop niter, also set
+   REWRITE_OVERFLOW to true in the case that we need to rewrite the final value
+   to avoid overflow UB when replacement would really happen later. Otherwise,
+   empty value is returned.  The flag CONSIDER_COST specifies whether we care
+   about if the value is expensive or not.  */
+
+tree
+get_scev_final_value (class loop *loop, tree value, bool *rewrite_overflow,
+ bool consider_cost)
+{
+  edge exit = single_exit (loop);
+  if (!exit)
+return NULL_TREE;
+
+  tree niter = number_of_latch_executions (loop);
+  if (niter == chrec_dont_know)
+return NULL_TREE;
+
+  class loop *ex_loop
+= superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1);
+
+  bool folded_casts;
+  tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value,
+  &folded_casts);
+  tree bitinv_def, bit_def;
+  unsigned HOST_WIDE_INT niter_num;
+
+  if (def != chrec_dont_know)
+def = compute_overall_effect_of_inner_loop (ex_loop, def);
+
+  /* Handle bitop with invariant induction expression.
+
+ .i.e
+ for (int i =0 ;i < 32; i++)
+   tmp &= bit2;
+ if bit2 is an invariant in loop which could simple to tmp &= bit2.  */
+  else if ((bitinv_def
+   = analyze_and_compute_bitop_with_inv_effect (loop,
+value, niter)))
+def = bitinv_def;
+
+  /* Handle bitwise induction expression.
+
+ .i.e.
+ for (int i = 0; i != 64; i+=3)
+   res &= ~(1UL << i);
+
+ RES can't be analyzed out by SCEV because it is not polynomially
+ expressible, but in fact final value of RES can be replaced by
+ RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
+ being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
+  else if (tree_fits_uhwi_p (niter)
+  && (niter_num = tree_to_uhwi (niter)) != 0
+  && niter_num < TYPE_PRECISION (TREE_TYPE (value))
+  && (bit_def
+  = analyze_and_compute_bitwise_induction_effect (loop, value,
+  niter_num)))
+def = bit_def;
+
+  bool cond_overflow_p = false;
+
+  if (!tree_does_not_contain_chrecs (def)
+  || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
+  /* Moving the computation from the loop may prolong life range
+of some ssa names, which may cause problems if they appear
+on abnormal edges.  */
+  || contains_abnormal_ssa_name_p (def)
+  /* Do not emit expensive expressions.  The rationale is that
+when someone writes a code like
+
+while (n > 45) n -= 45;
+
+he probably knows that n is not large, and does not want it
+to be turned into n %= 45.  */
+  || (consider_cost && expression_expensive_p (def, &cond_overflow_p)))
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "skip scev final value:\n  ");
+ print_generic_expr (dump_file, value);
+ fprintf (dump_file, " -> ");
+ print_generic_expr (dump_file, def);
+ fprintf (dump_file, "\n");
+   }
+  return NULL_TREE;
+}
+
+  if (rewrite_overflow)
+{
+  *rewrite_overflow = false;
+
+  if ((folded_casts
+ && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
+|| cond_overflow_p)
+   *rewrite_overflow = true;
+}
+
+  return def;
+}
+
+/* Given a loop closed PHI, replace it with a new assignment from its
+   FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to
+   rewrite expressions in FINAL_VALUE to avoid overflow UB.  When FINAL_VALUE
+   is constant, we could just propagate the constant, however, sometimes we
+   have to leave

Re: [PATCH] Add COBOL to gcc

2024-12-12 Thread James K. Lowden

On Thu, 12 Dec 2024 15:07:35 +0100
Richard Biener  wrote:

> On Wed, Dec 11, 2024 at 4:19 PM James K. Lowden
>  wrote:
> >
> > I think the term of art is "ping"?
> >
> > If GCC needs something from me to proceed with this, please tell me
> > what it is.
> 
> I think we're waiting on the rest of the patches adding the Cobol
> frontend at this point.

Ah, the ever careful cross of wires!  I thought I understood Jakub to
have asked me to hold off sending further patches until these were
committed.  

Sancho!  Fetch me my steed!  

--jkl

[PATCH] Fix toplevel-asm-1.c failure for riscv

2024-12-12 Thread Andreas Schwab

Don't use 'c' modifier with a symbolic address.  The riscv target doesn't
accept it as a CONSTANT_ADDRESS_P and doesn't support 'c' with SYMBOL_REF.

* c-c++-common/toplevel-asm-1.c: Remove 'c' from %3 and %4.
---
 gcc/testsuite/c-c++-common/toplevel-asm-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/toplevel-asm-1.c 
b/gcc/testsuite/c-c++-common/toplevel-asm-1.c
index 12623fcd666..21f726a4656 100644
--- a/gcc/testsuite/c-c++-common/toplevel-asm-1.c
+++ b/gcc/testsuite/c-c++-common/toplevel-asm-1.c
@@ -8,7 +8,7 @@ enum E { E0, E1 = sizeof (struct S) + 15 };
 int v[42];
 void foo (void) {}
 
-asm ("# %0 %1 %2 %c3 %c4 %5 %% %="
+asm ("# %0 %1 %2 %3 %4 %5 %% %="
  :: "i" (sizeof (struct S)),
"i" (__builtin_offsetof (struct S, c)),
"i" (E1),
-- 
2.47.1


-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [PATCH] Fix toplevel-asm-1.c failure for riscv

2024-12-12 Thread Jakub Jelinek

On Thu, Dec 12, 2024 at 06:05:31PM +0100, Andreas Schwab wrote:
> Don't use 'c' modifier with a symbolic address.  The riscv target doesn't
> accept it as a CONSTANT_ADDRESS_P and doesn't support 'c' with SYMBOL_REF.
> 
>   * c-c++-common/toplevel-asm-1.c: Remove 'c' from %3 and %4.

The intent was to test %cN because %N doesn't DTRT on various targets.
I have a patch to add %ccN support which should then work even on riscv
hopefully, but unfortunately it hasn't been fully reviewed yet.

If you want a workaround before that, I'd suggest doing it conditionally
for arches which don't handle %cN with SYMBOL_REFs even with -fno-pie, so
that we do test %cN on those where it does work.

> --- a/gcc/testsuite/c-c++-common/toplevel-asm-1.c
> +++ b/gcc/testsuite/c-c++-common/toplevel-asm-1.c
> @@ -8,7 +8,7 @@ enum E { E0, E1 = sizeof (struct S) + 15 };
>  int v[42];
>  void foo (void) {}
>  
> -asm ("# %0 %1 %2 %c3 %c4 %5 %% %="
> +asm ("# %0 %1 %2 %3 %4 %5 %% %="
>   :: "i" (sizeof (struct S)),
>   "i" (__builtin_offsetof (struct S, c)),
>   "i" (E1),
> -- 
> 2.47.1

Jakub

[COMMITTED 12/30] ada: Crash on assignment of task allocator with expanded name

2024-12-12 Thread Marc Poulhiès

From: Bob Duff 

The compiler crashes on an assignment statement of the form
"X.Y := new T;", where X.Y is an expanded name (i.e. not a record
component or similar) and T is a type containing tasks.

gcc/ada/ChangeLog:

* exp_util.adb (Build_Task_Image_Decls):
Deal properly with the case of an expanded name.
Minor cleanup: use a case statement instead of if/elsif chain.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_util.adb | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index 99aacc763a1..dd284e2a20d 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -4396,6 +4396,10 @@ package body Exp_Util is
  Nkind (Parent (Id_Ref)) = N_Assignment_Statement
and then
  Nkind (Expression (Parent (Id_Ref))) = N_Allocator;
+  Id : constant Node_Id :=
+(if Nkind (Id_Ref) = N_Expanded_Name then Selector_Name (Id_Ref)
+ else Id_Ref);
+  --  Replace expanded_name X.Y with Y
 
   Component_Suffix_Index : constant Int :=
 (if In_Init_Proc then -1 else 0);
@@ -4421,11 +4425,10 @@ package body Exp_Util is
Expression =>
  Make_String_Literal (Loc,
Strval => String_From_Name_Buffer)));
+  end if;
 
-  else
- if Nkind (Id_Ref) = N_Identifier
-   or else Nkind (Id_Ref) = N_Defining_Identifier
- then
+  case Nkind (Id) is
+ when N_Identifier | N_Defining_Identifier =>
 --  For a simple variable, the image of the task is built from
 --  the name of the variable. To avoid possible conflict with the
 --  anonymous type created for a single protected object, add a
@@ -4433,29 +4436,31 @@ package body Exp_Util is
 
 T_Id :=
   Make_Defining_Identifier (Loc,
-New_External_Name (Chars (Id_Ref), 'T', 1));
+New_External_Name (Chars (Id), 'T', 1));
 
-Get_Name_String (Chars (Id_Ref));
+Get_Name_String (Chars (Id));
 
 Expr :=
   Make_String_Literal (Loc,
 Strval => String_From_Name_Buffer);
 
- elsif Nkind (Id_Ref) = N_Selected_Component then
+ when N_Selected_Component =>
 T_Id :=
   Make_Defining_Identifier (Loc,
-New_External_Name (Chars (Selector_Name (Id_Ref)), 'T',
+New_External_Name (Chars (Selector_Name (Id)), 'T',
   Suffix_Index => Component_Suffix_Index));
-Fun := Build_Task_Record_Image (Loc, Id_Ref, Is_Dyn);
+Fun := Build_Task_Record_Image (Loc, Id, Is_Dyn);
 
- elsif Nkind (Id_Ref) = N_Indexed_Component then
+ when N_Indexed_Component =>
 T_Id :=
   Make_Defining_Identifier (Loc,
 New_External_Name (Chars (A_Type), 'N'));
 
-Fun := Build_Task_Array_Image (Loc, Id_Ref, A_Type, Is_Dyn);
- end if;
-  end if;
+Fun := Build_Task_Array_Image (Loc, Id, A_Type, Is_Dyn);
+
+ when others =>
+raise Program_Error;
+  end case;
 
   if Present (Fun) then
  Append (Fun, Decls);
-- 
2.43.0

[COMMITTED 04/30] ada: Avoid expanding LHS assignments for controlled types

2024-12-12 Thread Marc Poulhiès

From: Viljar Indus 

Expanding a function call that returns a controlled type
on the left-hand side of an assignment should be avoided.
Otherwise we will miss the diagnostic for
trying to assign something to a non-variable element.

gcc/ada/ChangeLog:

* exp_ch6.adb (Expand_Ctrl_Function_Call): Avoid expansion
of controlled types when the LHS is a function call.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch6.adb | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index 7010256b1a9..7839b671632 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -5356,6 +5356,13 @@ package body Exp_Ch6 is
  return;
   end if;
 
+  --  Avoid expansions to catch an error when the function call is on the
+  --  left-hand side of an assignment.
+
+  if Nkind (Par) = N_Assignment_Statement and then N = Name (Par) then
+ return;
+  end if;
+
   --  Resolution is now finished, make sure we don't start analysis again
   --  because of the duplication.
 
-- 
2.43.0

[COMMITTED 05/30] ada: Clean up and restrict usage of Initialization_Statements

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

This mechanism is the only producer of N_Compound_Statement in the expanded
code and parks the statements generated for the in-place initialization of
objects by an aggregate, so that they can be moved to the freeze point if
there is an address aspect/clause, or even cancelled if the aggregate has
been generated for Initialize_Scalars/Normalize_Scalars before a subsequent
pragma Import for the object is encountered.

The main condition for its triggering is that the object be not yet frozen,
but that's always the case when its declaration is being processed, so the
mechanism is triggered unnecessarily and the change restricts this but, on
the other hand, it also extends its usage to the in-place initialization by
a function call, which was implemented by means of a custom deferral.

There should be no functional changes.

gcc/ada/ChangeLog:

* einfo.ads (Initialization_Statements): Document usage precisely.
* exp_aggr.adb (Convert_Aggr_In_Object_Decl): Do not create a
compound statement in most cases, do it only if necessary.
* exp_ch3.adb (Expand_N_Object_Declaration): Remove a couple of
useless statements.
* exp_ch6.adb (Make_Build_In_Place_Call_In_Object_Declaration):
Use the Initialization_Statements mechanism if necessary.
* exp_ch7.adb: Remove clauses for Aspects package.
(Insert_Actions_In_Scope_Around): Use the support code of Exp_Util
for the Initialization_Statements mechanism.
* exp_prag.adb (Undo_Initialization): Remove obsolete code.
* exp_util.ads (Move_To_Initialization_Statements): New procedure.
(Needs_Initialization_Statements): New function.
* exp_util.adb (Move_To_Initialization_Statements): New procedure.
(Needs_Initialization_Statements): New predicate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/einfo.ads| 12 ---
 gcc/ada/exp_aggr.adb | 56 
 gcc/ada/exp_ch3.adb  |  2 --
 gcc/ada/exp_ch6.adb  | 76 
 gcc/ada/exp_ch7.adb  | 26 +++
 gcc/ada/exp_prag.adb | 21 
 gcc/ada/exp_util.adb | 37 +
 gcc/ada/exp_util.ads | 12 +++
 8 files changed, 94 insertions(+), 148 deletions(-)

diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
index 8255ae95683..f929c26571d 100644
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -2270,10 +2270,14 @@ package Einfo is
 --   call wrapper if available.
 
 --Initialization_Statements
---   Defined in constants and variables. For a composite object initialized
---   with an aggregate that has been converted to a sequence of
---   assignments, points to a compound statement containing the
---   assignments.
+--   Defined in constants and variables. For a composite object coming from
+--   source and initialized with an aggregate or a call expanded in place,
+--   points to a compound statement containing the assignment(s). This is
+--   used for a couple of purposes: 1) to defer the initialization to the
+--   freeze point if an address aspect/clause is present for the object,
+--   2) to cancel the initialization of imported objects generated by
+--   Initialize_Scalars or Normalize_Scalars before the pragma Import is
+--   encountered for the object.
 
 --Inner_Instances
 --   Defined in generic units. Contains element list of units that are
diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index a82705dca3f..9162e9694f9 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -3576,10 +3576,11 @@ package body Exp_Aggr is
-
 
procedure Convert_Aggr_In_Object_Decl (N : Node_Id) is
-  Obj  : constant Entity_Id  := Defining_Identifier (N);
-  Aggr : constant Node_Id:= Unqualify (Expression (N));
-  Loc  : constant Source_Ptr := Sloc (Aggr);
-  Typ  : constant Entity_Id  := Etype (Aggr);
+  Obj: constant Entity_Id  := Defining_Identifier (N);
+  Aggr   : constant Node_Id:= Unqualify (Expression (N));
+  Loc: constant Source_Ptr := Sloc (Aggr);
+  Typ: constant Entity_Id  := Etype (Aggr);
+  Marker : constant Node_Id:= Next (N);
 
   function Discriminants_Ok return Boolean;
   --  If the object's subtype is constrained, the discriminants in the
@@ -3651,11 +3652,10 @@ package body Exp_Aggr is
 
   --  Local variables
 
-  Has_Transient_Scope : Boolean;
-  Occ : Node_Id;
-  Param   : Node_Id;
-  Stmt: Node_Id;
-  Stmts   : List_Id;
+  Occ   : Node_Id;
+  Param : Node_Id;
+  Stmt  : Node_Id;
+  Stmts : List_Id;
 
--  Start of processing for Convert_Aggr_In_Object_Decl
 
@@ -3685,39 +3685,14 @@ package body Exp_Aggr is
 and then Ekind (Current_Scope) /= E_Return_Statement
 and then

[COMMITTED 09/30] ada: Fix wrong finalization with private unconstrained array type

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

The address passed to the routine attaching a controlled object to the
finalization master must be that of its dope vector for an object whose
nominal subtype is an unconstrained array type, but this is not the case
when this subtype has a private declaration.

gcc/ada/ChangeLog:

* exp_ch7.adb (Make_Address_For_Finalize): Look at the underlying
subtype to detect the unconstrained array type case.
* sprint.adb (Write_Itype) : New case.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb | 10 ++
 gcc/ada/sprint.adb  |  4 
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index d3cc6c70d97..be281e3519d 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -5514,6 +5514,8 @@ package body Exp_Ch7 is
   Obj_Ref : Node_Id;
   Obj_Typ : Entity_Id) return Node_Id
is
+  Utyp : constant Entity_Id := Underlying_Type (Obj_Typ);
+
   Obj_Addr : Node_Id;
 
begin
@@ -5529,13 +5531,13 @@ package body Exp_Ch7 is
   --  but the address of the object is still that of its elements,
   --  so we need to shift it.
 
-  if Is_Array_Type (Obj_Typ)
-and then not Is_Constrained (First_Subtype (Obj_Typ))
+  if Is_Array_Type (Utyp)
+and then not Is_Constrained (First_Subtype (Utyp))
   then
  --  Shift the address from the start of the elements to the
  --  start of the dope vector:
 
- --V - (Obj_Typ'Descriptor_Size / Storage_Unit)
+ --V - (Utyp'Descriptor_Size / Storage_Unit)
 
  Obj_Addr :=
Make_Function_Call (Loc,
@@ -5552,7 +5554,7 @@ package body Exp_Ch7 is
Make_Op_Divide (Loc,
  Left_Opnd  =>
Make_Attribute_Reference (Loc,
- Prefix => New_Occurrence_Of (Obj_Typ, Loc),
+ Prefix => New_Occurrence_Of (Utyp, Loc),
  Attribute_Name => Name_Descriptor_Size),
  Right_Opnd =>
Make_Integer_Literal (Loc, System_Storage_Unit;
diff --git a/gcc/ada/sprint.adb b/gcc/ada/sprint.adb
index 614bcc17b14..67259b9831c 100644
--- a/gcc/ada/sprint.adb
+++ b/gcc/ada/sprint.adb
@@ -4712,6 +4712,10 @@ package body Sprint is
 Write_Str (");");
  end;
 
+  when E_Private_Subtype =>
+ Write_Header (False);
+ Write_Name_With_Col_Check (Chars (Full_View (Typ)));
+
   --  For all other Itypes, print a triple ? (fill in later
   --  if needed).
 
-- 
2.43.0

[COMMITTED 11/30] ada: Lift technical limitation in expansion of record aggregates

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

The mechanism deferring the expansion of record aggregates nested in other
aggregates with intermediate conditional expressions is disabled in the
case where they contain self-references, because of a technical limitation
in the replacements done by Build_Record_Aggr_Code.  This change lifts it.

gcc/ada/ChangeLog:

* exp_aggr.adb (Traverse_Proc_For_Aggregate): New generic procedure.
(Replace_Discriminants): Instantiate it instead of Traverse_Proc.
(Replace_Self_Reference): Likewise.
(Convert_To_Assignments): Remove limitation for nested aggregates
that contain self-references.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 90 
 1 file changed, 82 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 9162e9694f9..d0ccaa4f3ff 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -2268,6 +2268,16 @@ package body Exp_Aggr is
   --  If default expression of a component mentions a discriminant of the
   --  type, it must be rewritten as the discriminant of the target object.
 
+  generic
+ with function Process (N : Node_Id) return Traverse_Result is <>;
+  procedure Traverse_Proc_For_Aggregate (N : Node_Id);
+  pragma Inline (Traverse_Proc_For_Aggregate);
+  --  This extends Traverse_Proc from Atree by looking into the Actions
+  --  list of conditional expressions, which are semantic fields and not
+  --  syntactic ones like the Actions of an N_Expression_With_Actions.
+  --  This makes it possible to delay the expansion of these conditional
+  --  expressions when they appear within the aggregate.
+
   -
   -- Ancestor_Discriminant_Value --
   -
@@ -2825,11 +2835,78 @@ package body Exp_Aggr is
  return OK;
   end Rewrite_Discriminant;
 
+  -
+  -- Traverse_Proc_For_Aggregate --
+  -
+
+  procedure Traverse_Proc_For_Aggregate (N : Node_Id) is
+
+ function Process_For_Aggregate (N : Node_Id) return Traverse_Result;
+ --  Call Process on N and on the nodes in the Actions list of N if
+ --  it is a conditional expression.
+
+ procedure Traverse_Node is new Traverse_Proc (Process_For_Aggregate);
+ --  Call Process_For_Aggregate on the subtree rooted at N
+
+ ---
+ -- Process_For_Aggregate --
+ ---
+
+ function Process_For_Aggregate (N : Node_Id) return Traverse_Result is
+
+procedure Traverse_List (L : List_Id);
+pragma Inline (Traverse_List);
+--  Call Traverse_Node on the nodes of list L
+
+
+-- Traverse_List --
+
+
+procedure Traverse_List (L : List_Id) is
+   N : Node_Id := First (L);
+
+begin
+   while Present (N) loop
+  Traverse_Node (N);
+  Next (N);
+   end loop;
+end Traverse_List;
+
+--  Local variables
+
+Alt : Node_Id;
+Discard : Traverse_Final_Result;
+pragma Unreferenced (Discard);
+
+ --  Start of processing for Process_For_Aggregate
+
+ begin
+Discard := Process (N);
+
+if Nkind (N) = N_Case_Expression then
+   Alt := First (Alternatives (N));
+   while Present (Alt) loop
+  Traverse_List (Actions (Alt));
+  Next (Alt);
+   end loop;
+
+elsif Nkind (N) = N_If_Expression then
+   Traverse_List (Then_Actions (N));
+   Traverse_List (Else_Actions (N));
+end if;
+
+return OK;
+ end Process_For_Aggregate;
+
+  begin
+ Traverse_Node (N);
+  end Traverse_Proc_For_Aggregate;
+
   procedure Replace_Discriminants is
-new Traverse_Proc (Rewrite_Discriminant);
+new Traverse_Proc_For_Aggregate (Rewrite_Discriminant);
 
   procedure Replace_Self_Reference is
-new Traverse_Proc (Replace_Type);
+new Traverse_Proc_For_Aggregate (Replace_Type);
 
--  Start of processing for Build_Record_Aggr_Code
 
@@ -4131,16 +4208,13 @@ package body Exp_Aggr is
   if
  --  Internal aggregates (transformed when expanding the parent),
  --  excluding container aggregates as these are transformed into
- --  subprogram calls later. So far aggregates with self-references
- --  are not supported if they appear in a conditional expression.
+ --  subprogram calls later.
 
  (Nkind (Parent_Node) = N_Component_Association
-   and then not Is_Container_Aggregate (Parent

[COMMITTED 06/30] ada: Restrict External_Initialization file lookup

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

Before this patch, External_Initialization looked for files in all
directories of the source search path, which led to inconsistencies in
some cases. This patch restricts the file lookup so the argument is
interpreted as relative to the current source file's directory only.

gcc/ada/ChangeLog:

* sem_ch3.adb (Apply_External_Initialization): Restrict File lookup.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb | 41 +
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index 4a3d020330c..f88c5adc929 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -42,6 +42,7 @@ with Exp_Dist;   use Exp_Dist;
 with Exp_Tss;use Exp_Tss;
 with Exp_Util;   use Exp_Util;
 with Expander;   use Expander;
+with Fmap;
 with Freeze; use Freeze;
 with Ghost;  use Ghost;
 with Itypes; use Itypes;
@@ -54,6 +55,7 @@ with Namet;  use Namet;
 with Nlists; use Nlists;
 with Nmake;  use Nmake;
 with Opt;use Opt;
+with Osint;
 with Restrict;   use Restrict;
 with Rident; use Rident;
 with Rtsfind;use Rtsfind;
@@ -87,6 +89,7 @@ with Sinput.L;
 with Snames; use Snames;
 with Stringt;
 with Strub;  use Strub;
+with System.OS_Lib;
 with Targparm;   use Targparm;
 with Tbuild; use Tbuild;
 with Ttypes; use Ttypes;
@@ -3885,6 +3888,7 @@ package body Sem_Ch3 is
 
  Expr : N_Subexpr_Id;
 
+ Data_Path : File_Name_Type;
   begin
  Remove (Specification);
 
@@ -3919,13 +3923,42 @@ package body Sem_Ch3 is
 return;
  end if;
 
+ declare
+S : constant String := Stringt.To_String (Strval (Def));
  begin
-declare
-   Name : constant Valid_Name_Id :=
- Stringt.String_To_Name (Strval (Def));
+if System.OS_Lib.Is_Absolute_Path (S) then
+   Data_Path := Name_Find (S);
+else
+   declare
+  Current_File_Name : constant File_Name_Type :=
+Unit_File_Name (Current_Sem_Unit);
+
+  Current_File_Path : constant File_Name_Type :=
+Fmap.Mapped_Path_Name (Current_File_Name);
+
+  Current_File_Directory : constant File_Name_Type :=
+Osint.Get_Directory (Current_File_Path);
+
+  Absolute_Dir : constant String :=
+System.OS_Lib.Normalize_Pathname
+  (Get_Name_String (Current_File_Directory),
+   Resolve_Links => False);
+
+  Data_Path_String : constant String :=
+Absolute_Dir
+& System.OS_Lib.Directory_Separator
+& Stringt.To_String (Strval (Def));
 
+   begin
+  Data_Path := Name_Find (Data_Path_String);
+   end;
+end if;
+ end;
+
+ begin
+declare
Source_File_I : constant Source_File_Index :=
- Sinput.L.Load_Source_File (File_Name_Type (Name));
+ Sinput.L.Load_Source_File (Data_Path);
 begin
if Source_File_I <= No_Source_File then
   Error_Msg_N ("cannot find input file", Specification);
-- 
2.43.0

[COMMITTED 13/30] ada: Fix documentation comment for Scan_Sign

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch fixes a couple of details that were wrong in the
documentation comment for System.Val_Util.Scan_Sign.

gcc/ada/ChangeLog:

* libgnat/s-valuti.ads (Scan_Sign): Fix documentation comment.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/s-valuti.ads | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/libgnat/s-valuti.ads b/gcc/ada/libgnat/s-valuti.ads
index 6f91c363a43..7963245377b 100644
--- a/gcc/ada/libgnat/s-valuti.ads
+++ b/gcc/ada/libgnat/s-valuti.ads
@@ -121,13 +121,12 @@ is
--  string to be scanned starting at Ptr.all, and Max is the index of the
--  last character in the string). Scan_Sign first scans out any initial
--  blanks, raising Constraint_Error if the field is all blank. It then
-   --  checks for and skips an initial plus or minus, requiring a non-blank
-   --  character to follow (Constraint_Error is raised if plus or minus appears
-   --  at the end of the string or with a following blank). Minus is set True
-   --  if a minus sign was skipped, and False otherwise. On exit Ptr.all points
-   --  to the character after the sign, or to the first non-blank character
-   --  if no sign is present. Start is set to the point to the first non-blank
-   --  character (sign or digit after it).
+   --  checks for and skips an initial plus or minus (Constraint_Error is
+   --  raised if plus or minus appears at the end of the string). Minus is set
+   --  True if a minus sign was skipped, and False otherwise. On exit Ptr.all
+   --  points to the character after the sign, or to the first non-blank
+   --  character if no sign is present. Start is set to the point to the first
+   --  non-blank character.
--
--  Note: if Str is null, i.e. if Max is less than Ptr, then this is a
--  special case of an all-blank string, and Ptr is unchanged, and hence
-- 
2.43.0

[COMMITTED 02/30] ada: Export CHERI exception IDs

2024-12-12 Thread Marc Poulhiès

From: Daniel King 

This allows CHERI exceptions to be raised from C code in the runtime.

gcc/ada/ChangeLog:

* libgnat/i-cheri-exceptions.ads: Export CHERI exception IDs.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/i-cheri-exceptions.ads | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/ada/libgnat/i-cheri-exceptions.ads 
b/gcc/ada/libgnat/i-cheri-exceptions.ads
index 88ecb1325c4..2a3acd2e918 100644
--- a/gcc/ada/libgnat/i-cheri-exceptions.ads
+++ b/gcc/ada/libgnat/i-cheri-exceptions.ads
@@ -28,6 +28,7 @@
 -- Extensive contributions were provided by Ada Core Technologies Inc.  --
 --  --
 --
+with Ada.Exceptions;
 
 --  This package defines exception types for CHERI-related errors
 
@@ -47,4 +48,33 @@ is
Capability_Tag_Error : exception;
--  An invalid capability was dereferenced
 
+private
+
+   --  Expose C names for exception identifiers to allow raising from signal
+   --  handlers in init.c.
+
+   Capability_Bound_Error_Id : constant Ada.Exceptions.Exception_Id :=
+ Capability_Bound_Error'Identity;
+   pragma Export (C,
+  Capability_Bound_Error_Id,
+  "capability_bound_error_id");
+
+   Capability_Permission_Error_Id : constant Ada.Exceptions.Exception_Id :=
+ Capability_Permission_Error'Identity;
+   pragma Export (C,
+  Capability_Permission_Error_Id,
+  "capability_permission_error_id");
+
+   Capability_Sealed_Error_Id : constant Ada.Exceptions.Exception_Id :=
+ Capability_Sealed_Error'Identity;
+   pragma Export (C,
+  Capability_Sealed_Error_Id,
+  "capability_sealed_error_id");
+
+   Capability_Tag_Error_Id : constant Ada.Exceptions.Exception_Id :=
+ Capability_Tag_Error'Identity;
+   pragma Export (C,
+  Capability_Tag_Error_Id,
+  "capability_tag_error_id");
+
 end Interfaces.CHERI.Exceptions;
-- 
2.43.0

[COMMITTED 14/30] ada: Add minimal support for other delayed aspects on controlled objects

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

This extends the processing done for the Address aspect to other delayed
aspects.  The External_Name aspect is also reclassified as a representation
aspect and the three representation aspects External_Name, Link_Name and
Linker_Section are moved from the Always_Delay to the Rep_Aspect category,
which makes it possible not to delay them in most cases with a small tweak.

gcc/ada/ChangeLog:

* aspects.ads (Is_Representation_Aspect): True for External_Name.
(Aspect_Delay): Use Rep_Aspect for External_Name, Link_Name and
Linker_Section.
* einfo.ads (Initialization_Statements): Document extended usage.
* exp_util.adb (Needs_Initialization_Statements): Return True for
all delayed aspects.
* freeze.adb (Check_Address_Clause): Do not move the initialization
expression here...
(Freeze_Object_Declaration): ...but here instead, as well as for all
delayed aspects.  Remove test for pragma Linker_Section.
* sem_ch13.adb (Analyze_One_Aspect): Do not delay in the Rep_Aspect
case if the expression is a string literal.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/aspects.ads  |   8 +--
 gcc/ada/einfo.ads|   6 +--
 gcc/ada/exp_util.adb |   2 +-
 gcc/ada/freeze.adb   | 119 +--
 gcc/ada/sem_ch13.adb |   5 +-
 5 files changed, 58 insertions(+), 82 deletions(-)

diff --git a/gcc/ada/aspects.ads b/gcc/ada/aspects.ads
index ebf09602ea5..100ab4d55a8 100644
--- a/gcc/ada/aspects.ads
+++ b/gcc/ada/aspects.ads
@@ -543,7 +543,7 @@ package Aspects is
   Aspect_Exclusive_Functions  => False,
   Aspect_Extended_Access  => True,
   Aspect_External_Initialization  => False,
-  Aspect_External_Name=> False,
+  Aspect_External_Name=> True,
   Aspect_External_Tag => False,
   Aspect_Finalizable  => False,
   Aspect_First_Controlling_Parameter  => False,
@@ -973,7 +973,6 @@ package Aspects is
   Aspect_Dynamic_Predicate=> Always_Delay,
   Aspect_Elaborate_Body   => Always_Delay,
   Aspect_Exclusive_Functions  => Always_Delay,
-  Aspect_External_Name=> Always_Delay,
   Aspect_External_Tag => Always_Delay,
   Aspect_Favor_Top_Level  => Always_Delay,
   Aspect_Finalizable  => Always_Delay,
@@ -990,8 +989,6 @@ package Aspects is
   Aspect_Invariant=> Always_Delay,
   Aspect_Iterable => Always_Delay,
   Aspect_Iterator_Element => Always_Delay,
-  Aspect_Link_Name=> Always_Delay,
-  Aspect_Linker_Section   => Always_Delay,
   Aspect_Lock_Free=> Always_Delay,
   Aspect_No_Inline=> Always_Delay,
   Aspect_No_Raise => Always_Delay,
@@ -1100,7 +1097,10 @@ package Aspects is
   Aspect_Bit_Order=> Rep_Aspect,
   Aspect_Component_Size   => Rep_Aspect,
   Aspect_Extended_Access  => Rep_Aspect,
+  Aspect_External_Name=> Rep_Aspect,
   Aspect_Full_Access_Only => Rep_Aspect,
+  Aspect_Link_Name=> Rep_Aspect,
+  Aspect_Linker_Section   => Rep_Aspect,
   Aspect_Machine_Radix=> Rep_Aspect,
   Aspect_Object_Size  => Rep_Aspect,
   Aspect_Pack => Rep_Aspect,
diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
index f929c26571d..1a8760c0dbb 100644
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -2274,9 +2274,9 @@ package Einfo is
 --   source and initialized with an aggregate or a call expanded in place,
 --   points to a compound statement containing the assignment(s). This is
 --   used for a couple of purposes: 1) to defer the initialization to the
---   freeze point if an address aspect/clause is present for the object,
---   2) to cancel the initialization of imported objects generated by
---   Initialize_Scalars or Normalize_Scalars before the pragma Import is
+--   freeze point if an address clause or a delayed aspect is present for
+--   the object, 2) to cancel initialization of imported objects generated
+--   by Initialize_Scalars or Normalize_Scalars before the pragma Import is
 --   encountered for the object.
 
 --Inner_Instances
diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index dd284e2a20d..e9a683f8255 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -11700,7 +11700,7 @@ package body Exp_Util is
   --  See the documentation of Initialization_Statements in Einfo
 
   return Comes_From_Source (Decl)
-and then (Has_Aspect (Obj_Id, Aspect_Address)
+and then (Has_Delayed_Aspe

[COMMITTED 15/30] ada: Fix validity check for private types

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

Before this patch, the machinery to generate validity checks got
confused in some situations involving private views of types, and ended
up generating incorrect conversions from floating point types to integer
types. This patch fixes this.

gcc/ada/ChangeLog:

* exp_attr.adb (Expand_N_Attribute_Reference): Fix computation of type
category.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index cb068c102a2..904293bbd1d 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -7627,7 +7627,7 @@ package body Exp_Attr is
  --  Floating-point case. This case is handled by the Valid attribute
  --  code in the floating-point attribute run-time library.
 
- if Is_Floating_Point_Type (Ptyp) then
+ if Is_Floating_Point_Type (PBtyp) then
 Float_Valid : declare
Pkg : RE_Id;
Ftp : Entity_Id;
@@ -7652,7 +7652,7 @@ package body Exp_Attr is
 --  Start of processing for Float_Valid
 
 begin
-   Find_Fat_Info (Ptyp, Ftp, Pkg);
+   Find_Fat_Info (PBtyp, Ftp, Pkg);
 
--  If the prefix is a reverse SSO component, or is possibly
--  unaligned, first create a temporary copy that is in
-- 
2.43.0

[COMMITTED 20/30] ada: Accept static strings with External_Initialization

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

Before this patch, the argument to the External_Initialization aspect
had to be a string literal. This patch extends the possibilities so that
any static string is accepted.

A new helper function, Is_OK_Static_Expression_Of_Type, is introduced,
and in addition to the main change of this patch a couple of calls to
that helper function are added in other places to replace equivalent
inline code.

gcc/ada/ChangeLog:

* sem_eval.ads (Is_OK_Static_Expression_Of_Type): New function.
* sem_eval.adb (Is_OK_Static_Expression_Of_Type): Likewise.
* sem_ch13.adb (Check_Expr_Is_OK_Static_Expression): Use new function.
* sem_prag.adb (Check_Expr_Is_OK_Static_Expression): Likewise.
* sem_ch3.adb (Apply_External_Initialization): Accept static strings
for the parameter.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch13.adb | 39 --
 gcc/ada/sem_ch3.adb  | 24 ++---
 gcc/ada/sem_eval.adb | 39 ++
 gcc/ada/sem_eval.ads |  9 
 gcc/ada/sem_prag.adb | 50 +---
 5 files changed, 91 insertions(+), 70 deletions(-)

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
index 1a3a16ac9ee..14bc33582eb 100644
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -2581,35 +2581,22 @@ package body Sem_Ch13 is
 
 
 procedure Check_Expr_Is_OK_Static_Expression
-  (Expr : Node_Id;
-   Typ  : Entity_Id := Empty)
-is
+  (Expr : Node_Id; Typ : Entity_Id := Empty) is
 begin
-   if Present (Typ) then
-  Analyze_And_Resolve (Expr, Typ);
-   else
-  Analyze_And_Resolve (Expr);
-   end if;
-
-   --  An expression cannot be considered static if its resolution
-   --  failed or if it's erroneous. Stop the analysis of the
-   --  related aspect.
-
-   if Etype (Expr) = Any_Type or else Error_Posted (Expr) then
-  raise Aspect_Exit;
-
-   elsif Is_OK_Static_Expression (Expr) then
-  return;
+   case Is_OK_Static_Expression_Of_Type (Expr, Typ) is
+  when Static =>
+ null;
 
-   --  Finally, we have a real error
+  when Not_Static =>
+ Error_Msg_Name_1 := Nam;
+ Flag_Non_Static_Expr
+   ("entity for aspect% must be a static expression!",
+Expr);
+ raise Aspect_Exit;
 
-   else
-  Error_Msg_Name_1 := Nam;
-  Flag_Non_Static_Expr
-("entity for aspect% must be a static expression!",
- Expr);
-  raise Aspect_Exit;
-   end if;
+  when Invalid =>
+ raise Aspect_Exit;
+   end case;
 end Check_Expr_Is_OK_Static_Expression;
 
 
diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index f88c5adc929..a5d69c33b15 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -3906,15 +3906,22 @@ package body Sem_Ch3 is
  Set_Expression (N, Error);
  E := Error;
 
- if Nkind (Def) /= N_String_Literal then
-Error_Msg_N
-  ("External_Initialization aspect expects a string literal value",
-   Specification);
-return;
- end if;
+ case Is_OK_Static_Expression_Of_Type (Def, Standard_String) is
+when Static =>
+   null;
+
+when Not_Static =>
+   Error_Msg_N
+ ("External_Initialization aspect expects a static string",
+  Specification);
+   return;
+
+when Invalid =>
+   return;
+ end case;
 
  if not (Is_String_Type (T)
-   or else Is_RTE (Base_Type (T), RE_Stream_Element_Array))
+  or else Is_RTE (Base_Type (T), RE_Stream_Element_Array))
  then
 Error_Msg_N
   ("External_Initialization aspect can only be applied to objects "
@@ -3924,7 +3931,8 @@ package body Sem_Ch3 is
  end if;
 
  declare
-S : constant String := Stringt.To_String (Strval (Def));
+S : constant String :=
+  Stringt.To_String (Strval (Expr_Value_S (Def)));
  begin
 if System.OS_Lib.Is_Absolute_Path (S) then
Data_Path := Name_Find (S);
diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb
index 9ea042ba0d3..f0f83d29c38 100644
--- a/gcc/ada/sem_eval.adb
+++ b/gcc/ada/sem_eval.adb
@@ -5527,6 +5527,45 @@ package body Sem_Eval is
   return Is_Static_Expression

[COMMITTED 07/30] ada: Tweak Is_Predefined_File_Name

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

This patch slightly widens the set of filenames that the compiler
considers predefined. That makes it possible to build the GNAT runtime
using only the file mapping facilities of the compiler, without having
to rename files.

gcc/ada/ChangeLog:

* fname.adb (Is_Predefined_File_Name): Tweak test.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/fname.adb | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/gcc/ada/fname.adb b/gcc/ada/fname.adb
index 165411cf6ec..7c5572a8c03 100644
--- a/gcc/ada/fname.adb
+++ b/gcc/ada/fname.adb
@@ -141,6 +141,8 @@ package body Fname is
   if Fname'Length > 12
 and then Fname (Fname'First .. Fname'First + 1) /= "i-"
 and then Fname (Fname'First .. Fname'First + 1) /= "s-"
+and then not Has_Prefix (Fname, "system-")
+and then not Has_Prefix (Fname, "interfac__")
   then
  return False;
   end if;
@@ -151,23 +153,24 @@ package body Fname is
 
   --  Definitely predefined if prefix is a- i- or s-
 
-  if Fname'Length >= 2 then
- declare
-S : String renames Fname (Fname'First .. Fname'First + 1);
- begin
-if S = "a-" or else S = "i-" or else S = "s-" then
-   return True;
-end if;
- end;
-  end if;
+  pragma Assert (Fname'Length >= 2);
+  declare
+ S : String renames Fname (Fname'First .. Fname'First + 1);
+  begin
+ if S = "a-" or else S = "i-" or else S = "s-" then
+return True;
+ end if;
+  end;
 
   --  We include the "." in the prefixes below, so we don't match (e.g.)
   --  adamant.ads. So the first line matches "ada.ads", "ada.adb", and
   --  "ada.ali". But that's not necessary if they have 8 characters.
 
   if Has_Prefix (Fname, "ada.") --  Ada
-or else Has_Prefix (Fname, "interfac")  --  Interfaces
+or else Fname = "interfac.ads"
+or else Has_Prefix (Fname, "interfac__")
 or else Has_Prefix (Fname, "system.a")  --  System
+or else Has_Prefix (Fname, "system-")   --  System with platform suffix
   then
  return True;
   end if;
-- 
2.43.0

[COMMITTED 22/30] ada: Elide the copy for bit-packed aggregates in allocators

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

The in-place expansion has been historically disabled for them, but there
does not seem to be any good reason left for this.

gcc/ada/ChangeLog:

* exp_aggr.adb (Expand_Array_Aggregate): Do not exclude aggregates
of bit-packed array types in allocators from in-place expansion.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 37c21ac5762..364af228359 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -6150,21 +6150,18 @@ package body Exp_Aggr is
  or else (Parent_Kind in N_Aggregate | N_Extension_Aggregate
and then not Is_Container_Aggregate (Parent_Node))
 
- --  Allocator (see Convert_Aggr_In_Allocator). Bit-packed array types
- --  need specific processing and sliding cannot be done in place for
- --  the time being.
+ --  Allocator (see Convert_Aggr_In_Allocator). Sliding cannot be done
+ --  in place for the time being.
 
  or else (Nkind (Parent_Node) = N_Allocator
and then
  (Aggr_Assignment_OK_For_Backend (N)
or else Is_Limited_Type (Typ)
or else Needs_Finalization (Typ)
-   or else (not Is_Bit_Packed_Array (Typ)
- and then not
-   Must_Slide
+   or else not Must_Slide
  (N,
   Designated_Type (Etype (Parent_Node)),
-  Typ
+  Typ)))
 
  --  Object declaration (see Convert_Aggr_In_Object_Decl). Bit-packed
  --  array types need specific processing and sliding cannot be done
-- 
2.43.0

[COMMITTED 10/30] ada: Small improvements to expansion of conditional expressions

2024-12-12 Thread Marc Poulhiès

From: Eric Botcazou 

They comprise using a nonnull access type for the indirect expansion to
avoid useless checks, simplifying the expansion of if expressions whose
condition is known at compile time to avoid an N_Expression_With_Actions,
using the indirect expansion for them in the indefinite case too, which
makes the special case for an unconstrained array type obsolete.

No functional changes.

gcc/ada/ChangeLog:

* exp_ch4.adb (Expand_N_Case_Expression): Remove obsolete comment
about C code generation.  Do not create a useless target type if
the parent statement is rewritten instead of the expression.  Use
a nonnull access type for the expansion done for composite types.
(Expand_N_If_Expression): Simplify the expansion when the condition
is known at compile time.  Apply the expansion done for by-reference
types to indefinite types and remove the obsolete special case for
unconstrained array types.  Use a nonnull access type in this case.
Rename New_If local variable to If_Stmt for the sake of consistency.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch4.adb | 312 
 1 file changed, 141 insertions(+), 171 deletions(-)

diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb
index 5ae2d11b04c..8c1faf415e1 100644
--- a/gcc/ada/exp_ch4.adb
+++ b/gcc/ada/exp_ch4.adb
@@ -5101,8 +5101,10 @@ package body Exp_Ch4 is
   --   Target : Typ;
   --   case X is
   --  when A =>
+  -- <>
   -- Target := AX;
   --  when B =>
+  -- <>
   -- Target := BX;
   --  ...
   --   end case;
@@ -5110,12 +5112,14 @@ package body Exp_Ch4 is
 
   --  In all other cases expand into
 
-  --   type Ptr_Typ is access all Typ;
+  --   type Ptr_Typ is not null access all Typ;
   --   Target : Ptr_Typ;
   --   case X is
   --  when A =>
+  -- <>
   -- Target := AX'Unrestricted_Access;
   --  when B =>
+  -- <>
   -- Target := BX'Unrestricted_Access;
   --  ...
   --   end case;
@@ -5124,9 +5128,6 @@ package body Exp_Ch4 is
 
   --  This approach avoids extra copies of potentially large objects. It
   --  also allows handling of values of limited or unconstrained types.
-  --  Note that we do the copy also for constrained, nonlimited types
-  --  when minimizing expressions with actions (e.g. when generating C
-  --  code) since it allows us to do the optimization below in more cases.
 
   Case_Stmt :=
 Make_Case_Statement (Loc,
@@ -5141,16 +5142,21 @@ package body Exp_Ch4 is
   Set_From_Conditional_Expression (Case_Stmt);
   Acts := New_List;
 
+  --  No need for Target_Typ in the case of statements
+
+  if Optimize_Assignment_Stmt or else Optimize_Return_Stmt then
+ null;
+
   --  Scalar/Copy case
 
-  if Is_Copy_Type (Typ) then
+  elsif Is_Copy_Type (Typ) then
  Target_Typ := Typ;
 
   --  Otherwise create an access type to handle the general case using
   --  'Unrestricted_Access.
 
   --  Generate:
-  --type Ptr_Typ is access all Typ;
+  --type Ptr_Typ is not null access all Typ;
 
   else
  Target_Typ := Make_Temporary (Loc, 'P');
@@ -5160,8 +5166,9 @@ package body Exp_Ch4 is
  Defining_Identifier => Target_Typ,
  Type_Definition =>
Make_Access_To_Object_Definition (Loc,
- All_Present=> True,
- Subtype_Indication => New_Occurrence_Of (Typ, Loc;
+ All_Present=> True,
+ Null_Exclusion_Present => True,
+ Subtype_Indication => New_Occurrence_Of (Typ, Loc;
   end if;
 
   --  Create the declaration of the target which captures the value of the
@@ -5190,8 +5197,9 @@ package body Exp_Ch4 is
   Alt := First (Alternatives (N));
   while Present (Alt) loop
  declare
-Alt_Expr : Node_Id := Relocate_Node (Expression (Alt));
-Alt_Loc  : constant Source_Ptr := Sloc (Alt_Expr);
+Alt_Loc  : constant Source_Ptr := Sloc (Expression (Alt));
+
+Alt_Expr : Node_Id := Relocate_Node (Expression (Alt));
 LHS  : Node_Id;
 Stmts: List_Id;
 
@@ -5516,11 +5524,11 @@ package body Exp_Ch4 is
 
   --  Local variables
 
-  Actions : List_Id;
+  Actions  : List_Id;
   Decl : Node_Id;
   Expr : Node_Id;
+  If_Stmt  : Node_Id;
   New_Else : Node_Id;
-  New_If   : Node_Id;
   New_N: Node_Id;
   New_Then : Node_Id;
 
@@ -5585,53 +5593,42 @@ package body Exp_Ch4 is
   --  expression, and Sem_Elab circuitry removing it repeatedly.

[COMMITTED 26/30] ada: Fix minor display issue on invalid floats

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

GNAT implements a format with trailing '*' signs for the Image attribute
of NaN, +inf and -inf. It was probably always intended to be the same
length as the image of 1.0, but one '*' was actually missing. This patch
fixes this.

gcc/ada/ChangeLog:

* libgnat/s-imager.adb (Image_Floating_Point): Tweak display of
invalid floating point values.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/s-imager.adb | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/libgnat/s-imager.adb b/gcc/ada/libgnat/s-imager.adb
index c598cc94e15..d19fda3b613 100644
--- a/gcc/ada/libgnat/s-imager.adb
+++ b/gcc/ada/libgnat/s-imager.adb
@@ -109,6 +109,7 @@ package body System.Image_R is
is
   pragma Assert (S'First = 1);
 
+  Fore : Natural;
begin
   --  Decide whether a blank should be prepended before the call to
   --  Set_Image_Real. We generate a blank for positive values, and
@@ -125,11 +126,13 @@ package body System.Image_R is
  --  Image_Floating_Point operation.
  S (1) := ' ';
  P := 1;
+ Fore := 1;
   else
  P := 0;
+ Fore := 2;
   end if;
 
-  Set_Image_Real (V, S, P, 1, Digs - 1, 3);
+  Set_Image_Real (V, S, P, Fore, Digs - 1, 3);
end Image_Floating_Point;
 
-
-- 
2.43.0

[COMMITTED 28/30] ada: Defend against risk of infinite loop

2024-12-12 Thread Marc Poulhiès

From: Ronan Desplanques 

A recently fixed bug caused an infinite loop when assertions were not
checked. With assertions checked, the symptom was just an internal
error caused by an assertion failure. This patch makes it so that if
another bug ever causes the same condition to fail, there will never be
an infinite loop with any assertion policy.

gcc/ada/ChangeLog:

* sem_ch3.adb (Access_Subprogram_Declaration): Replace assertion with
more defensive code.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index 11f69db21dc..5df949aa667 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -1100,7 +1100,9 @@ package body Sem_Ch3 is
 | N_Protected_Type_Declaration
   loop
  D_Ityp := Parent (D_Ityp);
- pragma Assert (D_Ityp /= Empty);
+ if No (D_Ityp) then
+raise Program_Error;
+ end if;
   end loop;
 
   Set_Associated_Node_For_Itype (Desig_Type, D_Ityp);
-- 
2.43.0

[COMMITTED 21/30] ada: Fix the level of the LLVM chapter in the User's Guide

2024-12-12 Thread Marc Poulhiès

From: Sebastian Poeplau 

gcc/ada/ChangeLog:

* doc/gnat_ugn/building_executable_programs_with_gnat.rst: Move
the LLVM chapter one level up.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../building_executable_programs_with_gnat.rst   |  2 +-
 gcc/ada/gnat_ugn.texi| 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst 
b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
index 7870d8d7c57..48e47eb8336 100644
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -8023,7 +8023,7 @@ replace colons with semicolons in the assignments to 
these variables.
 .. _GNATLLVM:
 
 GNAT with the LLVM Back End

+===
 
 This section outlines the usage of the GNAT compiler with the LLVM
 back end and highlights its key limitations. Certain GNAT versions,
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index c856dddba90..662fe1c1642 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -239,6 +239,7 @@ Building Executable Programs with GNAT
 * Binding with gnatbind:: 
 * Linking with gnatlink:: 
 * Using the GNU make Utility:: 
+* GNAT with the LLVM Back End:: 
 
 Building with gnatmake
 
@@ -307,7 +308,6 @@ Using the GNU make Utility
 * Automatically Creating a List of Directories:: 
 * Generating the Command Line Switches:: 
 * Overcoming Command Line Length Limits:: 
-* GNAT with the LLVM Back End:: 
 
 GNAT Utility Programs
 
@@ -7060,6 +7060,7 @@ in a GNAT context (see @ref{71,,Using the GNU make 
Utility}).
 * Binding with gnatbind:: 
 * Linking with gnatlink:: 
 * Using the GNU make Utility:: 
+* GNAT with the LLVM Back End:: 
 
 @end menu
 
@@ -17255,7 +17256,7 @@ may be useful to control the exact invocation by using 
the verbose
 switch.
 @end table
 
-@node Using the GNU make Utility,,Linking with gnatlink,Building Executable 
Programs with GNAT
+@node Using the GNU make Utility,GNAT with the LLVM Back End,Linking with 
gnatlink,Building Executable Programs with GNAT
 @anchor{gnat_ugn/building_executable_programs_with_gnat 
id48}@anchor{12f}@anchor{gnat_ugn/building_executable_programs_with_gnat 
using-the-gnu-make-utility}@anchor{71}
 @section Using the GNU @code{make} Utility
 
@@ -17277,7 +17278,6 @@ is the same, these examples use some advanced features 
found only in
 * Automatically Creating a List of Directories:: 
 * Generating the Command Line Switches:: 
 * Overcoming Command Line Length Limits:: 
-* GNAT with the LLVM Back End:: 
 
 @end menu
 
@@ -17479,7 +17479,7 @@ all:
 gnatmake $@{GNATMAKE_SWITCHES@} main_unit
 @end example
 
-@node Overcoming Command Line Length Limits,GNAT with the LLVM Back 
End,Generating the Command Line Switches,Using the GNU make Utility
+@node Overcoming Command Line Length Limits,,Generating the Command Line 
Switches,Using the GNU make Utility
 @anchor{gnat_ugn/building_executable_programs_with_gnat 
id52}@anchor{136}@anchor{gnat_ugn/building_executable_programs_with_gnat 
overcoming-command-line-length-limits}@anchor{137}
 @subsection Overcoming Command Line Length Limits
 
@@ -17537,9 +17537,9 @@ all:
 gnatmake main_unit
 @end example
 
-@node GNAT with the LLVM Back End,,Overcoming Command Line Length Limits,Using 
the GNU make Utility
+@node GNAT with the LLVM Back End,,Using the GNU make Utility,Building 
Executable Programs with GNAT
 @anchor{gnat_ugn/building_executable_programs_with_gnat 
gnat-with-the-llvm-back-end}@anchor{138}@anchor{gnat_ugn/building_executable_programs_with_gnat
 gnatllvm}@anchor{7}
-@subsection GNAT with the LLVM Back End
+@section GNAT with the LLVM Back End
 
 
 This section outlines the usage of the GNAT compiler with the LLVM
-- 
2.43.0

[committed] crc: Comment spelling fix

2024-12-12 Thread Jakub Jelinek

Hi!

"replacement is succeeded" doesn't look correct to me (though, sure, I'm
not a native English speaker), so this patch drops the "is".

2024-12-12  Jakub Jelinek  

* gimple-crc-optimization.cc (crc_optimization::optimize_crc_loop):
Comment spelling fix, is succeeded -> succeeded.

--- gcc/gimple-crc-optimization.cc.jj   2024-12-02 11:08:52.070134518 +0100
+++ gcc/gimple-crc-optimization.cc  2024-12-11 17:50:32.562023899 +0100
@@ -227,7 +227,7 @@ class crc_optimization {
 
   /* Attempts to optimize a CRC calculation loop by replacing it with a call to
  an internal function (IFN_CRC or IFN_CRC_REV).
- Returns true if replacement is succeeded, otherwise false.  */
+ Returns true if replacement succeeded, otherwise false.  */
   bool optimize_crc_loop (gphi *output_crc);
 
  public:
@@ -1214,7 +1214,7 @@ crc_optimization::get_output_phi ()
 
 /* Attempts to optimize a CRC calculation loop by replacing it with a call to
an internal function (IFN_CRC or IFN_CRC_REV).
-   Returns true if replacement is succeeded, otherwise false.  */
+   Returns true if replacement succeeded, otherwise false.  */
 
 bool
 crc_optimization::optimize_crc_loop (gphi *output_crc)

Jakub

[RFC PATCH] expr, gimplify: Optimize !complete_p CONSTRUCTOR initialization [PR116416]

2024-12-12 Thread Jakub Jelinek

Hi!

As we don't have a SRA fix for PR117971, I thought I'd try to improve
it using an optimization during gimplification.
This is about the tree-ssa/pr78687.C testcase, which is a variant with
struct option_1
{
void *a, *b, *c, *d, *e;
};

struct option_2
{
};
variants.  Since the PR116416 changes we clear the whole object,
which is at least sizeof (size_t) + 5 * sizeof (void *) large, because
there is a single byte (option_2) zero initialized (it uses option_2()
rather than e.g. option_2{}), that causes categorize_ctor_elements
to say that the whole CONSTRUCTOR is !complete_p and we clear everything,
which kills SRA for some reason (I think SRA needs to be fixed in any case,
because the clearing because of required clearing of padding bits will
now be more common since the PR116416 changes).
So, what I wanted to do is avoid clearing everything when we know only a
single subobject is incomplete, in that case we can clear just that and not
everything else.
The patch works by extending categorize_ctor_elements to note the access
path, vector of constructor_elt indexes (purposes) for the only incomplete
subobject if the CONSTRUCTOR is !complete_p but everything except that
subobject is complete.
This is done by pushing there the complete access path on the first
incomplete subobject encountered and truncating that access path later
if we notice incomplete subobjects somewhere else.
E.g. on C23:
struct S { int a; };
struct T { struct S b; int c; struct S d; };
struct U { struct T e; int f; struct T g; };
void f0 (struct U *);

void
f1 (void)
{
  struct U u = { { {}, 1, {} }, 2, { {}, 3, {} } };
  f0 (&u);
}

void
f2 (void)
{
  struct U u = { { { 1 }, 2, { 3 } }, 4, { { 5 }, 6, {} } };
  f0 (&u);
}

void
f3 (void)
{
  struct U u = { { { }, 2, { } }, 4, { { 5 }, 6, { 7 } } };
  f0 (&u);
}
the optimization doesn't change anything in f1 (there are multiple
incomplete bits all over), changes
-  u = {};
   u.e.b.a = 1;
   u.e.c = 2;
   u.e.d.a = 3;
   u.f = 4;
   u.g.b.a = 5;
   u.g.c = 6;
+  u.g.d = {};
   f0 (&u);
in f2, only u.g.d is incomplete, and in f3
-  u = {};
+  u.e = {};
   u.e.c = 2;
   u.f = 4;
   u.g.b.a = 5;
...
(u.e has multiple incomplete subobjects, but nothing else does).
Now, when looking at the tree-ssa/pr78687.C testcase, I've noticed
that the expected
-  D.10177 = {};
+  D.10177._storage.D.9582.D.9163._tail.D.9221._tail.D.9280._head = {};
change doesn't happen there (instead the clearing is dropped)
because gimplification optimizes it away,
so I've filed PR118002 and worked on that and posted a patch for that.
Now, unfortunately with the version of the patch posted without the
cp-gimplify.cc hunk (which regressed empty1.C test) it is still removed,
so I wonder if this optimization doesn't have to use some langhook where
the C++ FE would tell that the store is to an empty base that the
gimplification hook would optimize away and so needs to try an outer
object.  Or if we need to somehow force the zero initializers in this
case anyway somehow by skipping further gimplification on it or what.
The thing is if the outer = {}; would be added (but that would need to
be verified if it wouldn't be optimized away by cp_gimplify_expr somehow),
that clears the whole thing, so just clearing some subobject of it should
be still fine, it is necessarily before we actually store some other data
members that might overlay those.

And another problem is that the patch regresses the
tree-ssa/pr90883.C test, and in that case it actually makes the emitted
code worse.  The testcase has a struct with char a[7]; int b;
members with NSDMI, previously we cleared everything, because the a
initializer was {} and thus "incomplete", even when the padding byte
doesn't have to be cleared, but it actually results in
        xorl    %eax, %eax
        xorl    %edx, %edx
        ret
Now, with the patch we see that not the whole struct is incomplete
with D.2618 = { {}, 0 }, just a, and so
-  D.2618 = {};
+  D.2618.a = {};
+  D.2618.b = 0;
(note, the D.2618.b part was omitted because of the clearing of the whole).
Alexandre added recently an optimization to clear whole object anyway,
/* If the object is small enough to go in registers, and it's
   not required to be constructed in memory, clear it first.
   That will avoid wasting cycles preserving any padding bits
   that might be there, and if there aren't any, the compiler
   is smart enough to optimize the clearing out.  */
else if (complete_p <= 0
 && !TREE_ADDRESSABLE (ctor)
 && !TREE_THIS_VOLATILE (object)
 && (TYPE_MODE (type) != BLKmode || TYPE_NO_FORCE_BLK (type))
 && optimize)
  cleared = true;
but unfortunately that doesn't trigger here, because type has BLKmode here,
it is 12 bytes.  So maybe we'd need to go smarter here, don't perform the
clearing of the subobject t

[COMMITTED 27/30] ada: Avoid going through symlinks in the json report

2024-12-12 Thread Marc Poulhiès

From: Viljar Indus 

gcc/ada/ChangeLog:

* errout.adb (Write_JSON_Location): Avoid going through
symbolic links when printing the full name.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 7e24970f187..3c499273ab7 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -2372,7 +2372,8 @@ package body Errout is
  Write_Str ("{""file"":""");
  if Full_Path_Name_For_Brief_Errors then
 Write_JSON_Escaped_String
-  (System.OS_Lib.Normalize_Pathname (Get_Name_String (Name)));
+  (System.OS_Lib.Normalize_Pathname
+ (Get_Name_String (Name), Resolve_Links => False));
  else
 Write_Name (Name);
  end if;
-- 
2.43.0

Re: [patch,avr] Add new 24-bit address space __flashx

2024-12-12 Thread Denis Chertykov

ср, 11 дек. 2024 г. в 16:56, Georg-Johann Lay :
>
> This patch adds __flashx as a new named address space that allocates
> objects in .progmemx.data.  The handling is mostly the same or similar
> to that of 24-bit space __memx, except that the output routines are
> simpler and more efficient.  Loads are emitted inline when ELPMX or
> LPMX is available.  The address space uses 24-bit addresses even
> on devices with a program memory size of 64 KiB or less.
>
> Passes without new regressions.
>
> Also passes without new regressions when used as AS in the proposed
> new target hook TARGET_ADDR_SPACE_FOR_ARTIFICIAL_RODATA, cf.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-December/671216.html
>
> Ok for trunk?

Ok.
Please apply.

Denis

Re: [PATCH v2 0/4] arm: [MVE intrinsics] Rework intrinsics for loads/stores/ tuples

2024-12-12 Thread Richard Earnshaw (lists)


On 09/12/2024 15:05, Christophe Lyon wrote:

Changes v1->v2:

- Keep MAX_TUPLE_SIZE=0 and update accesses to acle_vector_types
   accordingly.

- implement arm_array_mode in patch 4/4 instead of 2/4 to avoid
   temporary regressions when running the testsuite at patch 2/4 (helps
   future bisects)



This patch series re-implements the intrinsics for loads and stores of
2- and 4- tuples using the new framework.

Most of the code is very similar to the aarch64 counterpart.

Patch #1 adds new modes, similar to aarch64's Advanced SIMD ones for
q-registers.

Patch #2 updates and fixes support for tuple types in the MVE
intrinsics framework.

Patch #3 makes use of the fixes in patch #2 to include tuples in the
store shape description.

Patch #4 is the main one, which updates a few places in arm.cc where
we handled only VALID_NEON_STRUCT_MODE: since we now have a different
VALID_MVE_STRUCT_MODE, a couple of places needed a fix.

The introduction of all these new modes instead of just OImode and
XImode makes a few parts more verbose though.

This patch series applies on top of the previous one "Rework
store_scatter and load_gather intrinsics".

Christophe Lyon (4):
   arm: [MVE intrinsics] add modes for tuples
   arm: [MVE intrinsics] add support for tuples
   arm: [MVE intrinsics] fix store shape to support tuples
   arm: [MVE intrinsics] rework vst2q vst4q vld2q vld4q

  gcc/config/arm/arm-modes.def  |  22 +
  gcc/config/arm/arm-mve-builtins-base.cc   |  71 +++
  gcc/config/arm/arm-mve-builtins-base.def  |   8 +
  gcc/config/arm/arm-mve-builtins-base.h|   4 +
  gcc/config/arm/arm-mve-builtins-shapes.cc |   6 +-
  gcc/config/arm/arm-mve-builtins.cc|  78 ++-
  gcc/config/arm/arm-mve-builtins.h |   3 +-
  gcc/config/arm/arm.cc |  43 +-
  gcc/config/arm/arm.h  |  13 +-
  gcc/config/arm/arm_mve.h  | 628 --
  gcc/config/arm/arm_mve_builtins.def   |   4 -
  gcc/config/arm/iterators.md   |  36 +-
  gcc/config/arm/mve.md |  47 +-
  gcc/config/arm/vec-common.md  |  76 ++-
  14 files changed, 343 insertions(+), 696 deletions(-)



OK

R.

[PATCH] RISC-V: Emit vector shift pattern for const_vector [PR117353].

2024-12-12 Thread Robin Dapp

Hi,

in PR117353 and PR117878 we expand a const vector during reload.  For
this we use an unpredicated left shift.  Normally an insn like this is
split but as we introduce it late and cannot create pseudos anymore
it remains unpredicated and is not recognized by the vsetvl pass (where
we expect all insns to be in predicated RVV format).

This patch directly emits a predicated shift instead.  We could
distinguish between !lra_in_progress and lra_in_progress and emit
an unpredicated shift in the former case but we're not very likely
to optimize it anyway so it doesn't seem worth it.

Regtested on rv64gcv_zvl512b and waiting for the CI.

Regards
 Robin

PR target/117353
PR target/117878

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Use predicated
instead of simple shift.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr117353.c: New test.
---
 gcc/config/riscv/riscv-v.cc   |  8 +++--
 .../gcc.target/riscv/rvv/autovec/pr117353.c   | 29 +++
 2 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5c14c77068f..417c36a7587 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1439,9 +1439,11 @@ expand_const_vector (rtx target, rtx src)
  rtx shift_count
= gen_int_mode (exact_log2 (builder.npatterns ()),
builder.inner_mode ());
- rtx tmp1 = expand_simple_binop (builder.mode (), LSHIFTRT,
-vid, shift_count, NULL_RTX,
-false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (builder.mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar
+  (LSHIFTRT, builder.mode ()), BINARY_OP,
+  shift_ops);
 
  /* Step 3: Generate tmp2 = tmp1 * step.  */
  rtx tmp2 = gen_reg_rtx (builder.mode ());
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c
new file mode 100644
index 000..135a00194c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv_zvl256b -mabi=lp64d" } */
+
+int *b;
+
+inline void c (char *d, int e)
+{
+  d[0] = 0;
+  d[1] = e;
+}
+
+void f ();
+
+void h ()
+{
+  for (;;)
+{
+  char *a;
+  long g = 8;
+  while (g)
+   {
+ c (a, *b);
+ b++;
+ a += 2;
+ g--;
+   }
+  f ();
+}
+}
-- 
2.47.1

Re: [PATCH 2/7]AArch64: Add SVE support for simd clones [PR96342]

2024-12-12 Thread Richard Sandiford

Tamar Christina  writes:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 
> 0fec1cd439e729dca495aac4dea054a25ede20a7..e6c2bdeb00681848a838009c833cfe3271a94049
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -998,14 +998,16 @@ static GTY(()) hash_map 
> *overload_names[2];
>  /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE 
> vectors
> and NUM_PR SVE predicates.  MANGLED_NAME, if nonnull, is the ABI-defined
> mangling of the type.  ACLE_NAME is the  name of the type.  */

How about:

mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type,
or null if <arm_sve.h> does not provide the type.  */

> -static void
> +void
>  add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
>   const char *mangled_name, const char *acle_name)
>  {
>tree mangled_name_tree
>  = (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
> +  tree acle_name_tree
> += (acle_name ? get_identifier (acle_name) : NULL_TREE);
>  
> -  tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
> +  tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
>value = tree_cons (NULL_TREE, mangled_name_tree, value);
>value = tree_cons (NULL_TREE, size_int (num_pr), value);
>value = tree_cons (NULL_TREE, size_int (num_zr), value);
> [...]
>  
> -  clonei->vecsize_mangle = 'n';
> +  /* If we could not determine the WDS type from available parameters/return,
> + then fallback to using uintptr_t.  */
> +  if (wds_elt_bits == 0)
> +wds_elt_bits = POINTER_SIZE;
> +
>clonei->mask_mode = VOIDmode;
>poly_uint64 simdlen;
> -  auto_vec simdlens (2);
> +  typedef struct
> +{
> +  poly_uint64 len;
> +  char mangle;
> +} aarch64_clone_info;
> +  auto_vec clones (3);

Might as well make this "auto_vec<aarch64_clone_info, 3> clones;".

> [...]
> +/* Helper function to adjust an SVE vector type of an SVE simd clone.  
> Returns
> +   an SVE vector type based on the element type of the vector TYPE, with 
> SIMDLEN
> +   number of elements.  If IS_MASK, returns an SVE mask type appropriate for 
> use
> +   with the SVE type it would otherwise return.  */
 
> +static tree
> +simd_clone_adjust_sve_vector_type (tree type, bool is_mask, poly_uint64 
> simdlen)
> +{
> +  unsigned int num_zr = 0;
> +  unsigned int num_pr = 0;
> +  machine_mode vector_mode;
> +  type = TREE_TYPE (type);
> +  scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
> +  vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
> +  type = build_vector_type_for_mode (type, vector_mode);
> +  if (is_mask)
> +{
> +  type = truth_type_for (type);
> +  num_pr = 1;
> +}
> +  else
> +num_zr = 1;
> +
> +  /* We create new types here with the SVE type attribute instead of using 
> ACLE
> + types as we need to support unpacked vectors which aren't available as
> + ACLE SVE types.  */

One thing that worried me when seeing this again is that we'll create
anonymous attributes for things that do have an ACLE type.  The anonymous
and ACLE attributes will then compare unequal.  But that will only make
a difference once we support a means of defining the implementation in
C/C++.  It might be worth adding a ??? though:

  /* ??? This creates anonymous "SVE type" attributes for all types,
     even those that correspond to <arm_sve.h> types.  This affects type
 compatibility in C/C++, but not in gimple.  (Gimple type equivalence
 is instead decided by TARGET_COMPATIBLE_VECTOR_TYPES_P.)

 Thus a C/C++ definition of the implementation function will have a
 different function type from the declaration that this code creates.
 However, it doesn't seem worth trying to fix that until we have a
 way of handling implementations that operate on unpacked types.  */

> +  type = build_distinct_type_copy (type);
> +  aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
> +  return type;
> +}
> +
>+/* Implement TARGET_SIMD_CLONE_ADJUST.  */
>  static void
>  aarch64_simd_clone_adjust (struct cgraph_node *node)
>  {
> -  /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> - use the correct ABI.  */
> -
>tree t = TREE_TYPE (node->decl);
> -  TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
> - TYPE_ATTRIBUTES (t));
> +
> +  if (node->simdclone->vecsize_mangle == 's')
> +{
> +  /* This is additive and has no effect if SVE, or a superset thereof, is
> +  already enabled.  */
> +  tree target = build_string (strlen ("+sve") + 1, "+sve");
> +  if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 
> 0))
> + gcc_unreachable ();
> +  push_function_decl (node->decl);
> +}
> +  else
> +{
> + /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> +us

Re: [PATCH] testsuite: arm: Use -mtune=cortex-m4 for thumb-ifcvt.c test

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 12:42, Richard Earnshaw (lists) wrote:

On 21/11/2024 19:01, Torbjörn SVENSSON wrote:

Ok for trunk and releases/gcc-14?

--

On Cortex-M4, the code generated is:
  cmp r0, r1
  ittene
  lslne   r0, r0, r1
  asrne   r0, r0, #1
  moveq   r0, r1
  add r0, r0, r1
  bx  lr

On Cortex-M7, the code generated is:
  cmp r0, r1
  beq .L3
  lslsr0, r0, r1
  asrsr0, r0, #1
  add r0, r0, r1
  bx  lr
.L3:
  mov r0, r1
  add r0, r0, r1
  bx  lr

As Cortex-M7 only allows a maximum of one conditional instruction, force
-mtune=cortex-m4 to have a stable test case.

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb-ifcvt.c: Use -mtune=cortex-m4.


OK.

R.


Pushed as r15-6168-ge7615f6c99f and r14.2.0-556-gcbedb3394a2.

Kind regards,
Torbjörn

Re: [PATCH] ifcombine field-merge: set upper bound for get_best_mode

2024-12-12 Thread Richard Biener

On Wed, Dec 11, 2024 at 6:32 PM Alexandre Oliva  wrote:
>
> On Dec 11, 2024, Richard Biener  wrote:
>
> > I think these 0, 0 args are supposed to indicate the maximum extent of the
> > resulting access
>
> Thanks, that looks much better indeed.
>
>
> A bootstrap on aarch64-linux-gnu revealed that sometimes (for example,
> when building shorten_branches in final.cc) we will find such things
> as MEM <unsigned int>, where unsigned int happens to be a variant of
> the original unsigned int type, but with 64-bit alignment.  This
> unusual alignment circumstance caused (i) get_inner_reference to not
> look inside the MEM, (ii) get_best_mode to choose DImode instead of
> SImode to access the object, so we built a BIT_FIELD_REF that
> attempted to select all 64 bits of a 32-bit object, and that failed
> gimple verification ("position plus size exceeds size of referenced
> object") because there aren't that many bits in the unsigned int
> object.
>
> This patch avoids this failure mode by limiting the bitfield range
> with the size of the inner object, if it is a known constant.
>
> This enables us to avoid creating a BIT_FIELD_REF and reusing the load
> expr, but we still introduced a separate load, that would presumably
> get optimized out, but that is easy enough to avoid in the first place
> by reusing the SSA_NAME it was originally loaded into, so I
> implemented that in make_bit_field_load.
>
> Regstrapped on x86_64-linux-gnu; tested that it fixes the known issue on
> aarch64-linux-gnu, regstrapping now.  Ok to install?

OK.

Thanks,
Richard.

>
> for  gcc/ChangeLog
>
> * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit the
> size of the bitregion in get_best_mode calls by the inner
> object's type size, if known.
> (make_bit_field_load): Reuse SSA_NAME if we're attempting to
> issue an identical load.
> ---
>  gcc/gimple-fold.cc |   52 
> ++--
>  1 file changed, 34 insertions(+), 18 deletions(-)
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index a31fc283d51b0..9179010c9eaf1 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -7751,6 +7751,15 @@ make_bit_field_load (location_t loc, tree inner, tree 
> orig_inner, tree type,
>if (!point)
>  return ref;
>
> +  /* If we're remaking the same load, reuse the SSA NAME it is already loaded
> + into.  */
> +  if (gimple_assign_load_p (point)
> +  && operand_equal_p (ref, gimple_assign_rhs1 (point)))
> +{
> +  gcc_checking_assert (TREE_CODE (gimple_assign_lhs (point)) == 
> SSA_NAME);
> +  return gimple_assign_lhs (point);
> +}
> +
>gimple_seq stmts = NULL;
>tree ret = force_gimple_operand (ref, &stmts, true, NULL_TREE);
>
> @@ -8204,24 +8213,27 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
> tree truth_type,
>   to be relative to a field of that size.  */
>first_bit = MIN (ll_bitpos, rl_bitpos);
>end_bit = MAX (ll_bitpos + ll_bitsize, rl_bitpos + rl_bitsize);
> -  if (get_best_mode (end_bit - first_bit, first_bit, 0, 0,
> -TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
> -volatilep, &lnmode))
> +  HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner));
> +  poly_uint64 ll_end_region = 0;
> +  if (TYPE_SIZE (TREE_TYPE (ll_inner))
> +  && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner
> +ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));
> +  if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
> +ll_align, BITS_PER_WORD, volatilep, &lnmode))
>  l_split_load = false;
>else
>  {
>/* Consider the possibility of recombining loads if any of the
>  fields straddles across an alignment boundary, so that either
>  part can be loaded along with the other field.  */
> -  HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
>HOST_WIDE_INT boundary = compute_split_boundary_from_align
> -   (align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
> +   (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
>
>if (boundary < 0
> - || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
> -align, BITS_PER_WORD, volatilep, &lnmode)
> - || !get_best_mode (end_bit - boundary, boundary, 0, 0,
> -align, BITS_PER_WORD, volatilep, &lnmode2))
> + || !get_best_mode (boundary - first_bit, first_bit, 0, 
> ll_end_region,
> +ll_align, BITS_PER_WORD, volatilep, &lnmode)
> + || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
> +ll_align, BITS_PER_WORD, volatilep, &lnmode2))
> return 0;
>
>/* If we can't have a single load, but can with two, figure out whether
> @@ -8368,16 +8380,19 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
> tree truth_type,
>  a

Re: [PATCH v2] libstdc++: add initializer_list constructor to std::span (P2447)

2024-12-12 Thread Jonathan Wakely

On Thu, 12 Dec 2024 at 14:24, Giuseppe D'Angelo
 wrote:
>
> Hi,
>
> On 12/12/2024 01:04, Jonathan Wakely wrote:
> >> I'll prepare a patch to do that,
> > Et voila:
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-December/671432.html
>
> Thanks! All done, new patch is attached.

Thanks. Do you have any thoughts on my patch for the std::span assertions?


> >
> > These mem-initializers are in the wrong order (we had an existing
> > constructor with the same problem, but I pushed a fix less than an
> > hour ago).
>
> Which of course I c&p from. Are there some tests meant to be run under
> -Wreorder?

We should be using -Wall for the whole testsuite ... but we're not,
which is strange. I'll have to look into that.

Re: [PATCH 3/7]AArch64: Disable `omp declare variant' tests for aarch64 [PR96342]

2024-12-12 Thread Richard Sandiford

Tamar Christina  writes:
> Hi All,
>
> These tests are x86 specific and shouldn't be run for aarch64.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/96342
>   * c-c++-common/gomp/declare-variant-14.c: Make i?86 and x86_64 target
>   only test.
>   * gfortran.dg/gomp/declare-variant-14.f90: Likewise.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> ---
>
> diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c 
> b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c
> index 
> e3668893afe33a58c029cddd433d9bf43cce2bfa..2b71869787e819dc7bb8ca8f9512792ac2877515
>  100644
> --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c
> +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c
> @@ -1,6 +1,6 @@
> -/* { dg-do compile { target vect_simd_clones } } */
> +/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && vect_simd_clones } 
> } } */
>  /* { dg-additional-options "-fdump-tree-gimple -fdump-tree-optimized" } */
> -/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } 
> */
> +/* { dg-additional-options "-mno-sse3" } */

Might as well fold the -mno-sse3 into the previous dg-additional-options too.

Same for the other test.

OK with that change, thanks.

Richard

>  
>  int f01 (int);
>  int f02 (int);
> @@ -15,15 +15,13 @@ int
>  test1 (int x)
>  {
>/* At gimplification time, we can't decide yet which function to call.  */
> -  /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" { target { 
> !aarch64*-*-* } } } } */
> +  /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */
>/* After simd clones are created, the original non-clone test1 shall
>   call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones
>   shall call f01 with score 8.  */
>/* { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } } */
> -  /* { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target { 
> !aarch64*-*-* } } } } */
> -  /* { dg-final { scan-tree-dump-times "f03 \\\(x" 10 "optimized" { target { 
> aarch64*-*-* } } } } */
> -  /* { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { 
> !aarch64*-*-* } } } } */
> -  /* { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { 
> aarch64*-*-* } } } } */
> +  /* { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" } } */
> +  /* { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" } } */
>int a = f04 (x);
>int b = f04 (x);
>return a + b;
> diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 
> b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90
> index 
> 6319df0558f37b95f1b2eb17374bdb4ecbc33295..8db341fd15306a5deeae146808d7ef55aa713bb1
>  100644
> --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90
> +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90
> @@ -1,6 +1,6 @@
> -! { dg-do compile { target vect_simd_clones } }
> +! { dg-do compile { target { { i?86-*-* x86_64-*-* } && vect_simd_clones } } 
> } */
>  ! { dg-additional-options "-O0 -fdump-tree-gimple -fdump-tree-optimized" }
> -! { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } }
> +! { dg-additional-options "-mno-sse3" }
>  
>  module main
>implicit none
> @@ -40,10 +40,8 @@ contains
>  ! call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones
>  ! shall call f01 with score 8.
>  ! { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } }
> -! { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target 
> { !aarch64*-*-* } } } }
> -! { dg-final { scan-tree-dump-times "f03 \\\(x" 6 "optimized" { target { 
> aarch64*-*-* } } } }
> -! { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { 
> !aarch64*-*-* } } } }
> -! { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { 
> aarch64*-*-* } } } }
> +! { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" } }
> +! { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" } }
>  a = f04 (x)
>  b = f04 (x)
>  test1 = a + b

Re: [PATCH v2 2/5] testsuite: arm: Use -march=unset for bfloat16_scalar* tests

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 12:57, Richard Earnshaw (lists) wrote:

On 21/11/2024 14:24, Torbjörn SVENSSON wrote:

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/bfloat16_scalar_1_1.c: Use effective-target
arm_arch_v8_2a_bf16_hard.
* gcc.target/arm/bfloat16_scalar_2_1.c: Likewise.
* gcc.target/arm/bfloat16_scalar_3_1.c: Likewise.
* gcc.target/arm/bfloat16_scalar_1_2.c: Use effective-target
arm_arch_v8_2a_bf16.
* gcc.target/arm/bfloat16_scalar_2_2.c: Likewise.
* gcc.target/arm/bfloat16_scalar_3_2.c: Likewise.
* lib/target-supports.exp: Define effective-target
v8_2a_bf16 and v8_2a_bf16_hard.

Signed-off-by: Torbjörn SVENSSON 


OK.

R.



Pushed as r15-6162-g721c689f18c.

Kind regards,
Torbjörn

[PATCH v2 1/5] RISC-V: Rename constraint c0* to k0*

2024-12-12 Thread Kito Cheng

Rename those constraints since we want to define other constraints starting
with `c`; those constraints are internal and undocumented, so it's fine to
rename them.

gcc/ChangeLog:

* config/riscv/constraints.md (c01): Rename to...
(k01): ...this.
(c02): Rename to...
(k02): ...this.
(c03): Rename to...
(k03): ...this.
(c04): Rename to...
(k04): ...this.
(c08): Rename to...
(k08): ...this.
* config/riscv/corev.md (riscv_cv_simd_add_h_si): Update
constraints.
(riscv_cv_simd_sub_h_si): Ditto.
(riscv_cv_simd_cplxmul_i_si): Ditto.
(riscv_cv_simd_subrotmj_si): Ditto.
* config/riscv/riscv-v.cc (splat_to_scalar_move_p): Update
constraints.
* config/riscv/vector-iterators.md (stride_load_constraint):
Update constraints.
(stride_store_constraint): Ditto.
---
 gcc/config/riscv/constraints.md  |  10 +-
 gcc/config/riscv/corev.md|  10 +-
 gcc/config/riscv/riscv-v.cc  |   2 +-
 gcc/config/riscv/vector-iterators.md | 444 +--
 4 files changed, 233 insertions(+), 233 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index eb5a0bb75c7..af8186117db 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -45,27 +45,27 @@ (define_constraint "J"
   (and (match_code "const_int")
(match_test "ival == 0")))
 
-(define_constraint "c01"
+(define_constraint "k01"
   "Constant value 1."
   (and (match_code "const_int")
(match_test "ival == 1")))
 
-(define_constraint "c02"
+(define_constraint "k02"
   "Constant value 2"
   (and (match_code "const_int")
(match_test "ival == 2")))
 
-(define_constraint "c03"
+(define_constraint "k03"
   "Constant value 3"
   (and (match_code "const_int")
(match_test "ival == 3")))
 
-(define_constraint "c04"
+(define_constraint "k04"
   "Constant value 4"
   (and (match_code "const_int")
(match_test "ival == 4")))
 
-(define_constraint "c08"
+(define_constraint "k08"
   "Constant value 8"
   (and (match_code "const_int")
(match_test "ival == 8")))
diff --git a/gcc/config/riscv/corev.md b/gcc/config/riscv/corev.md
index e2db8f31130..02c27043301 100644
--- a/gcc/config/riscv/corev.md
+++ b/gcc/config/riscv/corev.md
@@ -871,7 +871,7 @@ (define_insn "riscv_cv_simd_add_h_si"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
(unspec:SI [(match_operand:SI 1 "register_operand" "r,r,r,r")
(match_operand:SI 2 "register_operand" "r,r,r,r")
-   (match_operand:QI 3 "const_int2_operand" "J,c01,c02,c03")]
+   (match_operand:QI 3 "const_int2_operand" "J,k01,k02,k03")]
UNSPEC_CV_ADD_H))]
"TARGET_XCVSIMD && !TARGET_64BIT"
"@
@@ -924,7 +924,7 @@ (define_insn "riscv_cv_simd_sub_h_si"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
(unspec:SI [(match_operand:SI 1 "register_operand" "r,r,r,r")
(match_operand:SI 2 "register_operand" "r,r,r,r")
-   (match_operand:QI 3 "const_int2_operand" "J,c01,c02,c03")]
+   (match_operand:QI 3 "const_int2_operand" "J,k01,k02,k03")]
UNSPEC_CV_SUB_H))]
"TARGET_XCVSIMD && !TARGET_64BIT"
"@
@@ -2561,7 +2561,7 @@ (define_insn "riscv_cv_simd_cplxmul_r_si"
(unspec:SI [(match_operand:SI 1 "register_operand" "r,r,r,r")
(match_operand:SI 2 "register_operand" "r,r,r,r")
(match_operand:SI 3 "register_operand" "0,0,0,0")
-   (match_operand:QI 4 "const_int2_operand" "J,c01,c02,c03")]
+   (match_operand:QI 4 "const_int2_operand" "J,k01,k02,k03")]
UNSPEC_CV_CPLXMUL_R))]
"TARGET_XCVSIMD && !TARGET_64BIT"
"@
@@ -2578,7 +2578,7 @@ (define_insn "riscv_cv_simd_cplxmul_i_si"
(unspec:SI [(match_operand:SI 1 "register_operand" "r,r,r,r")
(match_operand:SI 2 "register_operand" "r,r,r,r")
(match_operand:SI 3 "register_operand" "0,0,0,0")
-   (match_operand:QI 4 "const_int2_operand" "J,c01,c02,c03")]
+   (match_operand:QI 4 "const_int2_operand" "J,k01,k02,k03")]
UNSPEC_CV_CPLXMUL_I))]
"TARGET_XCVSIMD && !TARGET_64BIT"
"@
@@ -2604,7 +2604,7 @@ (define_insn "riscv_cv_simd_subrotmj_si"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
(unspec:SI [(match_operand:SI 1 "register_operand" "r,r,r,r")
(match_operand:SI 2 "register_operand" "r,r,r,r")
-   (match_operand:QI 3 "const_int2_operand" "J,c01,c02,c03")]
+   (match_operand:QI 3 "const_int2_operand" "J,k01,k02,k03")]
UNSPEC_CV_SUBROTMJ))]
"TARGET_XCVSIMD && !TARGET_64BIT"
"@
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 47bc0255aa3..ebba33f82ca 100644
--- a/gc

[PATCH v2 2/5] RISC-V: Add cr and cf constraint

2024-12-12 Thread Kito Cheng

gcc/ChangeLog:

* config/riscv/constraints.md (cr): New.
(cf): New.
* config/riscv/riscv.h (reg_class): Add RVC_GR_REGS and
RVC_FP_REGS.
(REG_CLASS_NAMES): Ditto.
(REG_CLASS_CONTENTS): Ditto.
* doc/md.texi: Document cr and cf constraint.
* config/riscv/riscv.cc (riscv_regno_to_class): Update
FP_REGS to RVC_FP_REGS since it is a smaller set.
(riscv_secondary_memory_needed): Handle RVC_FP_REGS.
(riscv_register_move_cost): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/constraint-cf-zfinx.c: New.
* gcc.target/riscv/constraint-cf.c: New.
* gcc.target/riscv/constraint-cr.c: New.
---
 gcc/config/riscv/constraints.md   |  6 
 gcc/config/riscv/riscv.cc | 28 +++
 gcc/config/riscv/riscv.h  |  6 
 gcc/doc/md.texi   |  7 +
 .../gcc.target/riscv/constraint-cf-zfinx.c| 14 ++
 .../gcc.target/riscv/constraint-cf.c  | 14 ++
 .../gcc.target/riscv/constraint-cr.c  | 13 +
 7 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cf-zfinx.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cf.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cr.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index af8186117db..2dce9832219 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -33,6 +33,12 @@ (define_register_constraint "j" "SIBCALL_REGS"
 (define_register_constraint "l" "JALR_REGS"
   "@internal")
 
+(define_register_constraint "cr" "RVC_GR_REGS"
+  "RVC general purpose register (x8-x15).")
+
+(define_register_constraint "cf" "TARGET_HARD_FLOAT ? RVC_FP_REGS : 
(TARGET_ZFINX ? RVC_GR_REGS : NO_REGS)"
+  "RVC floating-point registers (f8-f15), if available, reuse GPR as FPR when 
use zfinx.")
+
 ;; General constraints
 
 (define_constraint "I"
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7860e5fbc23..db4de34f969 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -350,14 +350,14 @@ const enum reg_class 
riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   JALR_REGS,   JALR_REGS,  JALR_REGS,  JALR_REGS,
   JALR_REGS,   JALR_REGS,  JALR_REGS,  JALR_REGS,
   SIBCALL_REGS,SIBCALL_REGS,   SIBCALL_REGS,   SIBCALL_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FP_REGS, FP_REGS,FP_REGS,FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
+  RVC_FP_REGS, RVC_FP_REGS,RVC_FP_REGS,RVC_FP_REGS,
   FRAME_REGS,  FRAME_REGS, NO_REGS,NO_REGS,
   NO_REGS, NO_REGS,NO_REGS,NO_REGS,
   NO_REGS, NO_REGS,NO_REGS,NO_REGS,
@@ -9500,9 +9500,11 @@ static bool
 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   reg_class_t class2)
 {
+  bool class1_is_fpr = class1 == FP_REGS || class1 == RVC_FP_REGS;
+  bool class2_is_fpr = class2 == FP_REGS || class2 == RVC_FP_REGS;
   return (!riscv_v_ext_mode_p (mode)
  && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
- && (class1 == FP_REGS) != (class2 == FP_REGS)
+ && (class1_is_fpr != class2_is_fpr)
  && !TARGET_XTHEADFMV
  && !TARGET_ZFA);
 }
@@ -9513,8 +9515,12 @@ static int
 riscv_register_move_cost (machine_mode mode,
  reg_class_t from, reg_class_t to)
 {
-  if ((from == FP_REGS && to == GR_REGS) ||
-  (from == GR_REGS && to == FP_REGS))
+  bool from_is_fpr = from == FP_REGS || from == RVC_FP_REGS;
+  bool from_is_gpr = from == GR_REGS || from == RVC_GR_REGS;
+  bool to_is_fpr = to == FP_REGS || to == RVC_FP_REGS;
+  bool to_is_gpr = to == GR_REGS || to == RVC_GR_REGS;
+  if ((from_is_fpr && to == to_is_gpr) ||
+  (from_is_gpr && to_is_fpr))
 return tune_param->fmv_cost;
 
   return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 09de74667a9..aacb557248f 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.

Re: [PATCH 7/7]AArch64: Implement vector concat of partial SVE vectors

2024-12-12 Thread Richard Sandiford

Tamar Christina  writes:
> Hi All,
>
> This patch adds support for constructing a full SVE vector from two partial
> SVE vectors.  It also implements support for the standard vec_init optab to
> do this.
>
> gcc/ChangeLog:
>
>   PR target/96342
>   * config/aarch64/aarch64-sve.md (vec_init): New.
>   (@aarch64_pack_partial): New.
>   * config/aarch64/aarch64.cc (aarch64_sve_expand_vector_init): Special
>   case constructors of two vectors.
>   * config/aarch64/iterators.md (SVE_NO2E, SVE_PARTIAL_NO2E): New.
>   (VHALF, Vhalf, Vwstype): Add SVE partial vectors.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/96342
>   * gcc.target/aarch64/vect-simd-clone-2.c: New test.

This triggers an ICE for:

typedef unsigned int v8si __attribute__((vector_size(32)));
typedef unsigned int v16si __attribute__((vector_size(64)));

v16si __GIMPLE
foo (v8si x, v8si y)
{
  v16si res;

  res = _Literal (v16si) { x, y };
  return res;
}

compiled with -O2 -march=armv8-a+sve -msve-vector-bits=512 -fgimple.
Suggested fix below.

> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 
> 9afd11d347626eeb640722fdba2ab763b8479aa7..9e3577be6e943d7a5c951196463873d4bcfee07c
>  100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -2840,6 +2840,16 @@ (define_expand "vec_init"
>}
>  )
>  
> +(define_expand "vec_init"
> +  [(match_operand:SVE_NO2E 0 "register_operand")
> +(match_operand 1 "")]

Nit: excess indentation.

> +  "TARGET_SVE"
> +  {
> +aarch64_sve_expand_vector_init (operands[0], operands[1]);
> +DONE;
> +  }
> +)
> +
>  ;; Shift an SVE vector left and insert a scalar into element 0.
>  (define_insn "vec_shl_insert_"
>[(set (match_operand:SVE_FULL 0 "register_operand")
> @@ -9347,6 +9357,20 @@ (define_insn "vec_pack_trunc_"
>"uzp1\t%0., %1., %2."
>  )
>  
> +;; Integer partial pack packing two partial SVE types into a single full SVE
> +;; type of the same element type.  Use UZP1 on the wider type, which discards
> +;; the high part of each wide element.  This allows to concat SVE partial 
> types
> +;; into a wider vector.
> +(define_insn "@aarch64_pack_partial"
> +  [(set (match_operand:SVE_PARTIAL_NO2E 0 "register_operand" "=w")
> + (unspec:SVE_PARTIAL_NO2E
> +   [(match_operand: 1 "register_operand" "w")
> +(match_operand: 2 "register_operand" "w")]
> +   UNSPEC_PACK))]
> +  "TARGET_SVE"
> +  "uzp1\t%0., %1., %2."
> +)
> +

To fix the ICE above, I think we should define this pattern for
all SVE_NO2E.  We can also make it a vec_concat, which should work
for both endiannesses.

Rather than use Vwstype, I think this is conceptually a permute of the
containers, so should use Vctype.  That will change VNx4QI from using .h
(as in the patch) to .s (to match VNx4SI), but both work.

>  ;; -
>  ;;  [INT<-INT] Unpacks
>  ;; -
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> af6fede102c2be6673c24f8020d000ea56322997..690d54b0a2954327e00d559f96c414c81c2e18cd
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -24790,6 +24790,17 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
>  v.quick_push (XVECEXP (vals, 0, i));
>v.finalize ();
>  
> +  /* If we have two elements and are concatting vector.  */
> +  machine_mode elem_mode = GET_MODE (v.elt (0));
> +  if (nelts == 2 && VECTOR_MODE_P (elem_mode))
> +{
> +  /* We've failed expansion using a dup.  Try using a cheeky truncate. */
> +  rtx arg0 = force_reg (elem_mode, v.elt(0));
> +  rtx arg1 = force_reg (elem_mode, v.elt(1));
> +  emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
> +  return;
> +}
> +

I think it'd be better to use an independent routine for this,
since there's not really any overlap with the scalar-element code.
In particular, we might as well get the vectors directly from
XVECEXP (val, 0, ...), since we don't need the rtx_vector_builder
for the expansion.

>/* If neither sub-vectors of v could be initialized specially,
>   then use INSR to insert all elements from v into TARGET.
>   ??? This might not be optimal for vectors with large
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 
> 023893d35f3e955e222c322ce370e84c95c29ee6..77d23d6ad795630d3d5fb5c076c086a479d46fee
>  100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -138,6 +138,14 @@ (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
>  ;; VQ without 2 element modes.
>  (define_mode_iterator VQ_NO2E

Re: [RFC][PATCH] AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS

2024-12-12 Thread Jennifer Schmitz



> On 6 Dec 2024, at 08:41, Jennifer Schmitz  wrote:
> 
> 
> 
>> On 5 Dec 2024, at 20:07, Richard Sandiford  wrote:
>> 
>> External email: Use caution opening links or attachments
>> 
>> 
>> Jennifer Schmitz  writes:
 On 5 Dec 2024, at 11:44, Richard Biener  wrote:
 
 External email: Use caution opening links or attachments
 
 
 On Thu, 5 Dec 2024, Jennifer Schmitz wrote:
 
> 
> 
>> On 17 Oct 2024, at 19:23, Richard Sandiford  
>> wrote:
>> 
>> External email: Use caution opening links or attachments
>> 
>> 
>> Jennifer Schmitz  writes:
>>> [...]
>>> Looking at the diff of the vect dumps (below is a section of the diff 
>>> for strided_store_2.c), it seemed odd that vec_to_scalar operations 
>>> cost 0 now, instead of the previous cost of 2:
>>> 
>>> +strided_store_1.c:38:151: note:=== vectorizable_operation ===
>>> +strided_store_1.c:38:151: note:vect_model_simple_cost: inside_cost 
>>> = 1, prologue_cost  = 0 .
>>> +strided_store_1.c:38:151: note:   ==> examining statement: *_6 = _7;
>>> +strided_store_1.c:38:151: note:   vect_is_simple_use: operand _3 + 
>>> 1.0e+0, type of def:internal
>>> +strided_store_1.c:38:151: note:   Vectorizing an unaligned access.
>>> +Applying pattern match.pd:236, generic-match-9.cc:4128
>>> +Applying pattern match.pd:5285, generic-match-10.cc:4234
>>> +strided_store_1.c:38:151: note:   vect_model_store_cost: inside_cost = 
>>> 12, prologue_cost = 0 .
>>> *_2 1 times unaligned_load (misalign -1) costs 1 in body
>>> -_3 + 1.0e+0 1 times scalar_to_vec costs 1 in prologue
>>> _3 + 1.0e+0 1 times vector_stmt costs 1 in body
>>> -_7 1 times vec_to_scalar costs 2 in body
>>> + 1 times vector_load costs 1 in prologue
>>> +_7 1 times vec_to_scalar costs 0 in body
>>> _7 1 times scalar_store costs 1 in body
>>> -_7 1 times vec_to_scalar costs 2 in body
>>> +_7 1 times vec_to_scalar costs 0 in body
>>> _7 1 times scalar_store costs 1 in body
>>> -_7 1 times vec_to_scalar costs 2 in body
>>> +_7 1 times vec_to_scalar costs 0 in body
>>> _7 1 times scalar_store costs 1 in body
>>> -_7 1 times vec_to_scalar costs 2 in body
>>> +_7 1 times vec_to_scalar costs 0 in body
>>> _7 1 times scalar_store costs 1 in body
>>> 
>>> Although the aarch64_use_new_vector_costs_p flag was used in multiple 
>>> places in aarch64.cc, the location that causes this behavior is this 
>>> one:
>>> unsigned
>>> aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
>>>  stmt_vec_info stmt_info, slp_tree,
>>>  tree vectype, int misalign,
>>>  vect_cost_model_location where)
>>> {
>>> [...]
>>> /* Try to get a more accurate cost by looking at STMT_INFO instead
>>>  of just looking at KIND.  */
>>> -  if (stmt_info && aarch64_use_new_vector_costs_p ())
>>> +  if (stmt_info)
>>> {
>>>   /* If we scalarize a strided store, the vectorizer costs one
>>>  vec_to_scalar for each element.  However, we can store the first
>>>  element using an FP store without a separate extract step.  */
>>>   if (vect_is_store_elt_extraction (kind, stmt_info))
>>> count -= 1;
>>> 
>>>   stmt_cost = aarch64_detect_scalar_stmt_subtype (m_vinfo, kind,
>>>   stmt_info, stmt_cost);
>>> 
>>>   if (vectype && m_vec_flags)
>>> stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
>>> stmt_info, vectype,
>>> where, stmt_cost);
>>> }
>>> [...]
>>> return record_stmt_cost (stmt_info, where, (count * stmt_cost).ceil ());
>>> }
>>> 
>>> Previously, for mtune=generic, this function returned a cost of 2 for a 
>>> vec_to_scalar operation in the vect body. Now "if (stmt_info)" is 
>>> entered and "if (vect_is_store_elt_extraction (kind, stmt_info))" 
>>> evaluates to true, which sets the count to 0 and leads to a return 
>>> value of 0.
>> 
>> At the time the code was written, a scalarised store would be costed
>> using one vec_to_scalar call into the backend, with the count parameter
>> set to the number of elements being stored.  The "count -= 1" was
>> supposed to lop off the leading element extraction, since we can store
>> lane 0 as a normal FP store.
>> 
>> The target-independent costing was later reworked so that it costs
>> each operation individually:
>> 
>>   for (i = 0; i < nstores; i++)
>> {
>>   if (costing_p)
>> {
>>   /* Only need vector extracting when there are more
>>

Re: [PATCH] testsuite: arm: Update expected RTL for reg_equal_test.c test

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-12 12:26, Richard Earnshaw (lists) wrote:

On 10/11/2024 13:38, Torbjörn SVENSSON wrote:

Hi Richard,

I'm not sure if I'm doing something wrong here, or if it was an oversight
when doing the update in r12-8108-g62082d278d1.
Anyway, the commit message suggests that it's only the constant that is of
interest, so I updated the test to only check the constant. Do you think
this is enough, or should the test case also verify that it's used in
a "set" expression?

Ok for trunk and releases/gcc-14?

--

The test case was rewritten in r12-8108-g62082d278d1, but the expected
RTL was not updated.

The diff for the generated reg_equal_test.c.*r.expand files produced by
r12-8108-g62082d278d1 and r15-5047-g7e1d9f58858 is:

--- reg_equal_test.c.253r.expand-r12-8108-g62082d278d1  2024-11-10 
14:24:54.957438394 +0100
+++ reg_equal_test.c.268r.expand-r15-5047-g7e1d9f58858  2024-11-10 
14:30:13.633437178 +0100

@@ -1,5 +1,5 @@

-;; Function x (x, funcdef_no=0, decl_uid=4195, cgraph_uid=1, 
symbol_order=0)
+;; Function x (x, funcdef_no=0, decl_uid=4590, cgraph_uid=1, 
symbol_order=0)


  ;; Generating RTL for gimple basic block 2
@@ -25,6 +25,6 @@
  (note 1 0 3 NOTE_INSN_DELETED)
  (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
-(insn 5 2 0 2 (set (reg/v:SI 113 [ d ])
+(insn 5 2 0 2 (set (reg/v:SI 114 [ d ])
  (const_int -942519458 [0xc7d24b5e])) -1
   (nil))



That's not what I see if I compile with "-march=armv8-a -mthumb".  I get 
the reg_equal note that I expect and the insn is something like:


(insn 6 5 0 2 (set (zero_extract:SI (reg/v:SI 114 [ d ])
     (const_int 16 [0x10])
     (const_int 16 [0x10]))
     (const_int 51154 [0xc7d2])) -1
  (expr_list:REG_EQUAL (const_int -942519458 [0xc7d24b5e])
     (nil)))

Can you tell me the exact options you were using to get your output?


Hmm.. This is interesting. With Cortex-A, I do see the same output that 
you get. With Cortex-M, it's instead my output.


You can get my output with any of the Cortex-M targets (M3 or above):

This is the line that I've used
arm-none-eabi-gcc gcc.target/arm/reg_equal_test.c  -mthumb 
-march=armv8.1-m.main -mfloat-abi=soft -fgimple -O1 -fdump-rtl-expand -S 
-o /dev/null


I suppose the change I propose will match both cases, but is there any 
downside to not checking the REG_EQUAL part?

Should the test case be Cortex-A only?

Kind regards,
Torbjörn




R.


In both versions, the constant is simply assigned, thus I updated the
expected RTL accordingly.

gcc/testsuite/ChangeLog:

* gcc.target/arm/reg_equal_test.c: Update expected RTL.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/reg_equal_test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/reg_equal_test.c b/gcc/ 
testsuite/gcc.target/arm/reg_equal_test.c

index d87c75cc27c..4337e3f0af5 100644
--- a/gcc/testsuite/gcc.target/arm/reg_equal_test.c
+++ b/gcc/testsuite/gcc.target/arm/reg_equal_test.c
@@ -12,4 +12,4 @@ x ()
    return;
  }
-/* { dg-final { scan-rtl-dump "expr_list:REG_EQUAL \\(const_int 
-942519458" "expand" } } */

+/* { dg-final { scan-rtl-dump "\\(const_int -942519458" "expand" } } */

[PATCH v2 3/5] RISC-V: Rename internal operand modifier N to n

2024-12-12 Thread Kito Cheng

Here is a proposal to use N for printing the register encoding number,
so let's rename the existing internal operand modifier `N` to `n`.

gcc/ChangeLog:

* config/riscv/corev.md (*cv_branch): Update modifier.
(*branch): Ditto.
* config/riscv/riscv.cc (riscv_print_operand): Update modifier.
* config/riscv/riscv.md (*branch): Update modifier.
---
 gcc/config/riscv/corev.md | 4 ++--
 gcc/config/riscv/riscv.cc | 4 ++--
 gcc/config/riscv/riscv.md | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/corev.md b/gcc/config/riscv/corev.md
index 02c27043301..3c0e9cecdfb 100644
--- a/gcc/config/riscv/corev.md
+++ b/gcc/config/riscv/corev.md
@@ -2627,7 +2627,7 @@ (define_insn "*cv_branch"
   "TARGET_XCVBI"
 {
   if (get_attr_length (insn) == 12)
-return "cv.b%N1\t%2,%z3,1f; jump\t%l0,ra; 1:";
+return "cv.b%n1\t%2,%z3,1f; jump\t%l0,ra; 1:";
 
   return "cv.b%C1imm\t%2,%3,%0";
 }
@@ -2645,7 +2645,7 @@ (define_insn "*branch"
   "TARGET_XCVBI"
 {
   if (get_attr_length (insn) == 12)
-return "b%N1\t%2,%z3,1f; jump\t%l0,ra; 1:";
+return "b%n1\t%2,%z3,1f; jump\t%l0,ra; 1:";
 
   return "b%C1\t%2,%z3,%l0";
 }
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index db4de34f969..b637cee5362 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6823,7 +6823,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char 
*p)
  any outermost HIGH.
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
-   'N' Print the inverse of the integer branch condition for comparison OP.
+   'n' Print the inverse of the integer branch condition for comparison OP.
'A' Print the atomic operation suffix for memory model OP.
'I' Print the LR suffix for memory model OP.
'J' Print the SC suffix for memory model OP.
@@ -6981,7 +6981,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   fputs (GET_RTX_NAME (code), file);
   break;
 
-case 'N':
+case 'n':
   /* The RTL names match the instruction names. */
   fputs (GET_RTX_NAME (reverse_condition (code)), file);
   break;
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3a4cd1d93a0..1eec51c117a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3215,7 +3215,7 @@ (define_insn "*branch"
   "!TARGET_XCVBI"
 {
   if (get_attr_length (insn) == 12)
-return "b%N1\t%2,%z3,1f; jump\t%l0,ra; 1:";
+return "b%n1\t%2,%z3,1f; jump\t%l0,ra; 1:";
 
   return "b%C1\t%2,%z3,%l0";
 }
-- 
2.34.1

[PATCH v2 4/5] RISC-V: Implment N modifier for printing the register number rather than the register name

2024-12-12 Thread Kito Cheng

The modifier `N` prints the raw encoding of a register. This is used
when using `.insn , `, where the user wants to pass
a value to the instruction in a known register, but where the
instruction doesn't follow the existing instruction formats, so the
assembly parser is not expecting a register name, just a raw integer.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_print_operand): Add N.
* doc/extend.texi: Document N.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/modifier-N-fpr.c: New.
* gcc.target/riscv/modifier-N-vr.c: New.
* gcc.target/riscv/modifier-N.c: New.
---
 gcc/config/riscv/riscv.cc | 23 +++
 gcc/doc/extend.texi   |  1 +
 .../gcc.target/riscv/modifier-N-fpr.c | 16 +
 .../gcc.target/riscv/modifier-N-vr.c  | 18 +++
 gcc/testsuite/gcc.target/riscv/modifier-N.c   | 16 +
 5 files changed, 74 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N-fpr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N-vr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index b637cee5362..684e5d24082 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6833,6 +6833,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char 
*p)
'S' Print shift-index of single-bit mask OP.
'T' Print shift-index of inverted single-bit mask OP.
'~' Print w if TARGET_64BIT is true; otherwise not print anything.
+   'N'  Print register encoding as integer (0-31).
 
Note please keep this list and the list in riscv.md in sync.  */
 
@@ -7079,6 +7080,28 @@ riscv_print_operand (FILE *file, rtx op, int letter)
output_addr_const (file, newop);
break;
   }
+case 'N':
+  {
+   if (!REG_P(op))
+ {
+   output_operand_lossage ("modifier 'N' require register operand");
+   break;
+ }
+
+   unsigned regno = REGNO (op);
+   unsigned offset = 0;
+   if (IN_RANGE (regno, GP_REG_FIRST, GP_REG_LAST))
+ offset = GP_REG_FIRST;
+   else if (IN_RANGE (regno, FP_REG_FIRST, FP_REG_LAST))
+ offset = FP_REG_FIRST;
+   else if (IN_RANGE (regno, V_REG_FIRST, V_REG_LAST))
+ offset = V_REG_FIRST;
+   else
+ output_operand_lossage ("invalid register number for 'N' modifie");
+
+   asm_fprintf (file, "%u", (regno - offset));
+   break;
+  }
 default:
   switch (code)
{
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 336361e0cfd..c9dea0fe6eb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12586,6 +12586,7 @@ The list below describes the supported modifiers and 
their effects for RISC-V.
 @headitem Modifier @tab Description
 @item @code{z} @tab Print ''@code{zero}'' instead of 0 if the operand is an 
immediate with a value of zero.
 @item @code{i} @tab Print the character ''@code{i}'' if the operand is an 
immediate.
+@item @code{N} @tab Print the register encoding as integer (0 - 31).
 @end multitable
 
 @anchor{shOperandmodifiers}
diff --git a/gcc/testsuite/gcc.target/riscv/modifier-N-fpr.c 
b/gcc/testsuite/gcc.target/riscv/modifier-N-fpr.c
new file mode 100644
index 000..42590e00c0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/modifier-N-fpr.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64if -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void foo() {
+/*
+** foo:
+**   ...
+**   fadd.s\s*ft0,\s*8,\s*9
+**   ...
+*/
+  register float fs0 __asm__ ("fs0");
+  register float fs1 __asm__ ("fs1");
+  __asm__ volatile("fadd.s ft0, %N0, %N1" : : "f" (fs0), "f" (fs1) : "memory");
+}
diff --git a/gcc/testsuite/gcc.target/riscv/modifier-N-vr.c 
b/gcc/testsuite/gcc.target/riscv/modifier-N-vr.c
new file mode 100644
index 000..ea591b02138
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/modifier-N-vr.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gv -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma riscv intrinsic "vector"
+
+void foo() {
+/*
+** foo:
+**   ...
+**   vadd.vv\s*v0,\s*1,\s*2
+**   ...
+*/
+  register vint32m1_t v1 __asm__ ("v1");
+  register vint32m1_t v2 __asm__ ("v2");
+  __asm__ volatile("vadd.vv v0, %N0, %N1" : : "vr" (v1), "vr" (v2) : "memory");
+}
diff --git a/gcc/testsuite/gcc.target/riscv/modifier-N.c 
b/gcc/testsuite/gcc.target/riscv/modifier-N.c
new file mode 100644
index 000..fef281611ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/modifier-N.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+
+void foo() {
+/*
+** foo:
+

Re: [PATCH] arm: [MVE intrinsics] Fix condition for vec_extract patterns

2024-12-12 Thread Richard Earnshaw (lists)


On 14/11/2024 10:41, Christophe Lyon wrote:

Remove floating-point condition from mve_vec_extract_sext_internal and
mve_vec_extract_zext_internal, since the MVE_2 iterator does not
include any FP mode.

gcc/ChangeLog:

* config/arm/mve.md (mve_vec_extract_sext_internal): Fix
condition.
(mve_vec_extract_zext_internal): Likewise.
---
  gcc/config/arm/mve.md | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a0a59da4040..70f6ec6c2cc 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4191,7 +4191,7 @@ (define_insn "*mve_vec_extract_sext_internal"
   (match_operand:MVE_2 1 "s_register_operand" "w")
   (parallel [(match_operand:SI 2 "immediate_operand" "i")]]
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (mode))
-   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (mode))"
+   "


I'd just put the closing quote on the previous line.  It looks odd 
dangling like that.

  {
if (BYTES_BIG_ENDIAN)
  {
@@ -4210,7 +4210,7 @@ (define_insn "*mve_vec_extract_zext_internal"
   (match_operand:MVE_2 1 "s_register_operand" "w")
   (parallel [(match_operand:SI 2 "immediate_operand" "i")]]
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (mode))
-   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (mode))"
+   "

Same


  {
if (BYTES_BIG_ENDIAN)
  {


Otherwise OK.

R.

Re: [PATCH] Add COBOL to gcc

2024-12-12 Thread Richard Biener

On Wed, Dec 11, 2024 at 4:19 PM James K. Lowden
 wrote:
>
> I think the term of art is "ping"?
>
> If GCC needs something from me to proceed with this, please tell me what
> it is.

I think we're waiting on the rest of the patches adding the Cobol
frontend at this point.

Richard.

> --jkl
>
> On Thu, 7 Nov 2024 17:28:33 -0500
> "James K. Lowden"  wrote:
>
> > On Fri, 8 Nov 2024 13:52:55 +0100
> > Jakub Jelinek  wrote:
> >
> > > Rather than a diff from /dev/null,
> > > > it's a blob with the exact file contents.  I hope it is correct in
> > > > this form.
> > >
> > > That is just how the web git viewer presents new file commits.
> > > On gcc-patches those should be posted as normal patches.
> >
> > Below is hopefully a well formed patch.  It adds ChangeLogs for the
> > COBOL front end.
> >
> > [snip]
> > From 304f3678dbade1f60abdadb9ddd2baffae88013dpre.patch 4 Oct 2024
> > 12:01:22 -0400 From: "James K. Lowden" 
> > Date: Fri 08 Nov 2024 03:30:08 PM EST
> > Subject: [PATCH]  Add 'cobol' to 2 files
> >
> > gcc/cobol/ChangeLog   [new file with mode: 0644]
> > libgcobol/ChangeLog   [new file with mode: 0644]
> >
> > ---
> > gcc/cobol/ChangeLog | ++-
> > libgcobol/ChangeLog | ++
> > 2 files changed, 12 insertions(+), 2 deletions(-)
> > diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog
> > new file mode 100644
> > index 000..2988f44a1f1
> > --- /dev/null
> > +++ b/gcc/cobol/ChangeLog
> > @@ -0,0 +1,6 @@
> > +^L
> > +Copyright (C) 2022 Free Software Foundation, Inc.
> > +
> > +Copying and distribution of this file, with or without modification,
> > +are permitted in any medium without royalty provided the copyright
> > +notice and this notice are preserved.
> > diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog
> > new file mode 100644
> > index 000..2988f44a1f1
> > --- /dev/null
> > +++ b/libgcobol/ChangeLog
> > @@ -0,0 +1,6 @@
> > +^L
> > +Copyright (C) 2022 Free Software Foundation, Inc.
> > +
> > +Copying and distribution of this file, with or without modification,
> > +are permitted in any medium without royalty provided the copyright
> > +notice and this notice are preserved.
> > [pins]

Re: [PATCH] Add COBOL to gcc

2024-12-12 Thread Jakub Jelinek

On Wed, Dec 11, 2024 at 11:39:51AM -0500, James K. Lowden wrote:
> On Thu, 12 Dec 2024 15:07:35 +0100
> Richard Biener  wrote:
> 
> > On Wed, Dec 11, 2024 at 4:19 PM James K. Lowden
> >  wrote:
> > >
> > > I think the term of art is "ping"?
> > >
> > > If GCC needs something from me to proceed with this, please tell me
> > > what it is.
> > 
> > I think we're waiting on the rest of the patches adding the Cobol
> > frontend at this point.
> 
> Ah, the ever careful cross of wires!  I thought I understood Jakub to
> have asked me to hold off sending further patches until these were
> committed.  

No, that wasn't the intention.
What I wanted is that everything is posted, if/when everything is approved,
then some changes need to be committed first (the contrib/ changes and patch
adding the almost empty ChangeLog files), then one of us needs to do some
updates on the server, and only when that is in effect can the rest
be committed.
And in all cases, all the commits should be ordered not to break building of
gcc, even temporarily.

Jakub

Re: [PATCH] Add COBOL to gcc

2024-12-12 Thread Richard Biener

On Thu, Dec 12, 2024 at 4:05 PM James K. Lowden
 wrote:
>
> On Thu, 12 Dec 2024 15:07:35 +0100
> Richard Biener  wrote:
>
> > On Wed, Dec 11, 2024 at 4:19 PM James K. Lowden
> >  wrote:
> > >
> > > I think the term of art is "ping"?
> > >
> > > If GCC needs something from me to proceed with this, please tell me
> > > what it is.
> >
> > I think we're waiting on the rest of the patches adding the Cobol
> > frontend at this point.
>
> Ah, the ever careful cross of wires!  I thought I understood Jakub to
> have asked me to hold off sending further patches until these were
> committed.

No, we just need to commit those first when merging things, but don't
expect any parts to be actually committed until all the rest of the patches
have gone through review.

Richard.

> Sancho!  Fetch me my steed!
>
> --jkl

Re: [PATCH 1/2]AArch64: Add CMP+CSEL and CMP+CSET for cores that support it

2024-12-12 Thread Kyrylo Tkachov

Thanks for doing this Tamar,

> On 11 Dec 2024, at 10:54, Tamar Christina  wrote:
> 
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Wednesday, December 11, 2024 9:50 AM
>> To: Tamar Christina 
>> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
>> ; ktkac...@gcc.gnu.org
>> Subject: Re: [PATCH 1/2]AArch64: Add CMP+CSEL and CMP+CSET for cores that
>> support it
>> 
>> Tamar Christina  writes:
 -Original Message-
 From: Richard Sandiford 
 Sent: Wednesday, December 11, 2024 9:32 AM
 To: Tamar Christina 
 Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
 ; ktkac...@gcc.gnu.org
 Subject: Re: [PATCH 1/2]AArch64: Add CMP+CSEL and CMP+CSET for cores that
 support it
 
 Tamar Christina  writes:
> Hi All,
> 
> GCC 15 added two new fusions CMP+CSEL and CMP+CSET.
> 
> This patch enables them for cores that support them, based on their Software
> Optimization Guides and generically on Armv9-A.   Even if a core does not
> support it there's no negative performance impact.
> 
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> * config/aarch64/aarch64-fusion-pairs.def
 (AARCH64_FUSE_NEOVERSE_BASE):
> New.
> * config/aarch64/tuning_models/cortexx925.h: Use it.
> * config/aarch64/tuning_models/generic_armv9_a.h: Use it.
> * config/aarch64/tuning_models/neoverse512tvb.h: Use it.
> * config/aarch64/tuning_models/neoversen2.h: Use it.
> * config/aarch64/tuning_models/neoversen3.h: Use it.
> * config/aarch64/tuning_models/neoversev1.h: Use it.
> * config/aarch64/tuning_models/neoversev2.h: Use it.
> * config/aarch64/tuning_models/neoversev3.h: Use it.
> * config/aarch64/tuning_models/neoversev3ae.h: Use it.
> 
> ---
> 
> diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def
 b/gcc/config/aarch64/aarch64-fusion-pairs.def
> index
 
>> f8413ab0c802c28290ebcc171bfd131622cb33be..0123430d988b96b20d20376
 df9ae7a196031286d 100644
> --- a/gcc/config/aarch64/aarch64-fusion-pairs.def
> +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
> @@ -45,4 +45,8 @@ AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET)
> /* Baseline fusion settings suitable for all cores.  */
> #define AARCH64_FUSE_BASE (AARCH64_FUSE_CMP_BRANCH |
 AARCH64_FUSE_AES_AESMC)
> 
> +/* Baseline fusion settings suitable for all Neoverse cores.  */
> +#define AARCH64_FUSE_NEOVERSE_BASE (AARCH64_FUSE_BASE |
 AARCH64_FUSE_CMP_CSEL \
> +| AARCH64_FUSE_CMP_CSET)
> +
> #define AARCH64_FUSE_MOVK (AARCH64_FUSE_MOV_MOVK |
 AARCH64_FUSE_MOVK_MOVK)
> diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h
 b/gcc/config/aarch64/tuning_models/cortexx925.h
> index
 
>> eb9b89984b0472858bc08dba924c962ec4ba53bd..b3ae1576ade1f701b775496
 def277230e193d20f 100644
> --- a/gcc/config/aarch64/tuning_models/cortexx925.h
> +++ b/gcc/config/aarch64/tuning_models/cortexx925.h
> @@ -205,7 +205,7 @@ static const struct tune_params cortexx925_tunings =
> 2 /* store_pred.  */
>   }, /* memmov_cost.  */
>   10, /* issue_rate  */
> -  AARCH64_FUSE_BASE, /* fusible_ops  */
> +  AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops  */
>   "32:16", /* function_align.  */
>   "4", /* jump_align.  */
>   "32:16", /* loop_align.  */
> diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
 b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> index
 
>> 48353a59939d84647c6981d6d0551af7ce9df751..e971e645dfc4b805b7f994a1
 5a5df7803ff4dc6c 100644
> --- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> +++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> @@ -236,7 +236,7 @@ static const struct tune_params
 generic_armv9_a_tunings =
> 1 /* store_pred.  */
>   }, /* memmov_cost.  */
>   3, /* issue_rate  */
> -  AARCH64_FUSE_BASE, /* fusible_ops  */
> +  AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops  */
>   "32:16", /* function_align.  */
>   "4", /* jump_align.  */
>   "32:16", /* loop_align.  */
 
 Having AARCH64_FUSE_NEOVERSE_BASE seems like a good thing, but I think
 we have to be careful about using it for generic tuning.  generic-armv9-a
 mustn't become "generic Neoverse", since it's supposed to be good for
 non-Neoverse (and non-Arm) Armv9-A cores as well.
 
 So perhaps here we should expand the macro.  Alternatively, we could add
 a comment saying that fusion macros for other CPUs can be added here as
 well, if they are unlikely to have a negative impact on other cores.
 
 Perhaps we should expand the macro for cortex-x925 as well.
>>> 
>>> Sorry that certainly was not the intention, I thought about naming it
>>> AARCH64_FUSE_ARMV9_BASE instead,  but since I was also u

Re: [PATCH v2] libstdc++: add initializer_list constructor to std::span (P2447)

2024-12-12 Thread Giuseppe D'Angelo


Hi,

On 12/12/2024 01:04, Jonathan Wakely wrote:

I'll prepare a patch to do that,

Et voila:
https://gcc.gnu.org/pipermail/gcc-patches/2024-December/671432.html


Thanks! All done, new patch is attached.



These mem-initializers are in the wrong order (we had an existing
constructor with the same problem, but I pushed a fix less than an
hour ago).


Which of course I c&p from. Are there some tests meant to be run under
-Wreorder?


Cheers,
--
Giuseppe D'Angelo
From ccbf0a10faaeb38ee3de1336be53926f021236d0 Mon Sep 17 00:00:00 2001
From: Giuseppe D'Angelo 
Date: Tue, 3 Dec 2024 16:56:45 +0100
Subject: [PATCH] libstdc++: add initializer_list constructor to std::span
 (P2447R6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit implements P2447R6. The code is straightforward (just one
extra constructor, with constraints and conditional explicit).

I decided to suppress -Winit-list-lifetime because otherwise it would
give too many false positives. The new constructor is meant to be used
as a parameter-passing interface (this is a design choice, see
P2447R6/§2) and, as such, the initializer_list won't dangle despite
GCC's warnings.

The new constructor isn't 100% backwards compatible. A couple of
examples are included in Annex C, but I have also lifted some more
from R4. A new test checks for the old and the new behaviors.

libstdc++-v3/ChangeLog:

	* include/bits/version.def: Added the new feature-testing macro.
	* include/bits/version.h (defined): Regenerated.
	* include/std/span: Added constructor from initializer_list.
	* testsuite/23_containers/span/init_list_cons.cc: New test.
	* testsuite/23_containers/span/init_list_cons_neg.cc: New test.
---
 libstdc++-v3/include/bits/version.def |  8 +++
 libstdc++-v3/include/bits/version.h   | 10 +++
 libstdc++-v3/include/std/span | 17 +
 .../23_containers/span/init_list_cons.cc  | 65 +++
 .../23_containers/span/init_list_cons_neg.cc  | 31 +
 5 files changed, 131 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/23_containers/span/init_list_cons.cc
 create mode 100644 libstdc++-v3/testsuite/23_containers/span/init_list_cons_neg.cc

diff --git a/libstdc++-v3/include/bits/version.def b/libstdc++-v3/include/bits/version.def
index 8d4b8e9b383..cfa0469fb2d 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1853,6 +1853,14 @@ ftms = {
   };
 };
 
+ftms = {
+  name = span_initializer_list;
+  values = {
+v = 202311;
+cxxmin = 26;
+  };
+};
+
 ftms = {
   name = text_encoding;
   values = {
diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h
index c556aca38fa..6a2c66bdf81 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -2055,6 +2055,16 @@
 #endif /* !defined(__cpp_lib_saturation_arithmetic) && defined(__glibcxx_want_saturation_arithmetic) */
 #undef __glibcxx_want_saturation_arithmetic
 
+#if !defined(__cpp_lib_span_initializer_list)
+# if (__cplusplus >  202302L)
+#  define __glibcxx_span_initializer_list 202311L
+#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_span_initializer_list)
+#   define __cpp_lib_span_initializer_list 202311L
+#  endif
+# endif
+#endif /* !defined(__cpp_lib_span_initializer_list) && defined(__glibcxx_want_span_initializer_list) */
+#undef __glibcxx_want_span_initializer_list
+
 #if !defined(__cpp_lib_text_encoding)
 # if (__cplusplus >  202302L) && _GLIBCXX_HOSTED && (_GLIBCXX_USE_NL_LANGINFO_L)
 #  define __glibcxx_text_encoding 202306L
diff --git a/libstdc++-v3/include/std/span b/libstdc++-v3/include/std/span
index 67227348c90..9f2fd2ce8d6 100644
--- a/libstdc++-v3/include/std/span
+++ b/libstdc++-v3/include/std/span
@@ -39,6 +39,7 @@
 #endif
 
 #define __glibcxx_want_span
+#define __glibcxx_want_span_initializer_list
 #include 
 
 #ifdef __cpp_lib_span // C++ >= 20 && concepts
@@ -46,6 +47,9 @@
 #include 
 #include 
 #include 
+#ifdef __cpp_lib_span_initializer_list
+# include 
+#endif
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -226,6 +230,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
 	}
 
+#if __cpp_lib_span_initializer_list >= 202311L // >= C++26
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winit-list-lifetime"
+	constexpr
+	explicit(extent != dynamic_extent)
+	span(initializer_list __il)
+	requires (is_const_v<_Type>)
+	: _M_ptr(__il.begin()), _M_extent(__il.size())
+	{
+	}
+#pragma GCC diagnostic pop
+#endif
+
   constexpr
   span(const span&) noexcept = default;
 
diff --git a/libstdc++-v3/testsuite/23_containers/span/init_list_cons.cc b/libstdc++-v3/testsuite/23_containers/span/init_list_cons.cc
new file mode 100644
index 000..1dc30ab1a50
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/span/init_list_cons.cc
@@ -0,0 +1,65 @@
+// { dg-do compile { target c++26 } }
+
+#include 
+#include

[patch,avr.applied] Fix memcpy from address-spaces

2024-12-12 Thread Georg-Johann Lay


* rampz_rtx et al. were missing MEM_VOLATILE_P.  This is needed because
  avr_emit_cpymemhi is setting RAMPZ explicitly with an own insn.

* avr_out_cpymem was missing a final RAMPZ = 0 on EBI devices.

This only affects the __flash1 ... __flash5 spaces since the other ASes
use different routines.

Applied as obvious.

Johann

--

AVR: target/118000 - Fix copymem from address-spaces.

* rampz_rtx et al. were missing MEM_VOLATILE_P.  This is needed because
  avr_emit_cpymemhi is setting RAMPZ explicitly with an own insn.

* avr_out_cpymem was missing a final RAMPZ = 0 on EBI devices.

This only affects the __flash1 ... __flash5 spaces since the other ASes
use different routines.

gcc/
PR target/118000
* config/avr/avr.cc (avr_init_expanders) 
: Set MEM_VOLATILE_P.
(avr_out_cpymem) [ELPM && EBI]: Restore RAMPZ to 0 after.


diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index ef236016990..05a6905b5d6 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -596,6 +596,12 @@ avr_init_expanders (void)
   rampy_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampy));
   rampz_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampz));

+  MEM_VOLATILE_P (sreg_rtx) = 1;
+  MEM_VOLATILE_P (rampd_rtx) = 1;
+  MEM_VOLATILE_P (rampx_rtx) = 1;
+  MEM_VOLATILE_P (rampy_rtx) = 1;
+  MEM_VOLATILE_P (rampz_rtx) = 1;
+
   xstring_empty = gen_rtx_CONST_STRING (VOIDmode, "");
   xstring_e = gen_rtx_CONST_STRING (VOIDmode, "e");

@@ -14857,9 +14863,16 @@ avr_out_cpymem (rtx_insn * /*insn*/, rtx *op, 
int *plen)

   "sbci %B1,0", xop, plen, 2);
 }

-  /* Loop until zero */
+  // Loop until zero.
+  avr_asm_len ("brne 0b", xop, plen, 1);
+

-  return avr_asm_len ("brne 0b", xop, plen, 1);
+  // Restore RAMPZ on EBI devices.
+  if (as >= ADDR_SPACE_FLASH1
+  && AVR_HAVE_ELPM && AVR_HAVE_RAMPD)
+avr_asm_len ("out %i0,__zero_reg__", &rampz_rtx, plen, 1);
+
+  return "";
 }

Re: [PATCH] testsuite: arm: Fix build error for thumb2-slow-flash-data-3.c test

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 12:46, Richard Earnshaw (lists) wrote:

On 21/11/2024 17:23, Torbjörn SVENSSON wrote:

I'm not sure how to verify that adding the parameter won't destroy the test.
I've tried to reproduce the ICE on old Arm builds of arm-none-eabi, but none of
them ICE. I suppose it should be safe to add the parameter as the PR talks
about the literal pools.

Ok for trunk and releases/gcc-14?

--

Without this change, build fails with:

.../thumb2-slow-flash-data-3.c: In function 'fn3':
.../thumb2-slow-flash-data-3.c:23:3: error: too many arguments to function 'fn1'
.../thumb2-slow-flash-data-3.c:10:6: note: declared here

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb2-slow-flash-data-3.c: Added argument to
fn1 to avoid compile error.

Signed-off-by: Torbjörn SVENSSON 


This is OK.

The alternative would be to force an older version of the C standard via 
dg-options, but I don't think that's necessary here.

R.



Pushed as r15-6167-g09499ffbb30 and r14.2.0-555-gadd35600a69.

Kind regards,
Torbjörn

Re: [PATCH v2 3/5] testsuite: arm: Use -mcpu=unset when overriding -march

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 13:03, Richard Earnshaw (lists) wrote:

On 21/11/2024 14:24, Torbjörn SVENSSON wrote:

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:
* gcc.dg/pr41574.c: Added option "-mcpu=unset".
* gcc.dg/pr59418.c: Likewise.
* lib/target-supports.exp (add_options_for_vect_early_break):
Likewise.
(add_options_for_arm_v8_neon): Likewise.
(check_effective_target_arm_neon_ok_nocache): Likewise.
(check_effective_target_arm_simd32_ok_nocache): Likewise.
(check_effective_target_arm_sat_ok_nocache): Likewise.
(check_effective_target_arm_dsp_ok_nocache): Likewise.
(check_effective_target_arm_crc_ok_nocache): Likewise.
(check_effective_target_arm_v8_neon_ok_nocache): Likewise.
(check_effective_target_arm_v8_1m_mve_fp_ok_nocache): Likewise.
(check_effective_target_arm_v8_1a_neon_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache):
Likewise.
(check_effective_target_arm_v8_2a_fp16_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_mve_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_i8mm_ok_nocache): Likewise.
(check_effective_target_arm_fp16fml_neon_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_bf16_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8m_main_cde_ok_nocache): Likewise.
(check_effective_target_arm_v8m_main_cde_fp_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_main_cde_mve_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_main_cde_mve_fp_ok_nocache):
Likewise.
(check_effective_target_arm_v8_3a_complex_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1_lob_ok): Likewise.
---
  gcc/testsuite/gcc.dg/pr41574.c|  2 +-
  gcc/testsuite/gcc.dg/pr59418.c|  2 +-
  gcc/testsuite/lib/target-supports.exp | 60 +--
  3 files changed, 32 insertions(+), 32 deletions(-)


This seems to be lacking your usual DCO statement.  But perhaps you'd like to 
consider adding a permanent DCO statement to the MAINTAINERS file, then this 
wouldn't be needed on individual patches.


Fell away when I did the removal of the aarch64 changes. Fixed in what I 
pushed.




Otherwise, this is OK.

R.




Pushed as r15-6163-ga2006457c67.

Kind regards,
Torbjörn

Re: [PATCH] arm: [MVE intrinsics] remove V2DF from MVE_vecs iterator

2024-12-12 Thread Richard Earnshaw (lists)


On 14/11/2024 10:42, Christophe Lyon wrote:

V2DF is not supported by MVE, so remove it from the only iterator
which contains it.

gcc/ChangeLog:

* config/arm/iterators.md (MVE_vecs): Remove V2DF.
---
  gcc/config/arm/iterators.md | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 834c81da56e..1caf5d18ad6 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -271,7 +271,7 @@ (define_mode_iterator VBFCVTM [V2SI SF])
  
  ;; MVE mode iterator.

  (define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
-(define_mode_iterator MVE_vecs [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
+(define_mode_iterator MVE_vecs [V16QI V8HI V4SI V2DI V8HF V4SF])
  (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
  (define_mode_iterator MVE_VLD_ST_scatter [V16QI V8HI V4SI V8HF V4SF V2DI])
  (define_mode_iterator MVE_VLD_ST_scatter_shifted [V8HI V4SI V8HF V4SF V2DI])


OK

R.

[Fortran, Patch, PR116669, v1] Fix ICE in deallocation of derived types having cyclic dependencies

2024-12-12 Thread Andre Vehreschild

Hi all,

attached patch improves analysis of cycles in derived types, i.e. type
dependencies ala:

type(T)
  type(T2), allocatable :: c
end type

type(T2)
  type(T), allocatable :: t
end type

are now detected and deallocating an object that is of any of the types
now no longer crashes the compiler because of an endless recursion. To
accomplish this, I stored the symbols of the types seen in a C++ set
and checked if a component's type is already present in there. When a
type has such an indirect self-reference, it gets marked by setting its
symbol_attribute::recursive flag. Later steps then can make use of it.

Furthermore, the _deallocate members of the vtab are populated when a
type has both the recursive and the alloc_comp flag set.

Bootstraps and regtests ok on x86_64-pc-linux-gnu / F41. Ok for
mainline?

Note: The patch was developed on top of my coarray patch, but should
apply with delta on a regular trunk w/o issues.

Regards,
Andre
--
Andre Vehreschild * Email: vehre ad gcc dot gnu dot org
From e1f0294f19a10164e932b697e8e2f7f3f59c85f7 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild 
Date: Mon, 9 Dec 2024 14:56:27 +0100
Subject: [PATCH] Fortran: Extend cyclic type detection for deallocate
 [PR116669]

Using cycles in derived/class types led to the compiler doing an endless
recursion in several locations, when the cycle was not immediate.
An immediate cyclic dependency is present in, for example, T T::comp.
Cyclic dependencies of the form T T2::comp; T2 T::comp2; are now
detected and the recursive bit in the derived type's attr is set.

gcc/fortran/ChangeLog:

	PR fortran/116669

	* class.cc (gfc_find_derived_vtab): Use attr to determine cyclic
	type dependencies.
	* expr.cc (gfc_has_default_initializer): Prevent endless
	recursion by storing already visited derived types.
	* resolve.cc (resolve_cyclic_derived_type): Determine if a type
	is used in its hierarchy in a cyclic way.
	(resolve_fl_derived0): Call resolve_cyclic_derived_type.
	(resolve_fl_derived): Ensure vtab is generated when cyclic
	derived types have allocatable components.
	* trans-array.cc (structure_alloc_comps): Prevent endless loop
	for derived type cycles.
	* trans-expr.cc (gfc_get_ultimate_alloc_ptr_comps_caf_token):
	Off topic, just prevent memory leaks.

gcc/testsuite/ChangeLog:

	* gfortran.dg/class_array_15.f03: Freeing more memory.
	* gfortran.dg/recursive_alloc_comp_6.f90: New test.
---
 gcc/fortran/class.cc  | 19 +-
 gcc/fortran/expr.cc   | 40 +
 gcc/fortran/resolve.cc| 60 +--
 gcc/fortran/trans-array.cc| 29 ++---
 gcc/fortran/trans-expr.cc | 10 +++-
 gcc/testsuite/gfortran.dg/class_array_15.f03  |  2 +-
 .../gfortran.dg/recursive_alloc_comp_6.f90| 28 +
 7 files changed, 144 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/recursive_alloc_comp_6.f90

diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc
index 64a0e726eeb..5017ee973e0 100644
--- a/gcc/fortran/class.cc
+++ b/gcc/fortran/class.cc
@@ -2507,20 +2507,6 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 	{
 	  gfc_component *c;
 	  gfc_symbol *parent = NULL, *parent_vtab = NULL;
-	  bool rdt = false;
-
-	  /* Is this a derived type with recursive allocatable
-		 components?  */
-	  c = (derived->attr.unlimited_polymorphic
-		   || derived->attr.abstract) ?
-		  NULL : derived->components;
-	  for (; c; c= c->next)
-		if (c->ts.type == BT_DERIVED
-		&& c->ts.u.derived == derived)
-		  {
-		rdt = true;
-		break;
-		  }

 	  gfc_get_symbol (name, ns, &vtype);
 	  if (!gfc_add_flavor (&vtype->attr, FL_DERIVED, NULL,
@@ -2703,9 +2689,8 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 	  c->attr.access = ACCESS_PRIVATE;
 	  c->tb = XCNEW (gfc_typebound_proc);
 	  c->tb->ppc = 1;
-	  if (derived->attr.unlimited_polymorphic
-		  || derived->attr.abstract
-		  || !rdt)
+	  if (derived->attr.unlimited_polymorphic || derived->attr.abstract
+		  || !derived->attr.recursive)
 		c->initializer = gfc_get_null_expr (NULL);
 	  else
 		{
diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index a997bdae726..148f5f90a43 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "constructor.h"
 #include "tree.h"

+#include 

 /* The following set of functions provide access to gfc_expr* of
various types - actual all but EXPR_FUNCTION and EXPR_VARIABLE.
@@ -5009,28 +5010,45 @@ is_non_empty_structure_constructor (gfc_expr * e)
 bool
 gfc_has_default_initializer (gfc_symbol *der)
 {
+  static std::set seen_derived_types;
   gfc_component *c;
+  /* The rewrite to a result variable and breaks is only needed, because
+ there is no scope_guard in C++ yet.  */
+  bool result = false;

   gcc_assert (gfc_fl_struct (der->attr.flavor));
+  seen_derive

Re: [PATCH] testsuite: arm: Update expected assembler for pr43920-2.c test

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-12 12:02, Richard Earnshaw (lists) wrote:

On 10/11/2024 10:02, Torbjörn SVENSSON wrote:

Ok for trunk, releases/gcc-12, releases/gcc-13 and releases/gcc-14?

--

In version 6-2017-q1-update of the "GNU Arm Embedded Toolchain" build,
there are 2 pop instructions. In version 7-2018-q2-update, the next
version that still has a binary build available on launchpad, there is
only a single pop instruction.
When I try to build vanilla GCC in the same version range, I always end
up with a single pop instruction.

Since r12-5301-g04520645038, the generated assembler contains one more
register move, and it's requested in PR103298 to allow it.


Is that reference correct?  Which comment are you referring to if so? 
That PR is about switch table optimizations.


Now I'm confused. PR103298 is about regressions introduced in r12-5301, 
right?


Anyway, I was referring to the comment 3 from Richard Biener:

"Again, ARM folks - please allow for the extra reg copy."

So, the size increase is due to an extra reg copy and the reduction in 
number of pops is that there is only a single pop and I see no obvious 
reason why there would possibly need to be a 2nd one.


Kind regards,
Torbjörn



R.



gcc/testsuite/ChangeLog:

PR testsuite/103298
* gcc.target/arm/pr43920-2.c: Increase allowed text size and
lower number of expected pop instructions.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/pr43920-2.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr43920-2.c b/gcc/testsuite/ 
gcc.target/arm/pr43920-2.c

index c367d6bc15d..80cc0b7d260 100644
--- a/gcc/testsuite/gcc.target/arm/pr43920-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr43920-2.c
@@ -27,6 +27,6 @@ int getFileStartAndLength (int fd, int *start_, 
size_t *length_)

    return 0;
  }
-/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "pop" 1 } } */
  /* { dg-final { scan-assembler-times "beq" 3 } } */
-/* { dg-final { object-size text <= 54 { target { ! arm*-*- 
uclinuxfdpiceabi } } } } */
+/* { dg-final { object-size text <= 56 { target { ! arm*-*- 
uclinuxfdpiceabi } } } } */

[PATCH 2/3 v2] dwarf: lto: Allow die_symbol outside of comp_unit.

2024-12-12 Thread Michal Jires

On Wed, 2024-11-27 at 15:18:39 +, Richard Biener wrote:
> I'm not sure it will work this way together with the output_die hunk,
> instead
> assemblers likely expect all this to happen close to the actual label
> emission, so I suggest to only split out the visibility/globalizing fancy
> and emit it from output_die instead.

Thanks, apparently somehow I got the idea that the
globalization/weakening of symbols has to be together.
Which seems to be not needed, so I moved everything to output_die next
to label emission.
Michal

---

Die symbols are used for external references.
Typically during LTO, early debug emits 'die_symbol+offset' for each
possibly referenced DIE in future. Partitions in LTRANS phase then
use these references.

Originally die symbols are handled only in root comp_unit and
in attributes.

This patch allows die symbols to be attached to any DIE.
References then choose closest parent with die symbol.

gcc/ChangeLog:

* dwarf2out.cc (dwarf2out_die_ref_for_decl):
  Choose closest parent with die_symbol.
(output_die): Output asm label.
(output_comp_unit): Output die_symbol list.
(reset_dies): Reset all die_symbols.
(dwarf2out_finish): Don't reset comp_unit die_symbol.
---
 gcc/dwarf2out.cc | 69 +++-
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 6bb73c6e5c6..1e55b900712 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -6039,14 +6039,14 @@ dwarf2out_die_ref_for_decl (tree decl, const char **sym,
 
   /* Similar to get_ref_die_offset_label, but using the "correct"
  label.  */
-  *off = die->die_offset;
-  while (die->die_parent)
+  unsigned HOST_WIDE_INT unit_offset = die->die_offset;
+  while (die->die_parent && (die->comdat_type_p || !die->die_id.die_symbol))
 die = die->die_parent;
-  /* For the containing CU DIE we compute a die_symbol in
+  /* Root CU DIE always contains die_symbol computed in
  compute_comp_unit_symbol.  */
-  if (die->die_tag == DW_TAG_compile_unit)
+  if (!die->comdat_type_p && die->die_id.die_symbol)
 {
-  gcc_assert (die->die_id.die_symbol != NULL);
+  *off = unit_offset - die->die_offset;
   *sym = die->die_id.die_symbol;
   return true;
 }
@@ -10798,6 +10798,29 @@ output_die (dw_die_ref die)
   unsigned long size;
   unsigned ix;
 
+  /* Output die_symbol.  */
+  if ((flag_generate_lto || flag_generate_offload)
+  && !die->comdat_type_p && die->die_id.die_symbol)
+{
+  const char* sym = die->die_id.die_symbol;
+  /*tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ get_identifier (sym), char_type_node);
+  TREE_PUBLIC (decl) = true;
+  TREE_STATIC (decl) = true;
+  DECL_ARTIFICIAL (decl) = true;
+  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+  DECL_VISIBILITY_SPECIFIED (decl) = true;
+  targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);*/
+#ifdef ASM_WEAKEN_LABEL
+  /* We prefer a .weak because that handles duplicates from duplicate
+archive members in a graceful way.  */
+  ASM_WEAKEN_LABEL (asm_out_file, sym);
+#else
+  targetm.asm_out.globalize_label (asm_out_file, sym);
+#endif
+  ASM_OUTPUT_LABEL (asm_out_file, sym);
+}
+
   dw2_asm_output_data_uleb128 (die->die_abbrev, "(DIE (%#lx) %s)",
   (unsigned long)die->die_offset,
   dwarf_tag_name (die->die_tag));
@@ -11234,8 +11257,6 @@ static void
 output_comp_unit (dw_die_ref die, int output_if_empty,
  const unsigned char *dwo_id)
 {
-  const char *oldsym;
-
   /* Unless we are outputting main CU, we may throw away empty ones.  */
   if (!output_if_empty && die->die_child == NULL)
 return;
@@ -11267,35 +11288,10 @@ output_comp_unit (dw_die_ref die, int output_if_empty,
 : DWARF_COMPILE_UNIT_HEADER_SIZE);
   calc_die_sizes (die);
 
-  oldsym = die->die_id.die_symbol;
-
   switch_to_section (debug_info_section);
   ASM_OUTPUT_LABEL (asm_out_file, debug_info_section_label);
   info_section_emitted = true;
 
-  /* For LTO cross unit DIE refs we want a symbol on the start of the
- debuginfo section, not on the CU DIE.  */
-  if ((flag_generate_lto || flag_generate_offload) && oldsym)
-{
-  /* ???  No way to get visibility assembled without a decl.  */
-  tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (oldsym), char_type_node);
-  TREE_PUBLIC (decl) = true;
-  TREE_STATIC (decl) = true;
-  DECL_ARTIFICIAL (decl) = true;
-  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
-  DECL_VISIBILITY_SPECIFIED (decl) = true;
-  targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
-#ifdef ASM_WEAKEN_LABEL
-  /* We prefer a .weak because that handles duplicates from duplicate
- archive

Re: [COMMITTED] i386: regenerate i386.opt.urls

2024-12-12 Thread Sandra Loosemore


On 12/12/24 03:53, Sam James wrote:

r15-6128-gfa878dc8c45fa3 missed the regeneration of the URL doc map, so
regenerate it here to make the buildbots happy.


I apologize for this breakage.  :-(  Can someone explain how I can 
detect this problem *before* submitting patches, and how to fix it, so 
that I can script that into my build process?  And/or could it be 
possible to check this at commit time (as we do with ChangeLogs, etc) or 
at least make the buildbot produce a more helpful message that actually 
explains what the problem is and how to fix it?


-Sandra

[PATCH 3/3 v2] lto: Remap node order for stability.

2024-12-12 Thread Michal Jires

On Sun, 2024-11-17 at 19:15:04 +, Jan Hubicka wrote:
> 
> I would suggest renaming produce_asm to produce_symbol_asm 
> and making produce_asm wrapper which passes fn=NULL and output_order=-1,
> so we do not have odd parameters everywhere in streaming code.
> 
> OK with this change.
> Honza

Applied suggested change.



This patch adds remapping of node order for each lto partition.
Resulting order conserves relative order inside partition, but
is independent of outside symbols. So if lto partition contains
identical set of symbols, their remapped order will be stable
between compilations.

This stability is needed for Incremental LTO.

gcc/ChangeLog:

* ipa-devirt.cc (ipa_odr_summary_write):
  Add unused argument.
* ipa-fnsummary.cc (ipa_fn_summary_write): Likewise.
* ipa-icf.cc (sem_item_optimizer::write_summary): Likewise.
* ipa-modref.cc (modref_write): Likewise.
* ipa-prop.cc (ipa_prop_write_jump_functions): Likewise.
(ipcp_write_transformation_summaries): Likewise.
* ipa-sra.cc (ipa_sra_write_summary): Likewise.
* lto-cgraph.cc (lto_symtab_encoder_delete): Delete remap.
(lto_output_node): Remap order.
(lto_output_varpool_node): Likewise.
(output_cgraph_opt_summary): Add unused argument.
* lto-streamer-out.cc (produce_symbol_asm): Renamed. Use remapped order.
(produce_asm): Rename. New wrapper.
(output_function): Propagate remapped order.
(output_constructor): Likewise.
(copy_function_or_variable): Likewise.
(cmp_int): New.
(create_order_remap): New.
(lto_output): Create remap. Remap order.
* lto-streamer.h (struct lto_symtab_encoder_d): Remap hash_map.
(produce_asm): Add order argument.
---
 gcc/ipa-devirt.cc   |  2 +-
 gcc/ipa-fnsummary.cc|  2 +-
 gcc/ipa-icf.cc  |  2 +-
 gcc/ipa-modref.cc   |  4 +-
 gcc/ipa-prop.cc |  4 +-
 gcc/ipa-sra.cc  |  2 +-
 gcc/lto-cgraph.cc   | 10 +++--
 gcc/lto-streamer-out.cc | 93 +++--
 gcc/lto-streamer.h  |  5 ++-
 9 files changed, 99 insertions(+), 25 deletions(-)

diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
index e88e9db781e..cdd520ba76b 100644
--- a/gcc/ipa-devirt.cc
+++ b/gcc/ipa-devirt.cc
@@ -4131,7 +4131,7 @@ ipa_odr_summary_write (void)
   odr_enum_map = NULL;
 }
 
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc
index 3f5e09960ef..c057536f551 100644
--- a/gcc/ipa-fnsummary.cc
+++ b/gcc/ipa-fnsummary.cc
@@ -5091,7 +5091,7 @@ ipa_fn_summary_write (void)
}
 }
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 
   ipa_prop_write_jump_functions ();
diff --git a/gcc/ipa-icf.cc b/gcc/ipa-icf.cc
index 60152e60bc5..e9c5ae764f0 100644
--- a/gcc/ipa-icf.cc
+++ b/gcc/ipa-icf.cc
@@ -2216,7 +2216,7 @@ sem_item_optimizer::write_summary (void)
 }
 
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
index 7449041c102..e68f434aa10 100644
--- a/gcc/ipa-modref.cc
+++ b/gcc/ipa-modref.cc
@@ -3746,7 +3746,7 @@ modref_write ()
 {
   streamer_write_uhwi (ob, 0);
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
   return;
 }
@@ -3821,7 +3821,7 @@ modref_write ()
}
 }
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 9070a45f683..86044e392aa 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -5338,7 +5338,7 @@ ipa_prop_write_jump_functions (void)
 ipa_write_node_info (ob, node);
 }
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
@@ -5536,7 +5536,7 @@ ipcp_write_transformation_summaries (void)
write_ipcp_transformation_info (ob, cnode, ts);
 }
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 04920f2aa8e..e6a75139eb0 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -2898,7 +2898,7 @@ ipa_sra_write_summary (void)
 isra_write_node_summary (ob, node);
 }
   streamer_write_char_stream (ob->main_stream, 0);
-  produce_asm (ob, NULL);
+  produce_asm (ob);
   destroy_output_block (ob);
 }
 
diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
index d1d63fd90ea..14275ed7c42 100644
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -96,6 +96,8 @@ lto_symtab_encoder_delete

Re: [PATCH] match.pd: Defer some CTZ/CLZ foldings until after ubsan pass for -fsanitize=builtin [PR115127]

2024-12-12 Thread Richard Biener




> Am 12.12.2024 um 11:28 schrieb Jakub Jelinek :
> 
> Hi!
> 
> As the following testcase shows, -fsanitize=builtin instruments the
> builtins in the ubsan pass which is done shortly after going into
> SSA, but if optimizations optimize the builtins away before that,
> nothing is instrumented.  Now, I think it is just fine if the
> result of the builtins isn't used in any way and we just DCE them,
> but in the following optimizations the result is used.
> So, the following patch for -fsanitize=builtin only defers the
> optimizations that might turn single argument CLZ/CTZ (aka undefined
> at zero) until the ubsan pass is done.
> Now, we don't have PROP_ubsan and I am not sure it is worth adding it,
> there is PROP_ssa set by the ssa pass which is 3 passes before
> ubsan, but there are only 2 warning passes in between, so PROP_ssa
> looked good enough to me.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
Ok

Richard 


> 2024-12-12  Jakub Jelinek  
> 
>PR sanitizer/115127
>* match.pd (clz (X) == C, ctz (X) == C, ctz (X) >= C): Don't
>optimize if -fsanitize=builtin and not yet in SSA form.
> 
>* c-c++-common/ubsan/builtin-2.c: New test.
> 
> --- gcc/match.pd.jj2024-12-06 11:00:27.937579733 +0100
> +++ gcc/match.pd2024-12-11 19:55:08.978334222 +0100
> @@ -9636,13 +9636,17 @@ (define_operator_list SYNC_FETCH_AND_AND
>   cmp (lt ge)
>   (simplify
>(op (clz:s@2 @0) INTEGER_CST@1)
> -   (if (integer_zerop (@1) && single_use (@2))
> -/* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0.  */
> -(with { tree stype = signed_type_for (TREE_TYPE (@0)); }
> - (cmp (convert:stype @0) { build_zero_cst (stype); }))
> -/* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1.  */
> -(if (wi::to_wide (@1) == TYPE_PRECISION (TREE_TYPE (@0)) - 1)
> - (op @0 { build_one_cst (TREE_TYPE (@0)); }))
> +   (if (!sanitize_flags_p (SANITIZE_BUILTIN)
> +/* For -fsanitize=builtin give ubsan pass a chance
> +   to instrument it first.  */
> +|| (cfun && (cfun->curr_properties & PROP_ssa) != 0))
> +(if (integer_zerop (@1) && single_use (@2))
> + /* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0.  */
> + (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
> +  (cmp (convert:stype @0) { build_zero_cst (stype); }))
> + /* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1.  */
> + (if (wi::to_wide (@1) == TYPE_PRECISION (TREE_TYPE (@0)) - 1)
> +  (op @0 { build_one_cst (TREE_TYPE (@0)); })))
> (for op (eq ne)
>  cmp (lt ge)
>  (simplify
> @@ -9682,7 +9686,13 @@ (define_operator_list SYNC_FETCH_AND_AND
>(op (ctz:s @0) INTEGER_CST@1)
> (with { bool ok = true;
>HOST_WIDE_INT val = 0;
> -if (!tree_fits_shwi_p (@1))
> +if (sanitize_flags_p (SANITIZE_BUILTIN)
> +/* For -fsanitize=builtin give ubsan pass a chance
> +   to instrument it first.  */
> +&& (!cfun
> +|| (cfun->curr_properties & PROP_ssa) == 0))
> +  ok = false;
> +else if (!tree_fits_shwi_p (@1))
>  ok = false;
>else
>  {
> @@ -9713,8 +9723,15 @@ (define_operator_list SYNC_FETCH_AND_AND
>(op (ctz:s @0) INTEGER_CST@1)
> (with { tree type0 = TREE_TYPE (@0);
>int prec = TYPE_PRECISION (type0);
> +bool ok = true;
> +if (sanitize_flags_p (SANITIZE_BUILTIN)
> +/* For -fsanitize=builtin give ubsan pass a chance
> +   to instrument it first.  */
> +&& (!cfun
> +|| (cfun->curr_properties & PROP_ssa) == 0))
> +  ok = false;
>  }
> - (if (prec <= MAX_FIXED_MODE_SIZE)
> + (if (ok && prec <= MAX_FIXED_MODE_SIZE)
>   (if (tree_int_cst_sgn (@1) < 0 || wi::to_widest (@1) >= prec)
>{ constant_boolean_node (op == EQ_EXPR ? false : true, type); }
>(op (bit_and @0 { wide_int_to_tree (type0,
> @@ -9815,7 +9832,13 @@ (define_operator_list SYNC_FETCH_AND_AND
>   else if (TYPE_PRECISION (type0)
>== TYPE_PRECISION (long_long_unsigned_type_node))
> cfn = CFN_BUILT_IN_CTZLL;
> - } }
> + }
> +   if (sanitize_flags_p (SANITIZE_BUILTIN)
> +   /* For -fsanitize=builtin give ubsan pass a chance
> +  to instrument it first.  */
> +   && (!cfun
> +   || (cfun->curr_properties & PROP_ssa) == 0))
> +cfn = CFN_LAST; }
> (if (cfn == CFN_CTZ)
>  (IFN_CTZ (convert:type0 @0))
>  (if (cfn == CFN_BUILT_IN_CTZ)
> --- gcc/testsuite/c-c++-common/ubsan/builtin-2.c.jj2024-12-11 
> 19:49:42.072941749 +0100
> +++ gcc/testsuite/c-c++-common/ubsan/builtin-2.c2024-12-11 
> 19:51:21.503540338 +0100
> @@ -0,0 +1,89 @@
> +/* PR sanitizer/115127 */
> +/* { dg-do run } */
> +/* { dg-options "-fsanitize=undefined" } */
> +
> +#include 
> +
> +__attribute__((noipa)) int
> +f1 (unsigned a)
> +{
> +  return __builtin_clz (a) == 0;
> +}
> +
> +__attribut

[Ada] Fix PR ada/117996

2024-12-12 Thread Eric Botcazou

This fixes a precondition failure triggered when the Eigenvalues routine of 
Ada.Numerics.Generic_Real_Arrays is instantiated with -gnata, because it calls 
Sort_Eigensystem on an empty vector.

Tested on x86-64/Linux, applied on all active branches.


2024-12-12  Eric Botcazou  

PR ada/117996
* libgnat/a-ngrear.adb (Jacobi): Remove default value for
Compute_Vectors formal parameter.
(Sort_Eigensystem): Add Compute_Vectors formal parameter.  Do not
modify the Vectors if Compute_Vectors is False.
(Eigensystem): Pass True as Compute_Vectors to Sort_Eigensystem.
(Eigenvalues): Pass False as Compute_Vectors to Sort_Eigensystem.


2024-12-12  Eric Botcazou  

* gnat.dg/matrix1.adb: New test.

-- 
Eric Botcazou
diff --git a/gcc/ada/libgnat/a-ngrear.adb b/gcc/ada/libgnat/a-ngrear.adb
index e70617f2096..6778a56e45c 100644
--- a/gcc/ada/libgnat/a-ngrear.adb
+++ b/gcc/ada/libgnat/a-ngrear.adb
@@ -96,7 +96,7 @@ package body Ada.Numerics.Generic_Real_Arrays is
  (A   : Real_Matrix;
   Values  : out Real_Vector;
   Vectors : out Real_Matrix;
-  Compute_Vectors : Boolean := True);
+  Compute_Vectors : Boolean);
--  Perform Jacobi's eigensystem algorithm on real symmetric matrix A
 
function Length is new Square_Matrix_Length (Real'Base, Real_Matrix);
@@ -107,8 +107,9 @@ package body Ada.Numerics.Generic_Real_Arrays is
--  Perform a Givens rotation
 
procedure Sort_Eigensystem
- (Values  : in out Real_Vector;
-  Vectors : in out Real_Matrix);
+ (Values  : in out Real_Vector;
+  Vectors : in out Real_Matrix;
+  Compute_Vectors : Boolean);
--  Sort Values and associated Vectors by decreasing absolute value
 
procedure Swap (Left, Right : in out Real);
@@ -486,7 +487,7 @@ package body Ada.Numerics.Generic_Real_Arrays is
is
begin
   Jacobi (A, Values, Vectors, Compute_Vectors => True);
-  Sort_Eigensystem (Values, Vectors);
+  Sort_Eigensystem (Values, Vectors, Compute_Vectors => True);
end Eigensystem;
 
-
@@ -500,7 +501,7 @@ package body Ada.Numerics.Generic_Real_Arrays is
 Vectors : Real_Matrix (1 .. 0, 1 .. 0);
  begin
 Jacobi (A, Values, Vectors, Compute_Vectors => False);
-Sort_Eigensystem (Values, Vectors);
+Sort_Eigensystem (Values, Vectors, Compute_Vectors => False);
  end;
   end return;
end Eigenvalues;
@@ -522,7 +523,7 @@ package body Ada.Numerics.Generic_Real_Arrays is
  (A   : Real_Matrix;
   Values  : out Real_Vector;
   Vectors : out Real_Matrix;
-  Compute_Vectors : Boolean := True)
+  Compute_Vectors : Boolean)
is
   --  This subprogram uses Carl Gustav Jacob Jacobi's iterative method
   --  for computing eigenvalues and eigenvectors and is based on
@@ -731,8 +732,9 @@ package body Ada.Numerics.Generic_Real_Arrays is
--
 
procedure Sort_Eigensystem
- (Values  : in out Real_Vector;
-  Vectors : in out Real_Matrix)
+ (Values  : in out Real_Vector;
+  Vectors : in out Real_Matrix;
+  Compute_Vectors : Boolean)
is
   procedure Swap (Left, Right : Integer);
   --  Swap Values (Left) with Values (Right), and also swap the
@@ -748,8 +750,10 @@ package body Ada.Numerics.Generic_Real_Arrays is
   procedure Swap (Left, Right : Integer) is
   begin
  Swap (Values (Left), Values (Right));
- Swap_Column (Vectors, Left - Values'First + Vectors'First (2),
-   Right - Values'First + Vectors'First (2));
+ if Compute_Vectors then
+Swap_Column (Vectors, Left - Values'First + Vectors'First (2),
+  Right - Values'First + Vectors'First (2));
+ end if;
   end Swap;
 
begin
-- { dg-do run }
-- { dg-options "-gnata" }

with Ada.Numerics.Generic_Real_Arrays;

procedure Matrix1 is

  package GRA is new Ada.Numerics.Generic_Real_Arrays (real => float);
  use GRA;

  M : constant Real_Matrix (1..2, 1..2) := ((1.0, 0.0), (0.0, 2.0));
  E : constant Real_Vector := Eigenvalues (M);

begin
  null;
end;

[Patch] OpenMP: Enable has_device_addr clause for 'dispatch' in C/C++

2024-12-12 Thread Tobias Burnus


This simple patch improves the diagnostic and prepares for some future work.
Note: has_device_addr for C is permitted but pointless, for C++ it
requires some follow-up work to be useful.
For C, a hint that 'need_device_addr' is not valid has been added and
for 'has_device_addr' a middle-end warning has been added if the user
writes inconsistent code.

Comments, remarks, suggestions before I commit it?

BTW: For C++, I think we mishandle *reference to pointer type*, both by
permitting invalid code and by producing wrong code. But I
still have to fully understand what GCC currently does, what the spec
says it should do, and whether we want to permit a bit more for
legacy-support reasons (for this to check: GCC's testcases + older
OpenMP specifications). The topic relates to need_device_{addr,ptr},
use_device_{addr,ptr}, is_device_ptr and has_device_addr in C++, only.

Tobias
OpenMP: Enable has_device_addr clause for 'dispatch' in C/C++

The 'has_device_addr' of 'dispatch' has to be seen in conjunction with the
'need_device_addr' modifier to the 'adjust_args' clause of 'declare variant'.
As the latter has not yet been implemented, 'has_device_addr' has no real
effect. However, to prepare for 'need_device_addr' and as service to the user:

For C, where 'need_device_addr' is not permitted (contrary to C++ and Fortran),
a note is output when the user tries to use it (alongside the existing
error that either 'nothing' or 'need_device_ptr' was expected).

And, on the ME side, it is lightly handled by diagnosing when, for the
same argument, there is a mismatch between the variant's adjust_args
'need_device_ptr' and dispatch having a 'has_device_addr' (or
need_device_addr/is_device_ptr) as according to the spec, those are completely
separate.  Namely, 'dispatch' will still do the host to device pointer
conversion for a 'need_device_ptr' argument, even if it appeared in a
'has_device_addr' clause.

gcc/c/ChangeLog:

	* c-parser.cc (OMP_DISPATCH_CLAUSE_MASK): Add has_device_addr clause.
	(c_finish_omp_declare_variant): Add an 'inform' telling the user that
	'need_device_addr' is invalid for C.

gcc/cp/ChangeLog:

	* parser.cc (OMP_DISPATCH_CLAUSE_MASK): Add has_device_addr clause.

gcc/ChangeLog:

	* gimplify.cc (gimplify_call_expr): When handling OpenMP's dispatch,
	add diagnostic when there is a ptr vs. addr mismatch between
	need_device_{addr,ptr} and {is,has}_device_{ptr,addr}, respectively.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/adjust-args-3.c: New test.
	* gcc.dg/gomp/adjust-args-2.c: New test.

 gcc/c/c-parser.cc   |  4 ++
 gcc/cp/parser.cc|  1 +
 gcc/gimplify.cc | 75 --
 gcc/testsuite/c-c++-common/gomp/adjust-args-3.c | 85 +
 gcc/testsuite/gcc.dg/gomp/adjust-args-2.c   |  5 ++
 5 files changed, 152 insertions(+), 18 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 4ec0ee85ac4..d0235809fb3 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -25271,6 +25271,7 @@ c_parser_omp_dispatch_body (c_parser *parser)
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEPEND)   \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOVARIANTS)   \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOCONTEXT)\
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR)  \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INTEROP)  \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)\
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOWAIT))
@@ -26963,6 +26964,9 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms)
 		{
 		  error_at (c_parser_peek_token (parser)->location,
 			"expected % or %");
+		  if (strcmp (p, "need_device_addr") == 0)
+		inform (c_parser_peek_token (parser)->location,
+			"% is not valid for C");
 		  goto fail;
 		}
 	}
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 8a3472a4b34..15a5253b50d 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -49924,6 +49924,7 @@ cp_parser_omp_dispatch_body (cp_parser *parser)
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEPEND)   \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOVARIANTS)   \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOCONTEXT)\
+   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR)  \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INTEROP)  \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)\
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NOWAIT))
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 8495c45eddb..dd0d992a958 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -4124,27 +4124,39 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pr

[PATCH v2 0/5] New Asm Constraints and Modifiers - RVC, Raw Encodings, Pairs

2024-12-12 Thread Kito Cheng

This patch set implements the proposal from riscv-c-api-doc[1].
It adds two constraints and one modifier with the goal of improving the user
experience for `.insn`, making it easier for users to experiment with new ISA
extensions.

A quick summary of this patch set:

- Add R constraint for even-odd pairs of general-purpose registers.
- Add cr and cf constraints for RVC-compatible registers.
- Add N modifier for the raw encoding of a register.

The c constraint and N modifier were already in use before, but only internally.
Therefore, I believe it is safe to rename them for broader use.

[1] https://github.com/riscv-non-isa/riscv-c-api-doc/pull/92

Changes in v2:
- Adjust riscv_regno_to_class, riscv_secondary_memory_needed and
  riscv_register_move_cost for the new register class; I've also made sure
  the cost model isn't changed.


Kito Cheng (5):
  RISC-V: Rename constraint c0* to k0*
  RISC-V: Add cr and cf constraint
  RISC-V: Rename internal operand modifier N to n
  RISC-V: Implement N modifier for printing the register number rather
than the register name
  RISC-V: Add new constraint R for register even-odd pairs

 gcc/config/riscv/constraints.md   |  20 +-
 gcc/config/riscv/corev.md |  14 +-
 gcc/config/riscv/riscv-v.cc   |   2 +-
 gcc/config/riscv/riscv.cc |  55 ++-
 gcc/config/riscv/riscv.h  |   6 +
 gcc/config/riscv/riscv.md |   2 +-
 gcc/config/riscv/vector-iterators.md  | 444 +-
 gcc/doc/extend.texi   |   1 +
 gcc/doc/md.texi   |  10 +
 gcc/testsuite/gcc.target/riscv/constraint-R.c |  23 +
 .../gcc.target/riscv/constraint-cf-zfinx.c|  14 +
 .../gcc.target/riscv/constraint-cf.c  |  14 +
 .../gcc.target/riscv/constraint-cr.c  |  13 +
 .../gcc.target/riscv/modifier-N-fpr.c |  16 +
 .../gcc.target/riscv/modifier-N-vr.c  |  18 +
 gcc/testsuite/gcc.target/riscv/modifier-N.c   |  16 +
 16 files changed, 419 insertions(+), 249 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-R.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cf-zfinx.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cf.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/constraint-cr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N-fpr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N-vr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/modifier-N.c

-- 
2.34.1

Re: [PATCH v2 1/5] testsuite: arm: Use effective-target for pr56184.C and pr59985.C

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 12:53, Richard Earnshaw (lists) wrote:

On 21/11/2024 14:24, Torbjörn SVENSSON wrote:

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.dg/other/pr56184.C: Use effective-target
arm_arch_v7a_neon_thumb.
* g++.dg/other/pr59985.C: Use effective-target
arm_arch_v7a_fp_hard.
* lib/target-supports.exp: Define effective-target
arm_arch_v7a_fp_hard, arm_arch_v7a_neon_thumb

Signed-off-by: Torbjörn SVENSSON 


OK with Christophe's nit fixed.

R.



Pushed as r15-6161-g68b4d6bda82.

Kind regards,
Torbjörn

Re: [PATCH v2] testsuite: arm: Check that a far jump is used in thumb1-far-jump-2.c

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 12:41, Richard Earnshaw (lists) wrote:

On 22/11/2024 09:37, Torbjörn SVENSSON wrote:

Changes since v1:

- Rewrote the padding instructions in the macro to instead write to volatile
   memory. This ensures that every expansion of the base macro is exactly 2
   bytes.

If the `GO()` in f3 is removed, the generated assembly would be reduced to:

f3:
 @ args = 0, pretend = 0, frame = 0
 @ frame_needed = 0, uses_anonymous_args = 0
 push{lr}
 cmp r0, #0
 bne .LCB7
 bl  .L1 @far jump
.LCB7:
 movsr2, #1
 ldr r3, .L6
 str r2, [r3]
 ...
 str r2, [r3]
.L1:
 @ sp needed
 pop {pc}

Would this assembly be as stable as with the `GO()` in f3? If so, would it be
preferred to generate the simpler assembly in the test?

Ok for trunk as it is or perhaps with the simpler assembly?

--

With the changes in r15-1579-g792f97b44ff, the code used as "padding" in
the test case is optimized away. Prevent this optimization by forcing a
read of the volatile memory.
Also, validate that there is a far jump in the generated assembler.

Without this patch, the generated assembler is reduced to:
f3:
 cmp r0, #0
 beq .L1
 ldr r4, .L6
.L1:
 bx  lr
.L7:
 .align  2
.L6:
 .word   g_0_1

With the patch, the generated assembler is:
f3:
 movsr2, #1
 ldr r3, .L6
 push{lr}
 str r2, [r3]
 cmp r0, #0
 bne .LCB10
 bl  .L1 @far jump
.LCB10:
 b   .L7
.L8:
 .align  2
.L6:
 .word   .LANCHOR0
.L7:
 str r2, [r3]
 ...
 str r2, [r3]
.L1:
 pop {pc}

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb1-far-jump-2.c: Write to volatile memory
in macro to avoid optimization.

Signed-off-by: Torbjörn SVENSSON 
---
  .../gcc.target/arm/thumb1-far-jump-2.c| 95 ++-
  1 file changed, 51 insertions(+), 44 deletions(-)



OK.

R.



Pushed as r15-6166-gb7e11b49992.

Kind regards,
Torbjörn

Re: [Ping^2][PATCH v2 00/12] AArch64/OpenMP: Test SVE ACLE types with various OpenMP constructs.

2024-12-12 Thread Tejas Belagod


Ping^2

Thanks,
Tejas.

On 11/4/24 10:06 AM, Tejas Belagod wrote:

Ping.

Thanks,
Tejas.

On 10/18/24 11:59 AM, Tejas Belagod wrote:

Hi Jakub,

Just wanted to add that I'm sorry for the delay in respinning the 
patchset - I was caught up with another piece of work. Thanks for the 
reviews so far and thank you for your patience.


Thanks,
Tejas.

On 10/18/24 11:52 AM, Tejas Belagod wrote:
The following patch series is reworked from its first version based 
on Jakub's

review comments in
   https://gcc.gnu.org/pipermail/gcc-patches/2024-August/659540.html

The changes in v2:

1. Moved all execute tests to under 
libgomp/testsuite/libgomp.target/aarch64/.

2. Retained gcc/testsuite/gcc.target/aarch64/sve/omp/ for compile tests.
3. Handled offloading SVE types differently based on sizeless and 
fixed-size

    types.  Also added more tests to check for VLA and VLS types.
4. Made tests more representative of real-world scenarios.
5. Converted some compile tests to execute tests.
6. For user-defined reductions, I have removed task and taskloop 
tests for now.
    I need to understand the constructs better before adding 
meaningful tests.
7. One known fail where declare simd uniform clones a function to a 
variant
    to support a particular type in the clause.  This fails on SVE 
with a decl

    without prototype error. It is unclear how this ought to be handled.
    I went ahead and posted the rest of the series as I didn't want 
this issue

    to block the rest of the patches.

The following patch series handles various scenarios with OpenMP and 
SVE types.
The starting point for the series follows a suggestion from Jakub to 
cover all
the possible scenarios that could arise when OMP constructs/clauses 
etc are
used with SVE ACLE types. Here are a few instances that this patch 
series test
and in some cases fixes the expected output.  This patch series does 
not follow
a formal definition or a spec of how OMP interacts with SVE ACLE 
types, so its

more of a proposed behaviour.  Comments and discussion welcome.

This list is not exhaustive, but covers most scenarios of how SVE 
ACLE types

ought to interact with OMP constructs/clauses.

1. Poly-int structures that represent variable-sized objects and OMP 
runtime.


Currently poly-int type structures are passed by value to OpenMP runtime
functions for shared clauses etc.  This patch improves on this by 
passing

around poly-int structures by address to avoid copy-overhead.

2. SVE ACLE types in OMP Shared clauses.

We test the behaviour where SVE ACLE type objects are shared in the 
following

methods into an OMP region:
   a. Explicit Shared clause on SVE ACLE type objects.
   b. Implicit shared clause.
   c. Implicit shared with default clause.
   d. SVE ACLE types in the presence of predetermined (static) shared 
objects.


The associated tests ensure that all such shared objects are passed 
by address

into the OMP runtime.  There are runtime tests to verify the functional
correctness of the change.

3. [tree] Add function to strip pointer type and get down to the 
actual pointee type.


Adds a support function in tree.h to strip pointer types to drill 
down to the pointee

type.

4. Offloading and SVE ACLE types.

The target clause in OpenMP is used to offload loop kernels to 
accelerator
peripherals.  target's 'map' clause is used to move data from and to 
the
accelerator.  When the data is sizeless SVE type, it may be 
unsuitable due to

various reasons i.e. the two SVE targets may not agree on vector size or
some targets don't support variable vector size.  This makes sizeless 
SVE types
unsuitable for use in OMP's 'map' clause.  We diagnose all such cases 
and issue

errors where appropriate.  The cases we cover in this patch are:

   a. Implicitly-mapped SVE ACLE types in OMP target regions are 
diagnosed.
   b. Explicitly-mapped SVE ACLE types in OMP target regions using 
map clause

  are diagnosed.
   c. Explicitly-mapped SVE ACLE types of various directions - to, 
from, tofrom

  in the map clause are diagnosed.
   d. target enter and exit data clauses with map on SVE ACLE types are
  diagnosed.
   e. target data map with alloc on SVE ACLE types are diagnosed.
   f. target update from clause on SVE ACLE types are diagnosed.
   g. target private firstprivate with SVE ACLE types are diagnosed.
   h. All combinations of target with work-sharing constructs like 
parallel,
  loop, simd, teams, distribute etc are also diagnosed when SVE 
ACLE types

  are involved.

For fixed-size SVE vector types (e.g. fixed by arm_sve_vector_bits 
attribute),
we don't diagnose.  Fixed size vectors are allowed to be used in OMP 
offloading
constructs and clauses.  The only caveat is that LTO streamers that 
handle
streaming in the offloaded bytecode is expected to check for matching 
vector

size and diagnose as the attribute sizes are also streamed out.

5. Lastprivate and SVE ACLE types.

Various OpenMP lastprivate clause scenarios with SVE

Re: [patch] Allow target to chose address-space for artificial rodata lookup tables.

2024-12-12 Thread Georg-Johann Lay


For the avr.cc part, the __flashx named address space has been approved,
which means that the natural choice in the target hook would be
ADDR_SPACE_FLASHX instead of ADDR_SPACE_MEMX:


+/* Implement `TARGET_ADDR_SPACE_FOR_ARTIFICIAL_RODATA'.  */
+
+static addr_space_t
+avr_addr_space_for_artificial_rodata (tree /*type*/,
+ artificial_rodata /*kind*/)
+{
+  return avr_rodata_in_flash_p ()
+? ADDR_SPACE_GENERIC
+: avropt_n_flash > 1 ? ADDR_SPACE_MEMX : ADDR_SPACE_FLASH;
+}
+


Johann

Am 09.12.24 um 15:13 schrieb Georg-Johann Lay:

This patch adds a new target hook that allows choosing
a non-generic named address-space for compiler generated
lookup tables.

The purpose is that there are cases (on avr namely) where
the generic address space is sub-optimal because it must
put .rodata in RAM.  With this hook it is possible to
choose an address space that's better suited, like the
__flash address space that allocates to .progmem.data which
resides in flash.

The patch passes without regressions on avr.

On x86_64, it bootstraps and tests without regressions.

Ok for trunk?

Johann

p.s.  The feature has been discussed in the lists before,
and in all discussions I failed in getting across why a
different address space is needed.  Some background:

1) On AVR, you cannot just put data in a different section
without also using different instructions to access it.
In general a different section also requires different
address registers and different addressing modes and
different instructions.

2) It is *not* possible to do this during linker relaxation.
You cannot just change register allocation and address registers
in the linker.  You cannot just replace a 16-bit register like
X or Y by a 24-bit address that lives in Z (lower 16 bits) and
in some SFR (upper 8 bits).

3) You cannot just put all static storage read-only data into
a different address space.  For example, it is perfectly fine
for a C/C++ code to define a variable in static storage and
access it in assembly code.  The assembly code must know the
address space of the symbol, or otherwise the code is wrong.

4) From 3) it follows that you can only change the address space
of an object when it is hidden from the user, i.e. the compiler
is building the object and has control over all accesses, and
there's no way the user can get a reference to the object.

To date, there are only 2 lookup tables generated by GCC that
fit these criteria:

A) CSWTCH tables from tree-switch-conversion.cc.

B) crc_table_for_* tables from gimple-crc-optimization.cc.

Though B) may increase the code size by quite a lot.  For example,
size of gcc.dg/torture/crc-2.c will increase by more than 1500%
(and even more when a 24-bit address-space is required).  The
CRC optimizations uses some builtin magic, so it's unclear where
and how to introduce a different address space.

--

Allow target to choose address-space for artificial rodata.

gcc/
 * coretypes.h (enum artificial_rodata): New enum type.
 * doc/tm.texi: Rebuild.
 * doc/tm.texi.in (TARGET_ADDR_SPACE_FOR_ARTIFICIAL_RODATA):
 New hook.
 * target.def (addr_space.for_artificial_rodata): New DEFHOOK.
 * targhooks.cc (default_addr_space_convert): New function.
 * targhooks.h (default_addr_space_convert): New prototype.
 * tree-switch-conversion.cc (build_one_array) :
 Set type_quals address-space according to
 targetm.addr_space.for_artificial_rodata().

 * config/avr/avr.cc (avr_rodata_in_flash_p): Move up.
 (TARGET_ADDR_SPACE_FOR_ARTIFICIAL_RODATA): Define to...
 (avr_addr_space_for_artificial_rodata): ...this new function.

Re: [PATCH] testsuite: arm: Update expected RTL for reg_equal_test.c test

2024-12-12 Thread Richard Earnshaw (lists)


On 10/11/2024 13:38, Torbjörn SVENSSON wrote:

Hi Richard,

I'm not sure if I'm doing something wrong here, or if it was an oversight
when doing the update in r12-8108-g62082d278d1.
Anyway, the commit message suggests that it's only the constant that is of
interest, so I updated the test to only check the constant. Do you think
this is enough, or should the test case also verify that it's used in
a "set" expression?

Ok for trunk and releases/gcc-14?

--

The test case was rewritten in r12-8108-g62082d278d1, but the expected
RTL was not updated.

The diff for the generated reg_equal_test.c.*r.expand files produced by
r12-8108-g62082d278d1 and r15-5047-g7e1d9f58858 is:

--- reg_equal_test.c.253r.expand-r12-8108-g62082d278d1  2024-11-10 
14:24:54.957438394 +0100
+++ reg_equal_test.c.268r.expand-r15-5047-g7e1d9f58858  2024-11-10 
14:30:13.633437178 +0100
@@ -1,5 +1,5 @@

-;; Function x (x, funcdef_no=0, decl_uid=4195, cgraph_uid=1, symbol_order=0)
+;; Function x (x, funcdef_no=0, decl_uid=4590, cgraph_uid=1, symbol_order=0)

  ;; Generating RTL for gimple basic block 2
@@ -25,6 +25,6 @@
  (note 1 0 3 NOTE_INSN_DELETED)
  (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
-(insn 5 2 0 2 (set (reg/v:SI 113 [ d ])
+(insn 5 2 0 2 (set (reg/v:SI 114 [ d ])
  (const_int -942519458 [0xc7d24b5e])) -1
   (nil))



That's not what I see if I compile with "-march=armv8-a -mthumb".  I get 
the reg_equal note that I expect and the insn is something like:


(insn 6 5 0 2 (set (zero_extract:SI (reg/v:SI 114 [ d ])
(const_int 16 [0x10])
(const_int 16 [0x10]))
(const_int 51154 [0xc7d2])) -1
 (expr_list:REG_EQUAL (const_int -942519458 [0xc7d24b5e])
(nil)))

Can you tell me the exact options you were using to get your output?

R.


In both versions, the constant is simply assigned, thus I updated the
expected RTL accordingly.

gcc/testsuite/ChangeLog:

* gcc.target/arm/reg_equal_test.c: Update expected RTL.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/reg_equal_test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/reg_equal_test.c 
b/gcc/testsuite/gcc.target/arm/reg_equal_test.c
index d87c75cc27c..4337e3f0af5 100644
--- a/gcc/testsuite/gcc.target/arm/reg_equal_test.c
+++ b/gcc/testsuite/gcc.target/arm/reg_equal_test.c
@@ -12,4 +12,4 @@ x ()
return;
  }
  
-/* { dg-final { scan-rtl-dump "expr_list:REG_EQUAL \\(const_int -942519458" "expand" } } */

+/* { dg-final { scan-rtl-dump "\\(const_int -942519458" "expand" } } */

Re: [PATCH] testsuite: arm: Update expected assembler for pr43920-2.c test

2024-12-12 Thread Richard Earnshaw (lists)


On 12/12/2024 13:47, Torbjorn SVENSSON wrote:



On 2024-12-12 12:02, Richard Earnshaw (lists) wrote:

On 10/11/2024 10:02, Torbjörn SVENSSON wrote:

Ok for trunk, releases/gcc-12, releases/gcc-13 and releases/gcc-14?

--

In version 6-2017-q1-update of the "GNU Arm Embedded Toolchain" build,
there are 2 pop instructions. In version 7-2018-q2-update, the next
version that still has a binary build available on launchpad, there is
only a single pop instruction.
When I try to build vanilla GCC in the same version range, I always end
up with a single pop instruction.

Since r12-5301-g04520645038, the generated assembler contains one more
register move, and it's requested in PR103298 to allow it.


Is that reference correct?  Which comment are you referring to if so? 
That PR is about switch table optimizations.


Now I'm confused. PR103298 is about regressions introduced in r12-5301, 
right?


Ah, I see.  The pr was referring to two tests:

gcc.target/arm/pr42093.c  which is the jump tables test and

gcc.target/arm/pr43920-2.c which is the test you are working on.

the digits in those two PR numbers are identical, but in a different 
order.  When I looked earlier I hadn't noticed that and followed the 
link to the wrong PR.  My apologies for the confusion that caused.


Now back to the right testcase.  This test is horrible: it seems to be 
trying to test for a specific transformation not being performed by 
counting the number of return instructions and the number of beq 
instructions.  But neither of these is likely to be very stable given 
the way the jump threading optimizations work.


I wonder if it would be better here just to scan for the absence of 
{cmp,mov,orr,and}{eq,ne} in the code (ie for instructions that suggest 
the use of LOGICAL_OP_SHORT_CIRCUIT).


Increasing the length here is OK, but even that is clearly not 
guaranteed, hence the skip of that test on fdpic targets, it would be 
equally good here to just drop that.


R.




Anyway, I was referring to the comment 3 from Richard Biener:

"Again, ARM folks - please allow for the extra reg copy."

So, the size increase is due to an extra reg copy and the reduction in 
number of pops is that there is only a single pop and I see no obvious 
reason why there would possibly need to be a 2nd one.


Kind regards,
Torbjörn



R.



gcc/testsuite/ChangeLog:

PR testsuite/103298
* gcc.target/arm/pr43920-2.c: Increase allowed text size and
lower number of expected pop instructions.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/pr43920-2.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr43920-2.c b/gcc/ 
testsuite/ gcc.target/arm/pr43920-2.c

index c367d6bc15d..80cc0b7d260 100644
--- a/gcc/testsuite/gcc.target/arm/pr43920-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr43920-2.c
@@ -27,6 +27,6 @@ int getFileStartAndLength (int fd, int *start_, 
size_t *length_)

    return 0;
  }
-/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "pop" 1 } } */
  /* { dg-final { scan-assembler-times "beq" 3 } } */
-/* { dg-final { object-size text <= 54 { target { ! arm*-*- 
uclinuxfdpiceabi } } } } */
+/* { dg-final { object-size text <= 56 { target { ! arm*-*- 
uclinuxfdpiceabi } } } } */

Re: [PATCH v2 5/5] testsuite: arm: Use effective-target for pr96939 test

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 19:54, Richard Earnshaw (lists) wrote:

On 21/11/2024 14:24, Torbjörn SVENSSON wrote:

Update test case to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/lto/pr96939_0.c: Use effective-target
arm_arch_v8a.
* gcc.target/arm/lto/pr96939_1.c: Remove dg-options.

Signed-off-by: Torbjörn SVENSSON 


OK

R.



Pushed as r15-6165-g95a0c716127.

Kind regards,
Torbjörn

Re: [PATCH v2 4/5] testsuite: arm: Use effective-target for its.c test [PR94531]

2024-12-12 Thread Torbjorn SVENSSON





On 2024-12-04 19:52, Richard Earnshaw (lists) wrote:

On 21/11/2024 14:24, Torbjörn SVENSSON wrote:

The test case gcc.target/arm/its.c was created together with restriction
of IT blocks for Cortex-M7. As the test case fails on all tunes that
do not match Cortex-M7, explicitly test it for Cortex-M7. To have some
additional faith that GCC does the correct thing, I also added another
variant of the test for Cortex-M3 that should allow longer IT blocks.

gcc/testsuite/ChangeLog:

PR testsuite/94531
* gcc.target/arm/its.c: Removed.
* gcc.target/arm/its-1.c: Copy of gcc.target/arm/its.c. Use
effective-target arm_cpu_cortex_m7.
* gcc.target/arm/its-2.c: Copy of gcc.target/arm/its.c. Use
effective-target arm_cpu_cortex_m3.

Signed-off-by: Torbjörn SVENSSON 


OK

R.



Pushed as r15-6164-gbdf75257aad.

Kind regards,
Torbjörn

Re: [PATCH 4/7 v3] lto: Implement ltrans cache

2024-12-12 Thread Jan Hubicka

> gcc/ChangeLog:
> 
>   * Makefile.in: Add lto-ltrans-cache.o.
>   * lto-wrapper.cc: Use ltrans cache.
>   * lto-ltrans-cache.cc: New file.
>   * lto-ltrans-cache.h: New file.
OK.
> +
> +/* Computes checksum for given file, returns NULL_CHECKSUM if not
> +   possible.
> + */

As a formatting nit, there should be "possible.  */"
> +  FILE *f_first = fopen (first_filename, "rb");
> +  if (!f_first)
> +return false;
> +
> +  FILE *f_second = fopen (second_filename, "rb");
> +  if (!f_second)
> +{
> +  fclose (f_first);
> +  return false;
> +}
> +
> +  for (;;)
> +{
> +  int c1, c2;
> +  c1 = fgetc (f_first);
> +  c2 = fgetc (f_second);

fgetc has kind of non-trivial overhead.  For non-MMAP systems 
(is Windows such?), I think allocating some buffer, say 64K
and doing fread/memcmp is probably better.
> +/* Reads next cache item from cachedata file.
> +   Adds `dir/` prefix to filenames.  */
> +static ltrans_file_cache::item*
> +read_cache_item (FILE* f, const char* dir)
> +{
> +  checksum_t checksum;
> +  uint32_t last_used;
> +
> +  if (fread (&checksum, 1, checksum.size (), f) != checksum.size ())
> +return NULL;
> +  if (fread (&last_used, sizeof (last_used), 1, f) != 1)
> +return NULL;
> +
> +  std::string input (dir);
> +  input.push_back ('/');
> +  std::string output = input; /* Copy.  */
> +
> +  int c;
> +  while ((c = getc (f)))
> +{
> +  if (c == EOF)
> + return NULL;
> +  input.push_back (c);
> +}
> +  input.push_back (0);
> +  while ((c = getc (f)))
> +{
> +  if (c == EOF)
> + return NULL;
> +  output.push_back (c);
> +}
> +  output.push_back (0);
Isn't std::string always 0 terminated?

Patch is OK, but please update the fgetc based file compare.
Honza

Re: [PATCH] testsuite: arm: Update expected RTL for reg_equal_test.c test

2024-12-12 Thread Richard Earnshaw (lists)


On 12/12/2024 13:36, Torbjorn SVENSSON wrote:



On 2024-12-12 12:26, Richard Earnshaw (lists) wrote:

On 10/11/2024 13:38, Torbjörn SVENSSON wrote:

Hi Richard,

I'm not sure if I'm doing something wrong here, or if it was an 
oversight

when doing the update in r12-8108-g62082d278d1.
Anyway, the commit message suggests that it's only the constant that 
is of
interest, so I updated the test to only check the constant. Do you 
think

this is enough, or should the test case also verify that it's used in
a "set" expression?

Ok for trunk and releases/gcc-14?

--

The test case was rewritten in r12-8108-g62082d278d1, but the expected
RTL was not updated.

The diff for the generated reg_equal_test.c.*r.expand files produced by
r12-8108-g62082d278d1 and r15-5047-g7e1d9f58858 is:

--- reg_equal_test.c.253r.expand-r12-8108-g62082d278d1  2024-11-10 
14:24:54.957438394 +0100
+++ reg_equal_test.c.268r.expand-r15-5047-g7e1d9f58858  2024-11-10 
14:30:13.633437178 +0100

@@ -1,5 +1,5 @@

-;; Function x (x, funcdef_no=0, decl_uid=4195, cgraph_uid=1, 
symbol_order=0)
+;; Function x (x, funcdef_no=0, decl_uid=4590, cgraph_uid=1, 
symbol_order=0)


  ;; Generating RTL for gimple basic block 2
@@ -25,6 +25,6 @@
  (note 1 0 3 NOTE_INSN_DELETED)
  (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
-(insn 5 2 0 2 (set (reg/v:SI 113 [ d ])
+(insn 5 2 0 2 (set (reg/v:SI 114 [ d ])
  (const_int -942519458 [0xc7d24b5e])) -1
   (nil))



That's not what I see if I compile with "-march=armv8-a -mthumb".  I 
get the reg_equal note that I expect and the insn is something like:


(insn 6 5 0 2 (set (zero_extract:SI (reg/v:SI 114 [ d ])
 (const_int 16 [0x10])
 (const_int 16 [0x10]))
 (const_int 51154 [0xc7d2])) -1
  (expr_list:REG_EQUAL (const_int -942519458 [0xc7d24b5e])
 (nil)))

Can you tell me the exact options you were using to get your output?


Hmm.. This is interesting. With Cortex-A, I do see the same output that 
you get. With Cortex-M, it's instead my output.


You can get my output with any of the Cortex-M targets (M3 or above):

This is the line that I've used
arm-none-eabi-gcc gcc.target/arm/reg_equal_test.c  -mthumb - 
march=armv8.1-m.main -mfloat-abi=soft -fgimple -O1 -fdump-rtl-expand -S 
-o /dev/null


I suppose the change I propose will match both cases, but is there any 
backside of not checking the REG_EQUAL part?

Should the test case be Cortex-A only?



I don't think so.  We'd expect the code to be using MOVW/MOVT here and 
that's what the require rules seem to be saying.  That constant can't 
really be handled by a single mov, so it looks like for your case the 
compiler is expecting this value to be spilled to a constant pool later 
on.  It might be legitimate with some costing models, but it seems a bit 
unlikely, especially when not -Os.


R.


Kind regards,
Torbjörn




R.


In both versions, the constant is simply assigned, thus I updated the
expected RTL accordingly.

gcc/testsuite/ChangeLog:

* gcc.target/arm/reg_equal_test.c: Update expected RTL.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/reg_equal_test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/reg_equal_test.c b/gcc/ 
testsuite/gcc.target/arm/reg_equal_test.c

index d87c75cc27c..4337e3f0af5 100644
--- a/gcc/testsuite/gcc.target/arm/reg_equal_test.c
+++ b/gcc/testsuite/gcc.target/arm/reg_equal_test.c
@@ -12,4 +12,4 @@ x ()
    return;
  }
-/* { dg-final { scan-rtl-dump "expr_list:REG_EQUAL \\(const_int 
-942519458" "expand" } } */

+/* { dg-final { scan-rtl-dump "\\(const_int -942519458" "expand" } } */

[PATCH] c++: Only prune capture proxies for constant variables at instantiation time [PR114292]

2024-12-12 Thread Simon Martin

We currently ICE upon the following valid (under -Wno-vla) code

=== cut here ===
void f(int c) {
  constexpr int r = 4;
  [&](auto) { int t[r * c]; }(0);
}
=== cut here ===

The problem is that when parsing the lambda body, and more specifically
the multiplication, we mark the lambda as LAMBDA_EXPR_CAPTURE_OPTIMIZED
even though the replacement of r by 4 is "undone" by the call to
build_min_non_dep in build_x_binary_op. This makes prune_lambda_captures
remove the proxy declaration while it should not, and we trip on an
assert at instantiation time.

This patch fixes the ICE by making sure that lambdas are only marked as
LAMBDA_EXPR_CAPTURE_OPTIMIZED when they're instantiated (I tried other
strategies like not undoing constant folding in build_min_non_dep, but
it is pretty intrusive and breaks lots of things).

The test I added also shows that we don't always optimize out captures
to constants for lambdas that are not within a template (see ok_2 for
example, or ok_3 that unlike ok_2 "regresses" a bit with my patch) - I'm
curious if we consider it a problem or not? If so, I can try to fix this
in a follow-up patch.

Successfully tested on x86_64-pc-linux-gnu.

PR c++/114292

gcc/cp/ChangeLog:

* cp-tree.h (mark_const_var_capture_optimized): Declare.
* expr.cc (mark_use): Call mark_const_var_capture_optimized.
* lambda.cc (mark_const_var_capture_optimized): New. Only set
LAMBDA_EXPR_CAPTURE_OPTIMIZED at lambda instantiation time.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/lambda-ice4.C: New test.

---
 gcc/cp/cp-tree.h |  1 +
 gcc/cp/expr.cc   | 10 ++
 gcc/cp/lambda.cc | 13 +++
 gcc/testsuite/g++.dg/cpp1y/lambda-ice4.C | 44 
 4 files changed, 60 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-ice4.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c5e0fc5c440..ce050032fdb 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8058,6 +8058,7 @@ extern bool is_constant_capture_proxy   (tree);
 extern void register_capture_members   (tree);
 extern tree lambda_expr_this_capture(tree, int);
 extern void maybe_generic_this_capture (tree, tree);
+extern void mark_const_var_capture_optimized   (void);
 extern tree maybe_resolve_dummy(tree, bool);
 extern tree current_nonlambda_function (void);
 extern tree nonlambda_method_basetype  (void);
diff --git a/gcc/cp/expr.cc b/gcc/cp/expr.cc
index de4991e616c..d6a2454c46e 100644
--- a/gcc/cp/expr.cc
+++ b/gcc/cp/expr.cc
@@ -120,10 +120,7 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
{
  tree val = RECUR (cap);
  if (!is_capture_proxy (val))
-   {
- tree l = current_lambda_expr ();
- LAMBDA_EXPR_CAPTURE_OPTIMIZED (l) = true;
-   }
+   mark_const_var_capture_optimized ();
  return val;
}
}
@@ -171,10 +168,7 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
{
  tree val = RECUR (cap);
  if (!is_capture_proxy (val))
-   {
- tree l = current_lambda_expr ();
- LAMBDA_EXPR_CAPTURE_OPTIMIZED (l) = true;
-   }
+   mark_const_var_capture_optimized ();
  return val;
}
}
diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index d8a15d97d5d..4fd3b39c99b 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -945,6 +945,19 @@ resolvable_dummy_lambda (tree object)
   return NULL_TREE;
 }
 
+/* Called when optimizing out a capture to a const variable.  */
+
+void
+mark_const_var_capture_optimized ()
+{
+  /* The actual optimizing out only occurs when instantiating the lambda.  */
+  if (processing_template_decl)
+return;
+
+  tree l = current_lambda_expr ();
+  LAMBDA_EXPR_CAPTURE_OPTIMIZED (l) = true;
+}
+
 /* We don't want to capture 'this' until we know we need it, i.e. after
overload resolution has chosen a non-static member function.  At that
point we call this function to turn a dummy object into a use of the
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-ice4.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-ice4.C
new file mode 100644
index 000..fe8df52b827
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-ice4.C
@@ -0,0 +1,44 @@
+// PR c++/114292
+// { dg-do "compile" { target c++14 } }
+// { dg-additional-options "-Wno-vla" }
+
+#define ASSERT_CAPTURE_NUMBER(Lambda, NumCaptures) \
+  { \
+auto oneCapture = [&](auto) { int t[c]; }; \
+const auto sizeOneCapture = sizeof (oneCapture); \
+const auto expected = NumCaptures ? NumCaptures * sizeOneCapture : 1; \
+static_assert (sizeof (Lambda) == expected, ""); \
+  }
+
+void foo (int c)
+{
+  constexpr int r = 4;
+
+  /

[PATCH] c++: ICE in TARGET_EXPR evaluation in cp_fold_r [PR117980]

2024-12-12 Thread Marek Polacek

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This ICE started with the recent prvalue optimization (r15-6052).  In
cp_fold_r we have:

  if (tree &init = TARGET_EXPR_INITIAL (stmt))
{
  cp_walk_tree (&init, cp_fold_r, data, NULL);
  // ...
  tree folded = maybe_constant_init (init, TARGET_EXPR_SLOT (stmt));

What can happen here is that originally the TARGET_EXPR is:

TARGET_EXPR >>
  &TARGET_EXPR }> 

but after the first cp_walk_tree we fold the D.2707 TARGET_EXPR into:

TARGET_EXPR  

and then we pass the EXPR_STMT to maybe_constant_init, with D.2707 as
the object.  But their types don't match anymore, so we crash.  We'd
have to pass D.2707.it as the object for it to work.

But I don't think we need to pass any object to maybe_constant_init;
it'll grab the appropriate one itself.

constexpr-prvalue3.C is reduced from a large std::ranges libstdc++ test.

PR c++/117980

gcc/cp/ChangeLog:

* cp-gimplify.cc (cp_fold_r) <case TARGET_EXPR>: Don't pass an object
to maybe_constant_init.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-prvalue2.C: New test.
* g++.dg/cpp0x/constexpr-prvalue3.C: New test.
---
 gcc/cp/cp-gimplify.cc |  2 +-
 .../g++.dg/cpp0x/constexpr-prvalue2.C | 15 +++
 .../g++.dg/cpp0x/constexpr-prvalue3.C | 26 +++
 3 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue3.C

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index 623e2ee6e96..a861331c7c7 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -1477,7 +1477,7 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_)
  *walk_subtrees = 0;
  if (!flag_no_inline)
{
- tree folded = maybe_constant_init (init, TARGET_EXPR_SLOT (stmt));
+ tree folded = maybe_constant_init (init);
  if (folded != init && TREE_CONSTANT (folded))
init = folded;
}
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue2.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue2.C
new file mode 100644
index 000..46053231cf8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue2.C
@@ -0,0 +1,15 @@
+// PR c++/117980
+// { dg-do compile { target c++11 } }
+// { dg-options "-O" }
+
+struct S {
+  constexpr S(S &); // { dg-warning "used but never defined" }
+  ~S();
+};
+struct B {
+  S s;
+};
+struct A {
+  B b;
+};
+void fn(B b) { A{b}; }
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue3.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue3.C
new file mode 100644
index 000..a2eb12c02d7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue3.C
@@ -0,0 +1,26 @@
+// PR c++/117980
+// { dg-do compile { target c++11 } }
+// { dg-options "-O" }
+
+struct _Safe_iterator  {
+ _Safe_iterator();
+  ~_Safe_iterator();
+};
+template 
+struct vector {
+  vector(int) {}
+  constexpr _Safe_iterator end() {
+return _Safe_iterator();
+  }
+};
+template  struct sentinel {
+  It it;
+};
+template 
+struct subrange {
+  subrange(sentinel<_Safe_iterator>) {}
+};
+void test01() {
+  vector v{0};
+  subrange>{sentinel<_Safe_iterator>{v.end()}};
+}

base-commit: 2cbb2408a830a63fbd901a4da3bfd341cec4b6ef
-- 
2.47.1

[PATCH] c++: current inst w/ indirect dependent bases [PR117993]

2024-12-12 Thread Patrick Palka

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?  The older regression does not seem worth fixing.

-- >8 --

In the first testcase we're overeagerly diagnosing qualified name lookup
failure for f from the current instantiation B::C ahead of time
because we (correctly) deem C to not have any direct dependent bases:
its only base is B which is part of the current instantiation and
therefore not a dependent base, and we decide it's safe to diagnose name
lookup failure ahead of time.

But this testcase demonstrates it's not enough to consider only direct
dependent bases: f is defined in A which is a dependent base of
B, so qualified name lookup won't search it ahead of time and in
turn name lookup won't be exhaustive, and so it's wrong to diagnose
lookup failure ahead of time.  This ultimately suggests that
any_dependent_bases_p needs to consider indirect bases as well.

It seems sufficient to recurse into any !BINFO_DEPENDENT_BASE_P base
since the recursive call will exit early for non-dependent types.
So effectively we'll only recurse into bases belonging to the current
instantiation.

I considered making only dependentish_scope_p consider indirect
dependent bases, but it seems other any_dependent_bases_p callers
also want this behavior, e.g. build_new_method_call for benefit of
the second testcase (which is an even older regression since GCC 7).

PR c++/117993

gcc/cp/ChangeLog:

* search.cc (any_dependent_bases_p): Recurse into bases (of
dependent type) that are not BINFO_DEPENDENT_BASE_P.  Document
default argument.

gcc/testsuite/ChangeLog:

* g++.dg/template/dependent-base4.C: New test.
* g++.dg/template/dependent-base5.C: New test.
---
 gcc/cp/search.cc  |  5 ++--
 .../g++.dg/template/dependent-base4.C | 23 +++
 .../g++.dg/template/dependent-base5.C | 22 ++
 3 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/dependent-base4.C
 create mode 100644 gcc/testsuite/g++.dg/template/dependent-base5.C

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index d438e18b74b..8a65dd3b68e 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -2843,7 +2843,7 @@ original_binfo (tree binfo, tree here)
TYPE).  */
 
 bool
-any_dependent_bases_p (tree type)
+any_dependent_bases_p (tree type /* = current_nonlambda_class_type () */)
 {
   if (!type || !CLASS_TYPE_P (type) || !uses_template_parms (type))
 return false;
@@ -2858,7 +2858,8 @@ any_dependent_bases_p (tree type)
   unsigned i;
   tree base_binfo;
   FOR_EACH_VEC_SAFE_ELT (BINFO_BASE_BINFOS (TYPE_BINFO (type)), i, base_binfo)
-if (BINFO_DEPENDENT_BASE_P (base_binfo))
+if (BINFO_DEPENDENT_BASE_P (base_binfo)
+   || any_dependent_bases_p (BINFO_TYPE (base_binfo)))
   return true;
 
   return false;
diff --git a/gcc/testsuite/g++.dg/template/dependent-base4.C 
b/gcc/testsuite/g++.dg/template/dependent-base4.C
new file mode 100644
index 000..84e53b5f3fb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/dependent-base4.C
@@ -0,0 +1,23 @@
+// PR c++/117993
+
+template
+struct A {
+  void f();
+  typedef void type;
+};
+
+template
+struct B : A {
+  template struct C;
+};
+
+template
+template
+struct B::C : B {
+  void g(C& c) {
+this->f();   // { dg-bogus "member" }
+c.f();   // { dg-bogus "member" }
+C::f();  // { dg-bogus "member" }
+typename C::type* p; // { dg-bogus "not name a type" }
+  }
+};
diff --git a/gcc/testsuite/g++.dg/template/dependent-base5.C 
b/gcc/testsuite/g++.dg/template/dependent-base5.C
new file mode 100644
index 000..2e9cdaa242b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/dependent-base5.C
@@ -0,0 +1,22 @@
+template
+struct A { };
+
+template
+struct B : A {
+  template struct C;
+};
+
+struct D { void f(); };
+
+template
+template
+struct B::C : B {
+  void g() {
+D::f(); // { dg-bogus "without object" }
+  }
+};
+
+template<>
+struct A : D { };
+
+template struct B::C;
-- 
2.47.1.440.gcaacdb5dfd

[PATCH 2/2] c++: constexpr potentiality of CAST_EXPR [PR117925]

2024-12-12 Thread Patrick Palka

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk/14?

This fixes the testcase in the PR but doesn't thoroughly fix the
underlying issue since if we replace fnPtr with e.g. a constexpr variable
so that the callee is truly potentially constant then the ICE reappears
(due to encountering CAST_EXPR during constexpr evaluation from
maybe_warn_nodiscard), but with the previous patch this should now only
happen in checking mode.  I suspect a call to fold_non_dependent_expr is
missing, but I'm not sure where would be best to put it.

-- >8 --

We're incorrectly treating the templated callee (FnPtr)fnPtr, represented
as CAST_EXPR of TREE_LIST, as potentially constant here due to failing
to look through the TREE_LIST in the CAST_EXPR case of p_c_e_1.

PR c++/117925

gcc/cp/ChangeLog:

* constexpr.cc (potential_constant_expression_1) <case CAST_EXPR>:
Fix check for class conversion to literal type to properly look
through the TREE_LIST operand of a CAST_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent35.C: New test.
---
 gcc/cp/constexpr.cc | 11 ---
 gcc/testsuite/g++.dg/template/non-dependent35.C |  8 
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/non-dependent35.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 221d57f6ffa..cda49008f81 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -10262,9 +10262,14 @@ potential_constant_expression_1 (tree t, bool 
want_rval, bool strict, bool now,
   && (dependent_type_p (TREE_TYPE (t))
   || !COMPLETE_TYPE_P (TREE_TYPE (t))
   || literal_type_p (TREE_TYPE (t)))
-  && TREE_OPERAND (t, 0))
-   {
- tree type = TREE_TYPE (TREE_OPERAND (t, 0));
+  && TREE_OPERAND (t, 0)
+  && (TREE_CODE (t) != CAST_EXPR
+  || !TREE_CHAIN (TREE_OPERAND (t, 0
+   {
+ tree from = TREE_OPERAND (t, 0);
+ if (TREE_CODE (t) == CAST_EXPR)
+   from = TREE_VALUE (from);
+ tree type = TREE_TYPE (from);
  /* If this is a dependent type, it could end up being a class
 with conversions.  */
  if (type == NULL_TREE || WILDCARD_TYPE_P (type))
diff --git a/gcc/testsuite/g++.dg/template/non-dependent35.C 
b/gcc/testsuite/g++.dg/template/non-dependent35.C
new file mode 100644
index 000..7e3ba99b023
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent35.C
@@ -0,0 +1,8 @@
+// PR c++/117925
+
+typedef int(*FnPtr)();
+
+template
+void fnICE(void* fnPtr) {
+  ((FnPtr)fnPtr)();
+}
-- 
2.47.1.440.gcaacdb5dfd

[PATCH 1/2] c++: relax ICE for unexpected trees during constexpr [PR117925]

2024-12-12 Thread Patrick Palka

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk and perhaps 14?

-- >8 --

When we encounter an unexpected (likely templated) tree code during
constexpr evaluation we currently ICE even in release mode.  But it
seems more user-friendly to just gracefully treat the expression as
non-constant, which will be harmless most of the time (e.g. in the case
of warning-specific or speculative constexpr folding as in the PR), and
at worst would transform an ICE-on-valid bug into a rejects-valid bug.
This is also what e.g. tsubst_expr does when it encounters an unexpected
(likely non-templated) tree code.

PR c++/117925

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_constant_expression) :
Relax ICE when encountering an unexpected tree code into a
checking ICE guarded by flag_checking.
---
 gcc/cp/constexpr.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 55e44fcbafb..221d57f6ffa 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8613,7 +8613,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
error_at (EXPR_LOCATION (t),
  "statement is not a constant expression");
}
-  else
+  else if (flag_checking)
internal_error ("unexpected expression %qE of kind %s", t,
get_tree_code_name (TREE_CODE (t)));
   *non_constant_p = true;
-- 
2.47.1.440.gcaacdb5dfd

Re: Should -fsanitize=bounds support counted-by attribute for pointers inside a structure?

2024-12-12 Thread Martin Uecker

Am Montag, dem 09.12.2024 um 16:20 + schrieb Qing Zhao:
> 
> > On Dec 7, 2024, at 03:57, Martin Uecker  wrote:
> > 
> > Am Freitag, dem 06.12.2024 um 16:13 + schrieb Qing Zhao:
> > > 
> > > > On Dec 6, 2024, at 10:56, Martin Uecker  wrote:
> > > > 
> > > > Am Freitag, dem 06.12.2024 um 14:16 + schrieb Qing Zhao:
> > > > > 
> > 
> > ...
> > 
> > > > > > 
> > > > > > I think the relevant scenario is where you assign the struct and
> > > > > > not a pointer to the struct, i.e. something like the following:
> > > > > > 
> > > > > > #include "builtin-object-size-common.h"
> > > > > > 
> > > > > > struct annotated {
> > > > > > int b;
> > > > > > int *c __attribute__ ((counted_by (b)));
> > > > > > };
> > > > > > 
> > > > > > struct annotated __attribute__((__noinline__)) setup (int 
> > > > > > attr_count)
> > > > > > {
> > > > > > struct annotated p_array_annotated;
> > > > > > p_array_annotated.c = (int *) malloc (sizeof (int) * attr_count);
> > > > > > p_array_annotated.b = attr_count;
> > > > > > 
> > > > > > return p_array_annotated;
> > > > > > }
> > > > > > 
> > > > > > 
> > > > > > int main(int argc, char *argv[])
> > > > > > {
> > > > > > struct annotated x = setup (10); 
> > > > > > int *p = x.c;
> > > > > > x = setup (20);
> > > > > > EXPECT(__builtin_dynamic_object_size (p, 1), 10 * sizeof (int));
> > > > > > EXPECT(__builtin_dynamic_object_size (x.c, 1), 20 * sizeof (int));
> > > > > > DONE ();
> > > > > > }
> > > > > > 
> > > > > 
> > > > > With the above testing case, my current implementation based on 
> > > > > .ACCESS_WITH_SIZE succeed without any issue.  -:)
> > > > > The design of .ACCESS_WITH_SIZE already resolved this issue.
> > > > 
> > > > Ok, thanks!  But I am a bit confused, because it seems it behaves 
> > > > this way also for FAMs 
> > > > 
> > > > https://godbolt.org/z/64a6z4cna
> > > 
> > > The behavior of the above testing case is exactly the additional feature 
> > > we provided for counted_by attribute for FAM:
> > > 
> > > =
> > > One important feature of the attribute is, a reference to the
> > > flexible array member field uses the latest value assigned to the
> > > field that represents the number of the elements before that
> > > reference.  For example,
> > > 
> > >p->count = val1;
> > >p->array[20] = 0;  // ref1 to p->array
> > >p->count = val2;
> > >p->array[30] = 0;  // ref2 to p->array
> > > 
> > > in the above, 'ref1' uses 'val1' as the number of the elements in
> > > 'p->array', and 'ref2' uses 'val2' as the number of elements in
> > > 'p->array’.
> > > =
> > > 
> > > So, it’s the correct behavior for the counted_by attribute for FAM based 
> > > on our previous discussion and agreement. 
> > 
> > If it is indeed that the value of p->count last stored before p->array is
> > *referenced* which counts, then everything is well.  
> 
> Yes, For FAM, every “reference” to p->array will be converted as a call to 
> (*.ACCESS_WITH_SIZE (p->array, &p->count, …))

Can you remind me why we have to pass the address of p->count, i.e. &p->count,
instead of its value?

> 
> The count value for p->array is  *(&p->count), which is guaranteed to be the 
> last stored value of the address of p->count before the current reference to 
> p->array. 
> 
> Similarly, for the pointer array,  every “reference” to p->pa will be 
> converted as a call to .ACCESS_WITH_SIZE(p->pa, &p->count…). The count value 
> of the pointer array p->pa is *(&p->count), which is also guaranteed to be 
> the last stored value of the address of p->count before the current reference 
> to p->pa. 
> 
> > Somehow I thought for FAMs it is the value p->count last stored before
> > p->array is *accessed* (possibly indirectly via another pointer).  Probably
> > it was just me being confused.
> > 
> > > 
> > > However, as you pointed out, when the “counted_by” attribute is extended 
> > > to  the pointer field, this feature will be problematic.
> > > And we need to add the following additional new requirement for the 
> > > “counted_by” attribute of pointer field:
> > > 
> > > p->count and  p->array  can only be changed by changing the whole 
> > > structure at the same time.
> > 
> > Actually, I am then not even sure we need this requirement. My point was 
> > only that
> > setting the whole structure at the time should work correctly, i.e. without 
> > changing
> > the bounds for old pointers which were stored in the struct previously.  
> > With the
> > semantics  above it seems this case also works automatically.
> 
> For pointer field with counted_by attribute, if the p->count and p->pa are 
> not set together when changing the whole structure, then for example: 
> 
> struct annotated {
>   int b;
>   int *c __attribute__ ((counted_by (b)));
> };
> 
> /* note, this routine only allocate the space for the pointer array field, 
> but does NOT set the counted_by field.  */
> struct annotated __attribute__((__noinline__)) se

[PATCH] Apply lambda section attributes to static thunks

2024-12-12 Thread Campbell Suter

Each lambda that can be converted to a plain function pointer has a
thunk generated for it, which invokes the body of the lambda function.

When a section attribute is added to a lambda function, it only applies
to the body of the lambda function, and not the thunk. When a lambda is
only ever used by converting it to a function pointer, the body of the
lambda is inlined into this thunk. As a result, the section attribute
is effectively ignored: the function it applied to is gone, and the thunk
does not have the section attribute applied to it either.

This patch checks if a section attribute is present on a lambda, and
applies it to the thunk.

The motivation for this change is embedded devices where most code is
executed from flash, but code which must execute while the device is
being reprogrammed can be moved to RAM by placing it in a different
section.

This patch was tested with bootstrapping on x86-64 under WSL, and
the newly added test was also run on 32-bit ARM.

gcc/cp/ChangeLog:

* lambda.cc (maybe_add_lambda_conv_op): Don't ignore section
attributes on lambda functions which are converted to plain
function pointers.

gcc/testsuite/ChangeLog:

* g++.dg/ext/attr-section-lambda.C: New test.

Signed-off-by: Campbell Suter 
---
 gcc/cp/lambda.cc  |  8 
 .../g++.dg/ext/attr-section-lambda.C  | 42 +++
 2 files changed, 50 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-section-lambda.C

diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index d8a15d97d..e8937cc0d 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "target.h"
 #include "decl.h"
+#include "attribs.h"
 
 /* Constructor for a lambda expression.  */
 
@@ -1376,6 +1377,13 @@ maybe_add_lambda_conv_op (tree type)
   if (generic_lambda_p)
 fn = add_inherited_template_parms (fn, DECL_TI_TEMPLATE (callop));
 
+  if (lookup_attribute ("section", DECL_ATTRIBUTES (callop)))
+{
+  duplicate_one_attribute(&DECL_ATTRIBUTES (fn),
+   DECL_ATTRIBUTES (callop), "section");
+  set_decl_section_name (statfn, callop);
+}
+
   if (flag_sanitize & SANITIZE_NULL)
 /* Don't UBsan this function; we're deliberately calling op() with a null
object argument.  */
diff --git a/gcc/testsuite/g++.dg/ext/attr-section-lambda.C 
b/gcc/testsuite/g++.dg/ext/attr-section-lambda.C
new file mode 100644
index 0..4202ea943
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/attr-section-lambda.C
@@ -0,0 +1,42 @@
+// Make sure that attributes apply to lambda functions properly, and aren't
+// broken by inlining with their static thunks.
+//
+// { dg-do compile { target { c++11 && named_sections } } }
+// { dg-options "-O2" }
+
+
+extern int i;
+
+void func_ptr_consumer (int (*)());
+
+void __attribute__((section(".outer_regular")))
+test_func_regular ()
+{
+  func_ptr_consumer ([]() __attribute__((section(".lambda_regular"))) {
+return i;
+  });
+}
+
+template
+void __attribute__((section(".outer_template"))) __attribute__((noinline))
+test_func_generic ()
+{
+  func_ptr_consumer ([]() __attribute__((section(".lambda_template"))) {
+return i + T::a;
+  });
+}
+
+struct
+A
+{
+  static constexpr int a = 0xaabbccdd;
+};
+
+template void test_func_generic();
+
+// Since we've enabled optimisations, the _FUN thunk will have absorbed the
+// lambda bodies due to inlining.
+// Thus if these sections exist, they must be from the thunk - and thus the
+// thunk has its section set.
+// { dg-final { scan-assembler {\.(section|csect)[ \t]+"?\.lambda_regular} } }
+// { dg-final { scan-assembler {\.(section|csect)[ \t]+"?\.lambda_template} } }
-- 
2.34.1

[COMMITTED] Clean up documentation of -Wsuggest-attribute= [PR115532]

2024-12-12 Thread Sandra Loosemore

The list of -Wsuggest-attribute= variants was out of date in the option
summary (and getting too long to fit on one line), and an index entry was
missing for -Wsuggest-attribute=returns_nonnull.

gcc/c-family/ChangeLog
PR c/115532
* c.opt.urls: Regenerated.

gcc/ChangeLog
PR c/115532
* common.opt.urls: Regenerated.
* doc/invoke.texi (Option Summary): Don't try to list all the
-Wsuggest-attribute= variants inline here.
(Warning Options): Likewise.  Add @opindex for
Wsuggest-attribute=returns_nonnull and its no- form.  Remove
@itemx for no- form.

Co-Authored-By: Peter Eisentraut 
---
 gcc/c-family/c.opt.urls |  3 ---
 gcc/common.opt.urls |  6 ++
 gcc/doc/invoke.texi | 12 +++-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls
index 8fbe3bd2698..6c08b0ae052 100644
--- a/gcc/c-family/c.opt.urls
+++ b/gcc/c-family/c.opt.urls
@@ -864,9 +864,6 @@ UrlSuffix(gcc/Warning-Options.html#index-Wno-system-headers)
 Wtemplates
 UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-templates)
 
-Wtautological-compare
-UrlSuffix(gcc/Warning-Options.html#index-Wno-tautological-compare)
-
 Wtemplate-body
 UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-template-body)
 
diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls
index 773c021dd94..577e00d7a27 100644
--- a/gcc/common.opt.urls
+++ b/gcc/common.opt.urls
@@ -223,6 +223,9 @@ 
UrlSuffix(gcc/Warning-Options.html#index-Wno-suggest-attribute_003dnoreturn)
 Wsuggest-attribute=malloc
 UrlSuffix(gcc/Warning-Options.html#index-Wno-suggest-attribute_003dmalloc)
 
+Wsuggest-attribute=returns_nonnull
+UrlSuffix(gcc/Warning-Options.html#index-Wno-suggest-attribute_003dreturns_005fnonnull)
+
 Wsuggest-final-types
 UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-suggest-final-types)
 
@@ -235,6 +238,9 @@ 
UrlSuffix(gcc/Warning-Options.html#index-Wno-switch-unreachable)
 Wsystem-headers
 UrlSuffix(gcc/Warning-Options.html#index-Wno-system-headers)
 
+Wtautological-compare
+UrlSuffix(gcc/Warning-Options.html#index-Wno-tautological-compare)
+
 Wtrampolines
 UrlSuffix(gcc/Warning-Options.html#index-Wno-trampolines)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b85084459b1..67a3c8fd91e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -416,7 +416,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wstring-compare
 -Wno-stringop-overflow -Wno-stringop-overread
 -Wno-stringop-truncation  -Wstrict-flex-arrays
--Wsuggest-attribute=@r{[}pure@r{|}const@r{|}noreturn@r{|}format@r{|}malloc@r{]}
+-Wsuggest-attribute=@var{attribute-name}
 -Wswitch  -Wno-switch-bool  -Wswitch-default  -Wswitch-enum
 -Wno-switch-outside-range  -Wno-switch-unreachable  -Wsync-nand
 -Wsystem-headers  -Wtautological-compare  -Wtrailing-whitespace
@@ -8354,9 +8354,9 @@ even without optimization.
 
 @opindex Wsuggest-attribute=
 @opindex Wno-suggest-attribute=
-@item 
-Wsuggest-attribute=@r{[}pure@r{|}const@r{|}noreturn@r{|}format@r{|}cold@r{|}malloc@r{]}returns_nonnull@r{|}
+@item -Wsuggest-attribute=@var{attribute-name}
 Warn for cases where adding an attribute may be beneficial. The
-attributes currently supported are listed below.
+@var{attribute-name}s currently supported are listed below.
 
 @table @gcctabopt
 @opindex Wsuggest-attribute=pure
@@ -8369,16 +8369,18 @@ attributes currently supported are listed below.
 @opindex Wno-missing-noreturn
 @opindex Wsuggest-attribute=malloc
 @opindex Wno-suggest-attribute=malloc
+@opindex Wsuggest-attribute=returns_nonnull
+@opindex Wno-suggest-attribute=returns_nonnull
 @item -Wsuggest-attribute=pure
 @itemx -Wsuggest-attribute=const
 @itemx -Wsuggest-attribute=noreturn
 @itemx -Wmissing-noreturn
 @itemx -Wsuggest-attribute=malloc
 @itemx -Wsuggest-attribute=returns_nonnull
-@itemx -Wno-suggest-attribute=returns_nonnull
 
 Warn about functions that might be candidates for attributes
-@code{pure}, @code{const}, @code{noreturn}, @code{malloc} or 
@code{returns_nonnull}. The compiler
+@code{pure}, @code{const}, @code{noreturn}, @code{malloc} or
+@code{returns_nonnull}.  The compiler
 only warns for functions visible in other compilation units or (in the case of
 @code{pure} and @code{const}) if it cannot prove that the function returns
 normally. A function returns normally if it doesn't contain an infinite loop or
-- 
2.25.1

1 2 >

1 - 100 of 112 matches

Mail list logo