[PATCH] Add ia64*-*-* to the list of obsolete targets

2024-02-23 Thread Richard Biener
The following deprecates ia64*-*-* for GCC 14.  Since we plan to
force LRA for GCC 15 and the target only has slim chances of getting
updated, this notifies people in advance.  Given that both Linux and
glibc have axed the target, further development is also made difficult.

"Tested" for ia64-elf and x86_64-unknown-linux-gnu.

OK?  There's no listed ia64 maintainer to CC.

Thanks,
Richard.

gcc/
* config.gcc: Add ia64*-*-* to the list of obsoleted targets.

contrib/
* config-list.mk (LIST): --enable-obsolete for ia64*-*-*.
---
 contrib/config-list.mk | 5 +++--
 gcc/config.gcc | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index 0694cc128fa..16df66f0fc6 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -60,8 +60,9 @@ LIST = \
   i686-pc-linux-gnu i686-pc-msdosdjgpp i686-lynxos i686-nto-qnx \
   i686-rtems i686-solaris2.11 i686-wrs-vxworks \
   i686-wrs-vxworksae \
-  i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \
-  ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \
+  i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elfOPT-enable-obsolete \
+  ia64-linuxOPT-enable-obsolete ia64-hpuxOPT-enable-obsolete \
+  ia64-hp-vmsOPT-enable-obsolete iq2000-elf lm32-elf \
   lm32-rtems lm32-uclinux \
   loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \
   m32c-elf m32r-elf m32rle-elf \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index a0f9c672308..2e35a112040 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -273,6 +273,7 @@ esac
 # Obsolete configurations.
 case ${target}${target_min} in
 *-*-solaris2.11.[0-3]* \
+   | ia64*-*-* \
  )
 if test "x$enable_obsolete" != xyes; then
   echo "*** Configuration ${target}${target_min} is obsolete." >&2
-- 
2.35.3


[PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread pan2 . li
From: Pan Li 

This patch introduces a new GCC option for RVV to specify the bit size
of an RVV vector register.  Valid arguments to '-mrvv-vector-bits=' are:

* 64
* 128
* 256
* 512
* 1024
* 2048
* 4096
* 8192
* 16384
* 32768
* 65536
* scalable
* zvl

1. scalable is the default value; it takes min_vlen for
   riscv_vector_chunks.
2. zvl picks up the zvl*b value from the -march option.  For example,
   mrvv-vector-bits will be 1024 when -march=rv64gcv_zvl1024b.
3. Otherwise, the provided value is taken, and an error is reported if
   none of the valid values above is given.

This option may influence code generation during auto-vectorization.  For example,

void test_rvv_vector_bits (int *a, int *b, int *out)
{
  for (int i = 0; i < 8; i++)
out[i] = a[i] + b[i];
}

It will generate code similar to the below when built with
  -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=zvl

test_rvv_vector_bits:
  ...
  vsetivli  zero,4,e32,m1,ta,ma
  vle32.v   v1,0(a0)
  vle32.v   v2,0(a1)
  vadd.vv   v1,v1,v2
  vse32.v   v1,0(a2)
  ...
  vle32.v   v1,0(a0)
  vle32.v   v2,0(a1)
  vadd.vv   v1,v1,v2
  vse32.v   v1,0(a2)

And it will become simpler, similar to the below, when built with
  -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=256

test_rvv_vector_bits:
  ...
  vsetivli  zero,8,e32,m2,ta,ma
  vle32.v   v2,0(a0)
  vle32.v   v4,0(a1)
  vadd.vv   v2,v2,v4
  vse32.v   v2,0(a2)

Passed the rvv regression tests.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
different RVV vector bits.
* config/riscv/riscv.cc (riscv_convert_vector_bits): New function to
get the RVV vector bits for the given min_vlen.
(riscv_convert_vector_chunks): Combine the mrvv-vector-bits
option with min_vlen into RVV vector chunks.
(riscv_override_options_internal): Update comments and rename the
vector chunks.
* config/riscv/riscv.opt: Add option mrvv-vector-bits.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-opts.h | 16 ++
 gcc/config/riscv/riscv.cc | 49 ---
 gcc/config/riscv/riscv.opt| 47 ++
 .../riscv/rvv/base/rvv-vector-bits-1.c|  6 +++
 .../riscv/rvv/base/rvv-vector-bits-2.c| 20 
 .../riscv/rvv/base/rvv-vector-bits-3.c| 25 ++
 .../riscv/rvv/base/rvv-vector-bits-4.c|  6 +++
 7 files changed, 163 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-4.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4edddbadc37..b2141190731 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -129,6 +129,22 @@ enum vsetvl_strategy_enum {
   VSETVL_OPT_NO_FUSION,
 };
 
+enum rvv_vector_bits_enum {
+  RVV_VECTOR_BITS_SCALABLE,
+  RVV_VECTOR_BITS_ZVL,
+  RVV_VECTOR_BITS_64 = 64,
+  RVV_VECTOR_BITS_128 = 128,
+  RVV_VECTOR_BITS_256 = 256,
+  RVV_VECTOR_BITS_512 = 512,
+  RVV_VECTOR_BITS_1024 = 1024,
+  RVV_VECTOR_BITS_2048 = 2048,
+  RVV_VECTOR_BITS_4096 = 4096,
+  RVV_VECTOR_BITS_8192 = 8192,
+  RVV_VECTOR_BITS_16384 = 16384,
+  RVV_VECTOR_BITS_32768 = 32768,
+  RVV_VECTOR_BITS_65536 = 65536,
+};
+
 #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && TARGET_64BIT))
 
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e984ee2a55..366d7ece383 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8801,13 +8801,50 @@ riscv_init_machine_status (void)
   return ggc_cleared_alloc ();
 }
 
-/* Return the VLEN value associated with -march.
+static int
+riscv_convert_vector_bits (int min_vlen)
+{
+  int rvv_bits = 0;
+
+  switch (rvv_vector_bits)
+{
+  case RVV_VECTOR_BITS_SCALABLE:
+  case RVV_VECTOR_BITS_ZVL:
+   rvv_bits = min_vlen;
+   break;
+  case RVV_VECTOR_BITS_64:
+  case RVV_VECTOR_BITS_128:
+  case RVV_VECTOR_BITS_256:
+  case RVV_VECTOR_BITS_512:
+  case RVV_VECTOR_BITS_1024:
+  case RVV_VECTOR_BITS_2048:
+  case RVV_VECTOR_BITS_4096:
+  case RVV_VECTOR_BITS_8192:
+  case RVV_VECTOR_BITS_16384:
+  case RVV_VECTOR_BITS_32768:
+  case RVV_VECTOR_BITS_65536:
+   rvv_bits = rvv_vector_bits;
+   

[PATCH] bitintlower: Fix .{ADD,SUB}_OVERFLOW lowering [PR114040]

2024-02-23 Thread Jakub Jelinek
Hi!

The following testcases show 2 bugs in the .{ADD,SUB}_OVERFLOW lowering,
both related to storing the REALPART_EXPR part of the result.
In the first testcase prec is 255, prec_limbs is 4, and for the second limb
in the loop the REALPART_EXPR of .USUBC (_30) is stored through:
  if (_27 <= 3)
goto ; [80.00%]
  else
goto ; [20.00%]

   [local count: 1073741824]:
  if (_27 < 3)
goto ; [80.00%]
  else
goto ; [20.00%]

   [local count: 1073741824]:
  bitint.3[_27] = _30;
  goto ; [100.00%]

   [local count: 858993464]:
  MEM[(unsigned long *)&bitint.3 + 24B] = _30;

   [local count: 1073741824]:
The first check is right: as prec_limbs is 4, we don't want to store
bitint.3[4] or above at all, those limbs are just computed for the overflow
checking and nothing else, so _27 >= 4 leads to no store.
But the other condition is the exact opposite of what should be done: if
the current index of the second limb (_27) is < 3, then it should do
  bitint.3[_27] = _30;
and if it is == 3, it should do
  MEM[(unsigned long *)&bitint.3 + 24B] = _30;
and (especially important for targets which would have bitinfo.extended = 1)
should actually in this case zero extend it from 63 bits to 64, that is
the handling of the partial limb.  The if_then_if_then_else helper, if
there are 2 conditions, sets m_gsi to be at the start of the
edge_true_false->dest bb, i.e. when the first condition is true and the
second false, and that is where we emit the SSA_NAME indexed limb store,
so the condition needs to be reversed.
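
To make the intended control flow concrete, below is a minimal C sketch of
the store logic this hunk should produce for the first testcase
(prec_limbs == 4, 64-bit limbs, 63 bits in the partial last limb).  The
function and variable names are illustrative only, not GCC internals:

#include <stdint.h>

/* Illustrative sketch: store the REALPART_EXPR limb VAL at dynamic limb
   index IDX into RES, for prec_limbs == 4 where the last limb holds only
   63 bits of the 255-bit result.  */
static void
store_result_limb (uint64_t *res, uint64_t idx, uint64_t val)
{
  if (idx <= 3)
    {
      /* Limbs at index 4 and above exist only for the overflow check
         and are never stored.  */
      if (idx < 3)
        /* Full limbs: plain SSA_NAME indexed store.  */
        res[idx] = val;
      else
        /* Partial last limb: zero extend from 63 to 64 bits, which is
           what the added cast to the limb access type achieves.  */
        res[3] = val & (UINT64_MAX >> 1);
    }
}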

The following patch does that and adds the cast as well; the usual
assumption that handle_operand already has the partial limb type doesn't
have to hold here, because the source operand could have much larger
precision than the REALPART_EXPR of the lhs.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-23  Jakub Jelinek  

PR tree-optimization/114040
* gimple-lower-bitint.cc (bitint_large_huge::lower_addsub_overflow):
Use EQ_EXPR rather than LT_EXPR for g2 condition and change its
probability from likely to unlikely.  When handling the true true
store, first cast to limb_access_type and then to l's type.

* gcc.dg/torture/bitint-60.c: New test.
* gcc.dg/torture/bitint-61.c: New test.

--- gcc/gimple-lower-bitint.cc.jj   2024-02-22 10:13:54.123058152 +0100
+++ gcc/gimple-lower-bitint.cc  2024-02-22 14:51:01.655335350 +0100
@@ -4255,12 +4255,12 @@ bitint_large_huge::lower_addsub_overflow
 NULL_TREE, NULL_TREE);
  gimple *g2 = NULL;
  if (!single_comparison)
-   g2 = gimple_build_cond (LT_EXPR, idx,
+   g2 = gimple_build_cond (EQ_EXPR, idx,
size_int (prec_limbs - 1),
NULL_TREE, NULL_TREE);
  edge edge_true_true, edge_true_false, edge_false;
  if_then_if_then_else (g, g2, profile_probability::likely (),
-   profile_probability::likely (),
+   profile_probability::unlikely (),
edge_true_true, edge_true_false,
edge_false);
  tree l = limb_access (type, var ? var : obj, idx, true);
@@ -4269,8 +4269,11 @@ bitint_large_huge::lower_addsub_overflow
  if (!single_comparison)
{
  m_gsi = gsi_after_labels (edge_true_true->src);
- l = limb_access (type, var ? var : obj,
-  size_int (prec_limbs - 1), true);
+ tree plm1idx = size_int (prec_limbs - 1);
+ tree plm1type = limb_access_type (type, plm1idx);
+ l = limb_access (type, var ? var : obj, plm1idx, true);
+ if (!useless_type_conversion_p (plm1type, TREE_TYPE (rhs)))
+   rhs = add_cast (plm1type, rhs);
  if (!useless_type_conversion_p (TREE_TYPE (l),
  TREE_TYPE (rhs)))
rhs = add_cast (TREE_TYPE (l), rhs);
--- gcc/testsuite/gcc.dg/torture/bitint-60.c.jj	2024-02-22 14:54:52.996121115 +0100
+++ gcc/testsuite/gcc.dg/torture/bitint-60.c	2024-02-22 14:54:42.656264784 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/114040 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 8671
+__attribute__((noipa)) unsigned
+foo (unsigned _BitInt(8671) x, unsigned y, unsigned _BitInt(512) z)
+{
+  unsigned _BitInt (8671) r
+= x * __builtin_sub_overflow_p (y * z, 0, (unsigned _BitInt(255)) 0);
+  return r;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 8671
+  if (foo (1, 1, 0xfffa4647

Re: [PATCH] Add ia64*-*-* to the list of obsolete targets

2024-02-23 Thread Sam James


Richard Biener  writes:

> The following deprecates ia64*-*-* for GCC 14.  Since we plan to
> force LRA for GCC 15 and the target only has slim chances of getting
> updated this notifies people in advance.  Given both Linux and
> glibc have axed the target further development is also made difficult.
>
> "Tested" for ia64-elf and x86_64-unknown-linux-gnu.
>
> OK?  There's no listed ia64 maintainer to CC.

Maybe tag PR90785. Anyway, no objection from us. It's not in a great
state anyway.

>
> Thanks,
> Richard.
>
> gcc/
>   * config.cc: Add ia64*-*-* to the list of obsoleted targets.
>
> contrib/
>   * config-list.mk (LIST): --enable-obsolete for ia64*-*-*.
> ---
>  contrib/config-list.mk | 5 +++--
>  gcc/config.gcc | 1 +
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/contrib/config-list.mk b/contrib/config-list.mk
> index 0694cc128fa..16df66f0fc6 100644
> --- a/contrib/config-list.mk
> +++ b/contrib/config-list.mk
> @@ -60,8 +60,9 @@ LIST = \
>i686-pc-linux-gnu i686-pc-msdosdjgpp i686-lynxos i686-nto-qnx \
>i686-rtems i686-solaris2.11 i686-wrs-vxworks \
>i686-wrs-vxworksae \
> -  i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \
> -  ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \
> +  i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elfOPT-enable-obsolete \
> +  ia64-linuxOPT-enable-obsolete ia64-hpuxOPT-enable-obsolete \
> +  ia64-hp-vmsOPT-enable-obsolete iq2000-elf lm32-elf \
>lm32-rtems lm32-uclinux \
>loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \
>m32c-elf m32r-elf m32rle-elf \
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index a0f9c672308..2e35a112040 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -273,6 +273,7 @@ esac
>  # Obsolete configurations.
>  case ${target}${target_min} in
>  *-*-solaris2.11.[0-3]*   \
> +   | ia64*-*-*   \
>   )
>  if test "x$enable_obsolete" != xyes; then
>echo "*** Configuration ${target}${target_min} is obsolete." >&2



[PATCH] c: Improve some diagnostics for __builtin_stdc_bit_* [PR114042]

2024-02-23 Thread Jakub Jelinek
Hi!

The PR complains that for the __builtin_stdc_bit_* "builtins" the
diagnostics don't mention the name of the builtin the user used, but
instead __builtin_{clz,ctz,popcount}g (which is what the FE
immediately lowers them to).

The following patch repeats the checks from check_builtin_function_arguments
which are done there on BUILT_IN_{CLZ,CTZ,POPCOUNT}G, such that they
diagnose it with the name of the "builtin" the user actually used, before it
is gone.
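
As a user-level illustration (not part of the patch), a call like the one
below used to be diagnosed as having an invalid argument to
'__builtin_clzg'; with the patch the error names the builtin that was
actually written:

/* Minimal reproducer sketch, assuming a compiler providing these builtins.  */
enum E { E0 };

unsigned int
leading_zeros_of_enum (enum E e)
{
  /* error: argument 1 in call to function '__builtin_stdc_leading_zeros'
     has enumerated type (previously the message named '__builtin_clzg').  */
  return __builtin_stdc_leading_zeros (e);
}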

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-23  Jakub Jelinek  

PR c/114042
* c-parser.cc (c_parser_postfix_expression): Diagnose
__builtin_stdc_bit_* argument with ENUMERAL_TYPE or BOOLEAN_TYPE
type or if signed here rather than on the replacement builtins
in check_builtin_function_arguments.

* gcc.dg/builtin-stdc-bit-2.c: Adjust testcase for actual builtin
names rather than names of builtin replacements.

--- gcc/c/c-parser.cc.jj	2024-02-22 16:11:05.320795586 +0100
+++ gcc/c/c-parser.cc   2024-02-22 18:21:03.789602019 +0100
@@ -11860,6 +11860,27 @@ c_parser_postfix_expression (c_parser *p
expr.set_error ();
break;
  }
+   if (TREE_CODE (TREE_TYPE (arg_p->value)) == ENUMERAL_TYPE)
+ {
+   error_at (loc, "argument %u in call to function "
+ "%qs has enumerated type", 1, name);
+   expr.set_error ();
+   break;
+ }
+   if (TREE_CODE (TREE_TYPE (arg_p->value)) == BOOLEAN_TYPE)
+ {
+   error_at (loc, "argument %u in call to function "
+ "%qs has boolean type", 1, name);
+   expr.set_error ();
+   break;
+ }
+   if (!TYPE_UNSIGNED (TREE_TYPE (arg_p->value)))
+ {
+   error_at (loc, "argument 1 in call to function "
+ "%qs has signed type", name);
+   expr.set_error ();
+   break;
+ }
tree arg = arg_p->value;
tree type = TYPE_MAIN_VARIANT (TREE_TYPE (arg));
/* Expand:
--- gcc/testsuite/gcc.dg/builtin-stdc-bit-2.c.jj	2023-11-23 10:32:33.385984072 +0100
+++ gcc/testsuite/gcc.dg/builtin-stdc-bit-2.c	2024-02-22 18:25:20.585105224 +0100
@@ -14,9 +14,9 @@ foo (void)
   __builtin_stdc_leading_zeros ((struct S) { 0 }); /* { dg-error 
"'__builtin_stdc_leading_zeros' operand not an integral type" } */
   __builtin_stdc_leading_zeros (); /* { dg-error "wrong number of 
arguments to '__builtin_stdc_leading_zeros'" } */
   __builtin_stdc_leading_zeros (0U, 0U);   /* { dg-error "wrong number of 
arguments to '__builtin_stdc_leading_zeros'" } */
-  __builtin_stdc_leading_zeros ((_Bool) 0);/* { dg-error "argument 1 in 
call to function '__builtin_clzg' has boolean type" } */
-  __builtin_stdc_leading_zeros ((enum E) E0);  /* { dg-error "argument 1 in 
call to function '__builtin_clzg' has enumerated type" } */
-  __builtin_stdc_leading_zeros (0);/* { dg-error "argument 1 in 
call to function '__builtin_clzg' has signed type" } */
+  __builtin_stdc_leading_zeros ((_Bool) 0);/* { dg-error "argument 1 in 
call to function '__builtin_stdc_leading_zeros' has boolean type" } */
+  __builtin_stdc_leading_zeros ((enum E) E0);  /* { dg-error "argument 1 in 
call to function '__builtin_stdc_leading_zeros' has enumerated type" } */
+  __builtin_stdc_leading_zeros (0);/* { dg-error "argument 1 in 
call to function '__builtin_stdc_leading_zeros' has signed type" } */
   __builtin_stdc_leading_ones (0.0f);  /* { dg-error 
"'__builtin_stdc_leading_ones' operand not an integral type" } */
   __builtin_stdc_leading_ones (0.0);   /* { dg-error 
"'__builtin_stdc_leading_ones' operand not an integral type" } */
   __builtin_stdc_leading_ones (0.0L);  /* { dg-error 
"'__builtin_stdc_leading_ones' operand not an integral type" } */
@@ -24,9 +24,9 @@ foo (void)
   __builtin_stdc_leading_ones ((struct S) { 0 });  /* { dg-error 
"'__builtin_stdc_leading_ones' operand not an integral type" } */
   __builtin_stdc_leading_ones ();  /* { dg-error "wrong number of 
arguments to '__builtin_stdc_leading_ones'" } */
   __builtin_stdc_leading_ones (0U, 0U);/* { dg-error "wrong number of 
arguments to '__builtin_stdc_leading_ones'" } */
-  __builtin_stdc_leading_ones ((_Bool) 0); /* { dg-error "argument 1 in 
call to function '__builtin_clzg' has boolean type" } */
-  __builtin_stdc_leading_ones ((enum E) E0);   /* { dg-error "argument 1 in 
call to function '__builtin_clzg' has enumerated type" } */
-  __builtin_stdc_leading_ones (0); /* { dg-error "argument 1 in 
call to function '__builtin_clzg' has signed type" } */
+  __builtin_stdc_leading_ones ((_Bool) 0); /* { dg-error "argument 1 in 
call to function '__builtin_stdc_leading_ones' has boo

Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread Kito Cheng
I would prefer to keep only zvl and scalable, or zvl only, since I
don't see too much value in specifying a value different from
zvl*b; that's a legacy option used before the zvl*b option was introduced,
and I think the reason to add it is to be compatible with
clang/LLVM for the riscv_rvv_vector_bits attribute?

On Fri, Feb 23, 2024 at 4:06 PM  wrote:
>
> From: Pan Li 
>
> This patch would like to introduce one new gcc option for RVV. To
> appoint the bits size of one RVV vector register. Valid arguments to
> '-mrvv-vector-bits=' are:
>
> * 64
> * 128
> * 256
> * 512
> * 1024
> * 2048
> * 4096
> * 8192
> * 16384
> * 32768
> * 65536
> * scalable
> * zvl
>
> 1. The scalable will be the default values which take min_vlen for
>the riscv_vector_chunks.
> 2. The zvl will pick up the zvl*b from the march option. For example,
>the mrvv-vector-bits will be 1024 when march=rv64gcv_zvl1024b.
> 3. Otherwise, it will take the value provide and complain error if none
>of above valid value is given.
>
> This option may influence the code gen when auto-vector. For example,
>
> void test_rvv_vector_bits (int *a, int *b, int *out)
> {
>   for (int i = 0; i < 8; i++)
> out[i] = a[i] + b[i];
> }
>
> It will generate code similar to below when build with
>   -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=zvl
>
> test_rvv_vector_bits:
>   ...
>   vsetivli  zero,4,e32,m1,ta,ma
>   vle32.v   v1,0(a0)
>   vle32.v   v2,0(a1)
>   vadd.vv   v1,v1,v2
>   vse32.v   v1,0(a2)
>   ...
>   vle32.v   v1,0(a0)
>   vle32.v   v2,0(a1)
>   vadd.vv   v1,v1,v2
>   vse32.v   v1,0(a2)
>
> And it will become more simply similar to below when build with
>   -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=256
>
> test_rvv_vector_bits:
>   ...
>   vsetivli  zero,8,e32,m2,ta,ma
>   vle32.v   v2,0(a0)
>   vle32.v   v4,0(a1)
>   vadd.vv   v2,v2,v4
>   vse32.v   v2,0(a2)
>
> Passed the regression test of rvv.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
> different RVV vector bits.
> * config/riscv/riscv.cc (riscv_convert_vector_bits): New func to
> get the RVV vector bits, with given min_vlen.
> (riscv_convert_vector_chunks): Combine the mrvv-vector-bits
> option with min_vlen to RVV vector chunks.
> (riscv_override_options_internal): Update comments and rename the
> vector chunks.
> * config/riscv/riscv.opt: Add option mrvv-vector-bits.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
> * gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
> * gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.
> * gcc.target/riscv/rvv/base/rvv-vector-bits-4.c: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-opts.h | 16 ++
>  gcc/config/riscv/riscv.cc | 49 ---
>  gcc/config/riscv/riscv.opt| 47 ++
>  .../riscv/rvv/base/rvv-vector-bits-1.c|  6 +++
>  .../riscv/rvv/base/rvv-vector-bits-2.c| 20 
>  .../riscv/rvv/base/rvv-vector-bits-3.c| 25 ++
>  .../riscv/rvv/base/rvv-vector-bits-4.c|  6 +++
>  7 files changed, 163 insertions(+), 6 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-4.c
>
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index 4edddbadc37..b2141190731 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -129,6 +129,22 @@ enum vsetvl_strategy_enum {
>VSETVL_OPT_NO_FUSION,
>  };
>
> +enum rvv_vector_bits_enum {
> +  RVV_VECTOR_BITS_SCALABLE,
> +  RVV_VECTOR_BITS_ZVL,
> +  RVV_VECTOR_BITS_64 = 64,
> +  RVV_VECTOR_BITS_128 = 128,
> +  RVV_VECTOR_BITS_256 = 256,
> +  RVV_VECTOR_BITS_512 = 512,
> +  RVV_VECTOR_BITS_1024 = 1024,
> +  RVV_VECTOR_BITS_2048 = 2048,
> +  RVV_VECTOR_BITS_4096 = 4096,
> +  RVV_VECTOR_BITS_8192 = 8192,
> +  RVV_VECTOR_BITS_16384 = 16384,
> +  RVV_VECTOR_BITS_32768 = 32768,
> +  RVV_VECTOR_BITS_65536 = 65536,
> +};
> +
>  #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && 
> TARGET_64BIT))
>
>  /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5e984ee2a55..366d7ece383 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -8801,13 +8801,50 @@ riscv_init_machine_status (void)
>return ggc_cleared_alloc ();
>  }
>
> -/* Return the VLEN value associated with -march.
> +static int
> 

[PATCH] expr: Fix REDUCE_BIT_FIELD in multiplication expansion [PR114054]

2024-02-23 Thread Jakub Jelinek
Hi!

The following testcase ICEs, because the REDUCE_BIT_FIELD macro uses
the target variable implicitly:
#define REDUCE_BIT_FIELD(expr)  (reduce_bit_field \
 ? reduce_to_bit_field_precision ((expr), \
  target, \
  type)   \
 : (expr))
and so when the code below reuses the target variable, documented to be
   The value may be stored in TARGET if TARGET is nonzero.
   TARGET is just a suggestion; callers must assume that
   the rtx returned may not be the same as TARGET.
for something unrelated (the value that should be returned), this misbehaves
(in the testcase target is set to a CONST_INT, which has VOIDmode and
reduce_to_bit_field_precision assert checking doesn't like that).
The documentation also says that
   If TARGET is CONST0_RTX, it means that the value will be ignored.
but expand_expr_real_2 does at the start:
  ignore = (target == const0_rtx
|| ((CONVERT_EXPR_CODE_P (code)
 || code == COND_EXPR || code == VIEW_CONVERT_EXPR)
&& TREE_CODE (type) == VOID_TYPE));

  /* We should be called only if we need the result.  */
  gcc_assert (!ignore);
- so such a target is mainly meant for calls and the like in other routines.
It certainly doesn't expect that target changes from not being ignored
initially to being ignored later on, nor CONST_INT results or anything else
which is not an object into which anything can be stored.

So, the following patch fixes that by using a more appropriate temporary
for the result, which other code is using.
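
A stripped-down illustration of the underlying hazard (purely illustrative
C, not GCC code; the roles of the variables differ in expr.cc, but the
pattern of a macro implicitly reading a local that is later reused for an
unrelated value is the same):

#include <stdio.h>

/* The macro implicitly reads the local variable named 'limit'.  */
#define REDUCE(expr) ((expr) % limit)

static unsigned int
compute (unsigned int op0, unsigned int op1)
{
  unsigned int limit = 16;   /* Starts out as the intended bound,  */
  limit = op0 + op1;         /* but is later reused for an unrelated value,  */
  return REDUCE (limit);     /* so the reduction no longer does what was meant.  */
}

int
main (void)
{
  printf ("%u\n", compute (5, 7));   /* Prints 0, not (5 + 7) % 16 == 12.  */
  return 0;
}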

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-23  Jakub Jelinek  

PR rtl-optimization/114054
* expr.cc (expand_expr_real_2) : Use
temp variable instead of target parameter for result.

* gcc.dg/bitint-92.c: New test.

--- gcc/expr.cc.jj  2024-02-14 14:26:19.709811397 +0100
+++ gcc/expr.cc 2024-02-22 18:39:37.852431789 +0100
@@ -10259,12 +10259,12 @@ expand_expr_real_2 (sepops ops, rtx targ
  &algorithm, &variant, cost)
  : cost < mul_cost (speed, mode))
{
- target = bit0_p ? expand_and (mode, negate_rtx (mode, op0),
-   op1, target)
- : expand_and (mode, op0,
-   negate_rtx (mode, op1),
-   target);
- return REDUCE_BIT_FIELD (target);
+ temp = bit0_p ? expand_and (mode, negate_rtx (mode, op0),
+ op1, target)
+   : expand_and (mode, op0,
+ negate_rtx (mode, op1),
+ target);
+ return REDUCE_BIT_FIELD (temp);
}
}
}
--- gcc/testsuite/gcc.dg/bitint-92.c.jj 2024-02-22 18:43:56.433910671 +0100
+++ gcc/testsuite/gcc.dg/bitint-92.c	2024-02-22 18:43:29.464277919 +0100
@@ -0,0 +1,17 @@
+/* PR rtl-optimization/114054 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-Og -fwhole-program -fno-tree-ccp -fprofile-use 
-fno-tree-copy-prop -w" } */
+
+int x;
+
+void
+foo (int i, unsigned u)
+{
+  x = __builtin_mul_overflow_p ((unsigned _BitInt(1)) u, i, (_BitInt(33)) 0);
+}
+
+int
+main ()
+{
+  foo (11, 0);
+}

Jakub



Re: [PATCH] Add ia64*-*-* to the list of obsolete targets

2024-02-23 Thread Jeff Law




On 2/23/24 01:05, Richard Biener wrote:

The following deprecates ia64*-*-* for GCC 14.  Since we plan to
force LRA for GCC 15 and the target only has slim chances of getting
updated this notifies people in advance.  Given both Linux and
glibc have axed the target further development is also made difficult.

"Tested" for ia64-elf and x86_64-unknown-linux-gnu.

OK?  There's no listed ia64 maintainer to CC.

Thanks,
Richard.

gcc/
* config.cc: Add ia64*-*-* to the list of obsoleted targets.

contrib/
* config-list.mk (LIST): --enable-obsolete for ia64*-*-*.

OK.
jeff



Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread Jeff Law




On 2/23/24 01:22, Kito Cheng wrote:

I would prefer to only keep zvl and scalable or zvl only, since I
don't see too much value in specifying a value which different from
zvl*b, that's a legacy option used before zvl*b option was introduced,
and the reason to add that is that could used for compatible with
clang/LLVM for riscv_rvv_vector_bits attribute I think?
And if we want this (I'm not sure), it really feels like it ought to 
defer to gcc-15.


jeff



Re: Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread juzhe.zh...@rivai.ai
I personally think it's better to have the VLS compile option and attribute in
GCC-14.
Since there are many people porting different libraries
(eigen/highway/xnnpack/openBLAS, ...) with the VLS feature,
they test them with Clang.

If we don't support it, we will end up with Clang being able to compile those
libs but GCC-14 not, which will make RISC-V
folks think GCC is still pretty far behind Clang.

Besides, the VLS compile option and attribute are pretty safe code; I would be
surprised if it caused issues in the current RVV support.

So, +1 from my side to support the VLS compile option and attribute in GCC-14.

But I'd like to CC more RISC-V GCC folks to see the votes.
If most of the people don't want this in GCC-14 and prefer to defer it to
GCC-15, I won't insist on it.

Thanks.



juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2024-02-23 16:29
To: Kito Cheng; pan2.li
CC: gcc-patches; juzhe.zhong; yanzhang.wang
Subject: Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV
 
 
On 2/23/24 01:22, Kito Cheng wrote:
> I would prefer to only keep zvl and scalable or zvl only, since I
> don't see too much value in specifying a value which different from
> zvl*b, that's a legacy option used before zvl*b option was introduced,
> and the reason to add that is that could used for compatible with
> clang/LLVM for riscv_rvv_vector_bits attribute I think?
And if we want this (I'm not sure), it really feels like it ought to 
defer to gcc-15.
 
jeff
 
 


[PATCH] c++: Fix ICE due to folding a call to constructor on cdtor_returns_this arches (aka arm32) [PR113083]

2024-02-23 Thread Jakub Jelinek
Hi!

When targetm.cxx.cdtor_returns_this () is true (aka TARGET_AAPCS_BASED on
arm32), a constructor is supposed to return the this pointer, but when we
cp_fold such a call, we don't take that into account and just INIT_EXPR the
object, so we can later ICE during gimplification, because the expression
doesn't have the right type.
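
A rough model of the shape of the fix in plain C (illustrative only; the
real change builds GENERIC trees via cp_save_expr, cp_build_init_expr and a
COMPOUND_EXPR): on such targets the folded expression must still yield the
this pointer, so the initialization is wrapped so that the value of the
whole expression is the saved object address again:

/* Illustrative sketch, not the actual trees built by cp_fold.  */
struct A { int i; };

static struct A *
folded_ctor_call (struct A *this_ptr)
{
  this_ptr->i = 0;   /* INIT_EXPR part: the constructor's effect.  */
  return this_ptr;   /* COMPOUND_EXPR part: yield this, keeping pointer type.  */
}

int
main (void)
{
  struct A obj;
  return folded_ctor_call (&obj)->i;
}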

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux and
tested with a cross to armv7-linux-gnueabi on the testcase, but
unfortunately there are no 32-bit arm boxes in cfarm and arm32 is gone from
Fedora for quite some time as well, so I have no easy way to test this.
Christophe, do you think you could test this?  Thanks.

2024-02-23  Jakub Jelinek  

PR c++/113083
* cp-gimplify.cc (cp_fold): For targetm.cxx.cdtor_returns_this ()
wrap r into a COMPOUND_EXPR and return folded CALL_EXPR_ARG (x, 0).

* g++.dg/cpp0x/constexpr-113083.C: New test.

--- gcc/cp/cp-gimplify.cc.jj	2024-02-22 21:45:09.663430066 +0100
+++ gcc/cp/cp-gimplify.cc   2024-02-22 22:30:23.481428242 +0100
@@ -3412,9 +3412,15 @@ cp_fold (tree x, fold_flags_t flags)
if (DECL_CONSTRUCTOR_P (callee))
  {
loc = EXPR_LOCATION (x);
-   tree s = build_fold_indirect_ref_loc (loc,
- CALL_EXPR_ARG (x, 0));
+   tree a = CALL_EXPR_ARG (x, 0);
+   bool return_this = targetm.cxx.cdtor_returns_this ();
+   if (return_this)
+ a = cp_save_expr (a);
+   tree s = build_fold_indirect_ref_loc (loc, a);
r = cp_build_init_expr (s, r);
+   if (return_this)
+ r = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (x), r,
+ fold_convert_loc (loc, TREE_TYPE (x), a));
  }
x = r;
break;
--- gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C.jj	2024-01-13 00:05:00.077372302 +0100
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C	2024-02-22 22:20:20.622618992 +0100
@@ -0,0 +1,16 @@
+// PR c++/113083
+// { dg-do compile { target c++11 } }
+// { dg-options "-Os" }
+
+struct A { constexpr A (); };
+
+void
+foo ()
+{
+  A b;
+}
+
+constexpr
+A::A ()
+{
+}

Jakub



[COMMITTED] testsuite: plugin: Fix gcc.dg/plugin/crash-test-write-though-null-sarif.c on Solaris

2024-02-23 Thread Rainer Orth
gcc.dg/plugin/crash-test-write-though-null-sarif.c FAILs on Solaris:

FAIL: gcc.dg/plugin/crash-test-write-though-null-sarif.c -fplugin=./crash_test_plugin.so  scan-sarif-file "text": "Segmentation fault

Comparing the sarif files between Linux and Solaris reveals

-"message": 
{"text": "Segmentation fault"},
+"message": 
{"text": "Segmentation Fault"},

This patch allows for both forms.

Tested on i386-pc-solaris2.11, sparc-sun-solaris2.11, and
x86_64-pc-linux-gnu.

Committed to trunk.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-02-22  Rainer Orth  

gcc/testsuite:
* gcc.dg/plugin/crash-test-write-though-null-sarif.c
(scan-sarif-file): Allow for "Segmentation Fault", too.

# HG changeset patch
# Parent  ed942d3ea6c0b97b1812ed2c6563fd5873c8edca
testsuite: plugin: Fix gcc.dg/plugin/crash-test-write-though-null-sarif.c on Solaris

diff --git a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c
--- a/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c
+++ b/gcc/testsuite/gcc.dg/plugin/crash-test-write-though-null-sarif.c
@@ -61,4 +61,4 @@ void test_inject_write_through_null (voi
{ dg-final { scan-sarif-file "\"startColumn\": 3" } }
{ dg-final { scan-sarif-file "\"endColumn\": 31" } }
  { dg-final { scan-sarif-file "\"message\": " } }
-   { dg-final { scan-sarif-file "\"text\": \"Segmentation fault" } } */
+   { dg-final { scan-sarif-file "\"text\": \"Segmentation \[Ff\]ault" } } */


Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-23 Thread Iain Sandoe



> On 21 Feb 2024, at 23:36, Iain Sandoe  wrote:
> 
>> On 21 Feb 2024, at 23:06, Jason Merrill  wrote:
>> 
>> On 2/20/24 00:45, Alexandre Oliva wrote:
>>> On Feb 16, 2024, Jason Merrill  wrote:
 So, for stage2+, let's add just prev- libgcc.
>>> I'm pretty sure this will break bootstrap-lean where libgcc_s isn't a
>>> system library, and we're building post-bootstrap host tools :-(
>>> We need the current stage lib after the prev stage is removed.
>> 
>> That's a good point, we should make sure it doesn't break.  It looks to me 
>> like stage3-bubble removes stage1 after we're done building stage3, which 
>> should be fine, but compare removes the stage2 libgcc that we might still 
>> need to run stage3.  So indeed I guess we still want both prev and current 
>> libgcc directories in RPATH to handle the case where we've removed the 
>> previous stage, as below.
> 
> I’ll try that on darwin and aarch64 linux (I quite often need to use 
> bootstrap-lean on the latter becuase of low disk space)

I tested this addition with bootstrap-lean on i686-darwin9 (needs libgcc_s to
bootstrap), x86_64-darwin21/23 (does not use libstdc++ in the system) ..
aarch64-linux-gnu
.. and it worked OK.

Note that the testsuite does still have some glitches with bootstrap-lean (but 
those are independent of this patch).
(we really need to adapt the idea of “host compiler/plugin compiler” to be the 
stage3 one when we bootstrap-lean).

Iain


> 
>>> I also doubt that TARGET_LIB_PATH was defined and used for no reason.
>>> My hunch is that bootstrap options and/or targets that don't have these
>>> libraries as system libraries will break in some obscure way without it.
>>> But I don't have the bandwidth to track down the history behind their
>>> inclusion.
>> 
>> That has not seemed to be the case in Iain's testing on a system without 
>> these libraries as system libraries.
> 
> Unless we change to (or add) a bootstrap where we use shared libstdc++ in the 
> compiler, I think that is the case.
> 
> As I mentioned in an earlier post, unfortunately we do not yet have a way to 
> distinguish module builds for host from module builds for target (when a 
> library is used for both - which is the case for libstdc++, libbacktrace and 
> libgrust at least),  This means that either the target library has to be 
> built without a shared version (libbacktrace does this), or the host versions 
> get built with a shared library which is not used (libstdc++) .. AFAICT the 
> only reason we build libgomp and libatomic in bootstrap phase 1 and 2 is 
> because they are dependents of the unused shared libstdc++.
> 
> Ideally, we’d fix Makefile.{tpl,def} to allow the same module to have 
> different recipies for host and target builds, but that’s also not a 5 minute 
> hack….
> 
>> I can't think of why we would need to depend on the current stage target 
>> libraries, and we already weren't depending on the previous stage target 
>> libraries.  I believe the only target code we run is tests, and if the tests 
>> need the target libraries in RPATH that should happen in the testsuite.
> 
> Which could also be improved (we do not in Dejagnu really distinguish 
> runpaths needed by the compiler from those needed by the built executables)
> 
>> It's arguable that we should pass TARGET_LIB_PATH down to make it easier for 
>> the testsuites to find them, in case they are currently relying on them 
>> being part of RPATH.  
> 
>> My impression from Iain's testing is that this isn't actually needed.
> 
> there’s actually a fair amount of specific code to locate dependent libs in 
> places (some of which I just cleaned up a bit since it was now causing fails 
> with Darwin’s new linker complaining about duplicated libs and so on).  So we 
> are not currently expecting this information to be passed down.
> 
>> I wouldn't mind keeping TARGET_LIB_PATH unused, but I'm not sure why that 
>> would be better than bringing it back if we turn out to need it.
> 
> +1
>> 
>> 
>>> I insist that the entire approach of choosing the same set of target
>>> library directories regardless of the freshness relationship between
>>> e.g. a system libstdc++ and the one we're building can't possibly be an
>>> overall improvement, it's only trading problems in some scenarios (where
>>> we're building an older libstdc++) for problems in other scenarios
>>> (where we're building a newer libstdc++).  The latter is unfortunately
>>> far more likely, which is reason enough for the current arrangement, but
>>> libstdc++ problems will likely only hit if the gap between system and
>>> being-built libraries is large enough (say, new symbols in the newer
>>> libstdc++ used by the compiler, but not available in the system
>>> library).
>> 
>> If bootstrap doesn't actually need the target libraries, as seems to be the 
>> case, then I think removing them from RPATH trades the former problem for no 
>> problem.
>> 
>>> I'm really uncomfortable with this change, especially at this stage.
>>>

Re: [PATCH] c++: Fix ICE due to folding a call to constructor on cdtor_returns_this arches (aka arm32) [PR113083]

2024-02-23 Thread Christophe Lyon
On Fri, 23 Feb 2024 at 09:42, Jakub Jelinek  wrote:
>
> Hi!
>
> When targetm.cxx.cdtor_returns_this () (aka on arm32 TARGET_AAPCS_BASED)
> constructor is supposed to return this pointer, but when we cp_fold such
> a call, we don't take that into account and just INIT_EXPR the object,
> so we can later ICE during gimplification, because the expression doesn't
> have the right type.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux and
> tested with a cross to armv7-linux-gnueabi on the testcase, but
> unfortunately there are no 32-bit arm boxes in cfarm and arm32 is gone from
> Fedora for quite some time as well, so I have no easy way to test this.
> Christophe, do you think you could test this?  Thanks.

Hi Jakub,

Sadly our precommit CI could not apply your patch automatically (as
you can see in patchwork).

I'll test your patch manually.

Thanks,

Christophe

>
> 2024-02-23  Jakub Jelinek  
>
> PR c++/113083
> * cp-gimplify.cc (cp_fold): For targetm.cxx.cdtor_returns_this ()
> wrap r into a COMPOUND_EXPR and return folded CALL_EXPR_ARG (x, 0).
>
> * g++.dg/cpp0x/constexpr-113083.C: New test.
>
> --- gcc/cp/cp-gimplify.cc.jj2024-02-22 21:45:09.663430066 +0100
> +++ gcc/cp/cp-gimplify.cc   2024-02-22 22:30:23.481428242 +0100
> @@ -3412,9 +3412,15 @@ cp_fold (tree x, fold_flags_t flags)
> if (DECL_CONSTRUCTOR_P (callee))
>   {
> loc = EXPR_LOCATION (x);
> -   tree s = build_fold_indirect_ref_loc (loc,
> - CALL_EXPR_ARG (x, 0));
> +   tree a = CALL_EXPR_ARG (x, 0);
> +   bool return_this = targetm.cxx.cdtor_returns_this ();
> +   if (return_this)
> + a = cp_save_expr (a);
> +   tree s = build_fold_indirect_ref_loc (loc, a);
> r = cp_build_init_expr (s, r);
> +   if (return_this)
> + r = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (x), r,
> + fold_convert_loc (loc, TREE_TYPE (x), a));
>   }
> x = r;
> break;
> --- gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C.jj2024-01-13 
> 00:05:00.077372302 +0100
> +++ gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C   2024-02-22 
> 22:20:20.622618992 +0100
> @@ -0,0 +1,16 @@
> +// PR c++/113083
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-Os" }
> +
> +struct A { constexpr A (); };
> +
> +void
> +foo ()
> +{
> +  A b;
> +}
> +
> +constexpr
> +A::A ()
> +{
> +}
>
> Jakub
>


[COMMITTED] testsuite: vect: Actually skip gcc.dg/vect/vect-bic-bitmask-12.c etc. on SPARC

2024-02-23 Thread Rainer Orth
gcc.dg/vect/vect-bic-bitmask-12.c and gcc.dg/vect/vect-bic-bitmask-23.c
currently FAIL on 32 and 64-bit Solaris/SPARC

FAIL: gcc.dg/vect/vect-bic-bitmask-12.c -flto -ffat-lto-objects  scan-tree-dump dce7 "<=s*.+{ 255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-12.c scan-tree-dump dce7 "<=s*.+{ 255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c -flto -ffat-lto-objects  scan-tree-dump dce7 "<=s*.+{ 255, 15, 1, 65535 }"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c scan-tree-dump dce7 "<=s*.+{ 255, 15, 1, 65535 }"

although they should be skipped since

commit 5f07095d22f58572c06997aa6d4f3bc456e1925d
Author: Tamar Christina 
Date:   Tue Mar 8 11:32:59 2022 +

vect: disable bitmask tests on sparc

The problem is that dg-skip-if must come after dg-do, although this
isn't currently documented unfortunately.

Fixed by reordering the directives.

Tested on sparc-sun-solaris2.11 and i386-pc-solaris2.11.

Committed to trunk.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-02-22  Rainer Orth  

gcc/testsuite:
* gcc.dg/vect/vect-bic-bitmask-12.c: Move dg-skip-if down.
* gcc.dg/vect/vect-bic-bitmask-23.c: Likewise.

# HG changeset patch
# Parent  9c588428a6e298fa35c5bf75d7d374105b1575bd
testsuite: vect: Actually skip  gcc.dg/vect/vect-bic-bitmask-12.c etc. on SPARC

diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
@@ -1,6 +1,6 @@
-/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 /* { dg-do compile } */
 /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */
+/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
@@ -1,6 +1,6 @@
-/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 /* { dg-do compile } */
 /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */
+/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 
 #include 
 


RE: Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread Li, Pan2
> I would prefer to only keep zvl and scalable or zvl only, since I

> don't see too much value in specifying a value which different from

> zvl*b, that's a legacy option used before zvl*b option was introduced,

> and the reason to add that is that could used for compatible with

> clang/LLVM for riscv_rvv_vector_bits attribute I think?



Yes, exactly, to be compatible with clang/llvm.  Taking just zvl is good enough
IMO; I will update in v2 once we have alignment.



> And if we want this (I'm not sure), it really feels like it ought to defer to 
> gcc-15.

> But I'd like to CC more RISC-V GCC folks to see the votes.

> If most of the people don't want this in GCC-14 and defer it to GCC-15, I 
> won't insist on it.



Sure, let’s wait for a while.



Pan

From: juzhe.zh...@rivai.ai 
Sent: Friday, February 23, 2024 4:38 PM
To: jeffreyalaw ; kito.cheng ; Li, 
Pan2 
Cc: gcc-patches ; Wang, Yanzhang 
; Robin Dapp ; palmer 
; vineetg ; Patrick O'Neill 
; Edwin Lu 
Subject: Re: Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for 
RVV

I personally think it's better to has VLS compile option and attribute in 
GCC-14.
Since there are many people porting different libraury 
(eigen/highway/xnnpack/openBLAS,...) with VLS feature,
they test them with Clang.

If we don't support it, we will end up with Clang can compile those lib but 
GCC-14 can't which will make RISC-V
folks think GCC is still pretty far behind than Clang.

Besides, VLS compile option and attribute are pretty safe codes, I would 
surprise that it will cause issues on current RVV support.

So, +1 from my side to support VLS compile option and attribute on GCC-14.

But I'd like to CC more RISC-V GCC folks to see the votes.
If most of the people don't want this in GCC-14 and defer it to GCC-15, I won't 
insist on it.

Thanks.


juzhe.zh...@rivai.ai

From: Jeff Law
Date: 2024-02-23 16:29
To: Kito Cheng; pan2.li
CC: gcc-patches; 
juzhe.zhong; 
yanzhang.wang
Subject: Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV


On 2/23/24 01:22, Kito Cheng wrote:
> I would prefer to only keep zvl and scalable or zvl only, since I
> don't see too much value in specifying a value which different from
> zvl*b, that's a legacy option used before zvl*b option was introduced,
> and the reason to add that is that could used for compatible with
> clang/LLVM for riscv_rvv_vector_bits attribute I think?
And if we want this (I'm not sure), it really feels like it ought to
defer to gcc-15.

jeff




[PATCH] rs6000: Fix issue in specifying PTImode as an attribute [PR106895]

2024-02-23 Thread jeevitha
Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

The PTImode attribute assists in generating even/odd register pairs for 128-bit
values.  When the user specifies PTImode as an attribute, it breaks because
there is no internal type to handle this mode.  We have created a tree node
with a dummy type to handle PTImode.  We are not documenting this dummy type
since users are not allowed to use this type externally.

2024-02-23  Jeevitha Palanisamy  

gcc/
PR target/106895
* config/rs6000/rs6000.h (enum rs6000_builtin_type_index): Add fields
to hold PTImode type.
* config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Add node
for PTImode type.

gcc/testsuite/
PR target/106895
* gcc.target/powerpc/pr106895.c: New testcase.

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 6698274031b..f553c72779e 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -756,6 +756,15 @@ rs6000_init_builtins (void)
   else
 ieee128_float_type_node = NULL_TREE;
 
+  /* PTImode to get even/odd register pairs.  */
+  intPTI_type_internal_node = make_node(INTEGER_TYPE);
+  TYPE_PRECISION (intPTI_type_internal_node) = GET_MODE_BITSIZE (PTImode);
+  layout_type (intPTI_type_internal_node);
+  SET_TYPE_MODE (intPTI_type_internal_node, PTImode);
+  t = build_qualified_type (intPTI_type_internal_node, TYPE_QUAL_CONST);
+  lang_hooks.types.register_builtin_type (intPTI_type_internal_node,
+ "__dummypti");
+
   /* Vector pair and vector quad support.  */
   vector_pair_type_node = make_node (OPAQUE_TYPE);
   SET_TYPE_MODE (vector_pair_type_node, OOmode);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 2291fe8d3a3..77bb937a28b 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2304,6 +2304,7 @@ enum rs6000_builtin_type_index
   RS6000_BTI_ptr_vector_quad,
   RS6000_BTI_ptr_long_long,
   RS6000_BTI_ptr_long_long_unsigned,
+  RS6000_BTI_PTI,
   RS6000_BTI_MAX
 };
 
@@ -2348,6 +2349,7 @@ enum rs6000_builtin_type_index
 #define uintDI_type_internal_node   (rs6000_builtin_types[RS6000_BTI_UINTDI])
 #define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI])
 #define uintTI_type_internal_node   (rs6000_builtin_types[RS6000_BTI_UINTTI])
+#define intPTI_type_internal_node   (rs6000_builtin_types[RS6000_BTI_PTI])
 #define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float])
 #define double_type_internal_node   (rs6000_builtin_types[RS6000_BTI_double])
 #define long_double_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long_double])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106895.c b/gcc/testsuite/gcc.target/powerpc/pr106895.c
new file mode 100644
index 000..56547b7fa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106895.c
@@ -0,0 +1,15 @@
+/* PR target/106895 */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2" } */
+
+/* Verify the following generates even/odd register pairs.  */
+
+typedef __int128 pti __attribute__((mode(PTI)));
+
+void
+set128 (pti val, pti *mem)
+{
+asm("stq %1,%0" : "=m"(*mem) : "r"(val));
+}
+
+/* { dg-final { scan-assembler "stq \[123\]?\[02468\]" } } */



[PATCH][www] Document ia64*-*-* obsolescence

2024-02-23 Thread Richard Biener
The following documents the obsolescence of ia64*-*-*.

Pushed.

* gcc-14/changes.html: Document ia64*-*-* obsolescence.
---
 htdocs/gcc-14/changes.html | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index c2cdd87a..85ccc54d 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -45,7 +45,10 @@ a work-in-progress.
   -fcf-protection=none needs to be added and then
   with -fcf-protection=xxx.
   
-
+  Support for the ia64*-*-* target ports which have been
+  unmaintained for quite a while has been declared obsolete in GCC 14.
+  The next release of GCC will have their sources permanently removed.
+  
 
 
 
-- 
2.35.3



Re: [PATCH] bitintlower: Fix .{ADD,SUB}_OVERFLOW lowering [PR114040]

2024-02-23 Thread Richard Biener
On Fri, 23 Feb 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcases show 2 bugs in the .{ADD,SUB}_OVERFLOW lowering,
> both related to storing of the REALPART_EXPR part of the result.
> On the first testcase prec is 255, prec_limbs is 4 and for the second limb
> in the loop the store of the REALPART_EXPR of .USUBC (_30) is stored through:
>   if (_27 <= 3)
> goto ; [80.00%]
>   else
> goto ; [20.00%]
> 
>[local count: 1073741824]:
>   if (_27 < 3)
> goto ; [80.00%]
>   else
> goto ; [20.00%]
> 
>[local count: 1073741824]:
>   bitint.3[_27] = _30;
>   goto ; [100.00%]
> 
>[local count: 858993464]:
>   MEM[(unsigned long *)&bitint.3 + 24B] = _30;
> 
>[local count: 1073741824]:
> The first check is right, as prec_limbs is 4, we don't want to store
> bitint.3[4] or above at all, those limbs are just computed for the overflow
> checking and nothing else, so _27 > 4 leads to no store.
> But the other condition is exact opposite of what should be done, if
> the current index of the second limb (_27) is < 3, then it should
>   bitint.3[_27] = _30;
> and if it is == 3, it should
>   MEM[(unsigned long *)&bitint.3 + 24B] = _30;
> and (especially important for the targets which would bitinfo.extended = 1)
> should actually in this case zero extend it from the 63 bits to 64, that is
> the handling of the partial limb.  The if_then_if_then_else helper if
> there are 2 conditions sets m_gsi to be at the start of the
> edge_true_false->dest bb, i.e. when the first condition is true and second
> false, and that is where we store the SSA_NAME indexed limb store, so the
> condition needs to be reversed.
> 
> The following patch does that and adds the cast as well, the usual
> assumption that already handle_operand has the partial limb type doesn't
> have to be true here, because the source operand could have much larger
> precision than the REALPART_EXPR of the lhs.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2024-02-23  Jakub Jelinek  
> 
>   PR tree-optimization/114040
>   * gimple-lower-bitint.cc (bitint_large_huge::lower_addsub_overflow):
>   Use EQ_EXPR rather than LT_EXPR for g2 condition and change its
>   probability from likely to unlikely.  When handling the true true
>   store, first cast to limb_access_type and then to l's type.
> 
>   * gcc.dg/torture/bitint-60.c: New test.
>   * gcc.dg/torture/bitint-61.c: New test.
> 
> --- gcc/gimple-lower-bitint.cc.jj 2024-02-22 10:13:54.123058152 +0100
> +++ gcc/gimple-lower-bitint.cc2024-02-22 14:51:01.655335350 +0100
> @@ -4255,12 +4255,12 @@ bitint_large_huge::lower_addsub_overflow
>NULL_TREE, NULL_TREE);
> gimple *g2 = NULL;
> if (!single_comparison)
> - g2 = gimple_build_cond (LT_EXPR, idx,
> + g2 = gimple_build_cond (EQ_EXPR, idx,
>   size_int (prec_limbs - 1),
>   NULL_TREE, NULL_TREE);
> edge edge_true_true, edge_true_false, edge_false;
> if_then_if_then_else (g, g2, profile_probability::likely (),
> - profile_probability::likely (),
> + profile_probability::unlikely (),
>   edge_true_true, edge_true_false,
>   edge_false);
> tree l = limb_access (type, var ? var : obj, idx, true);
> @@ -4269,8 +4269,11 @@ bitint_large_huge::lower_addsub_overflow
> if (!single_comparison)
>   {
> m_gsi = gsi_after_labels (edge_true_true->src);
> -   l = limb_access (type, var ? var : obj,
> -size_int (prec_limbs - 1), true);
> +   tree plm1idx = size_int (prec_limbs - 1);
> +   tree plm1type = limb_access_type (type, plm1idx);
> +   l = limb_access (type, var ? var : obj, plm1idx, true);
> +   if (!useless_type_conversion_p (plm1type, TREE_TYPE (rhs)))
> + rhs = add_cast (plm1type, rhs);
> if (!useless_type_conversion_p (TREE_TYPE (l),
> TREE_TYPE (rhs)))
>   rhs = add_cast (TREE_TYPE (l), rhs);
> --- gcc/testsuite/gcc.dg/torture/bitint-60.c.jj   2024-02-22 
> 14:54:52.996121115 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-60.c  2024-02-22 14:54:42.656264784 
> +0100
> @@ -0,0 +1,24 @@
> +/* PR tree-optimization/114040 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23 -pedantic-errors" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 8671
> +__attribute__((noipa)) unsigned
> +foo (unsigned _BitInt(8671) x, unsigned y, unsigned _BitInt(512) z)
> +{
> 

Re: [PATCH] expr: Fix REDUCE_BIT_FIELD in multiplication expansion [PR114054]

2024-02-23 Thread Richard Biener
On Fri, 23 Feb 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase ICEs, because the REDUCE_BIT_FIELD macro uses
> the target variable implicitly:
> #define REDUCE_BIT_FIELD(expr)  (reduce_bit_field \
>  ? reduce_to_bit_field_precision ((expr), \
>   target, \
>   type)   \
>  : (expr))
> and so when the code below reuses the target variable, documented to be
>The value may be stored in TARGET if TARGET is nonzero.
>TARGET is just a suggestion; callers must assume that
>the rtx returned may not be the same as TARGET.
> for something unrelated (the value that should be returned), this misbehaves
> (in the testcase target is set to a CONST_INT, which has VOIDmode and
> reduce_to_bit_field_precision assert checking doesn't like that).
> Needed to say that
>If TARGET is CONST0_RTX, it means that the value will be ignored.
> but in expand_expr_real_2 does at the start:
>   ignore = (target == const0_rtx
> || ((CONVERT_EXPR_CODE_P (code)
>  || code == COND_EXPR || code == VIEW_CONVERT_EXPR)
> && TREE_CODE (type) == VOID_TYPE));
> 
>   /* We should be called only if we need the result.  */
>   gcc_assert (!ignore);
> - so such target is mainly meant for calls and the like in other routines.
> Certainly doesn't expect that target changes from not being ignored
> initially to ignore later on and other CONST_INT results as well as anything
> which is not an object into which anything can be stored.
> 
> So, the following patch fixes that by using a more appripriate temporary
> for the result, which other code is using.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2024-02-23  Jakub Jelinek  
> 
>   PR rtl-optimization/114054
>   * expr.cc (expand_expr_real_2) : Use
>   temp variable instead of target parameter for result.
> 
>   * gcc.dg/bitint-92.c: New test.
> 
> --- gcc/expr.cc.jj2024-02-14 14:26:19.709811397 +0100
> +++ gcc/expr.cc   2024-02-22 18:39:37.852431789 +0100
> @@ -10259,12 +10259,12 @@ expand_expr_real_2 (sepops ops, rtx targ
> &algorithm, &variant, cost)
> : cost < mul_cost (speed, mode))
>   {
> -   target = bit0_p ? expand_and (mode, negate_rtx (mode, op0),
> - op1, target)
> -   : expand_and (mode, op0,
> - negate_rtx (mode, op1),
> - target);
> -   return REDUCE_BIT_FIELD (target);
> +   temp = bit0_p ? expand_and (mode, negate_rtx (mode, op0),
> +   op1, target)
> + : expand_and (mode, op0,
> +   negate_rtx (mode, op1),
> +   target);
> +   return REDUCE_BIT_FIELD (temp);
>   }
>   }
>   }
> --- gcc/testsuite/gcc.dg/bitint-92.c.jj   2024-02-22 18:43:56.433910671 
> +0100
> +++ gcc/testsuite/gcc.dg/bitint-92.c  2024-02-22 18:43:29.464277919 +0100
> @@ -0,0 +1,17 @@
> +/* PR rtl-optimization/114054 */
> +/* { dg-do compile { target bitint } } */
> +/* { dg-options "-Og -fwhole-program -fno-tree-ccp -fprofile-use 
> -fno-tree-copy-prop -w" } */
> +
> +int x;
> +
> +void
> +foo (int i, unsigned u)
> +{
> +  x = __builtin_mul_overflow_p ((unsigned _BitInt(1)) u, i, (_BitInt(33)) 0);
> +}
> +
> +int
> +main ()
> +{
> +  foo (11, 0);
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: PING: [PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-23 Thread Uros Bizjak
On Fri, Feb 23, 2024 at 3:45 AM H.J. Lu  wrote:
>
> On Thu, Feb 22, 2024 at 6:39 PM Hongtao Liu  wrote:
> >
> > On Thu, Feb 22, 2024 at 10:33 PM H.J. Lu  wrote:
> > >
> > > On Sun, Feb 18, 2024 at 8:02 AM H.J. Lu  wrote:
> > > >
> > > > If assembler and linker supports
> > > >
> > > > add %reg1, name@gottpoff(%rip), %reg2
> > > >
> > > > with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of
> > > >
> > > > mov name@gottpoff(%rip), %reg2
> > > > add %reg1, %reg2
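
A minimal, hypothetical C sketch of the kind of initial-exec TLS access this
serves (the variable and function names are invented, and the build flags are
assumed to be something like -O2 -mapxf with the initial-exec TLS model):

/* Hypothetical example: taking the address of an initial-exec TLS
   variable.  The address is the thread pointer (%fs:0) plus the offset
   loaded from the GOT via counter@gottpoff(%rip).  When the assembler
   and linker support R_X86_64_CODE_6_GOTTPOFF, that sum can be a single
   APX NDD "add %reg1, counter@gottpoff(%rip), %reg2" instead of a
   separate mov from the GOT followed by a two-operand add.  */
extern __thread int counter;

int *
counter_addr (void)
{
  return &counter;
}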
> > x86 part LGTM, but I'm not familiar with the changes in config related 
> > files.
>
> Jakub, Uros, Alexandre, can you review the configure.ac change in this patch?
>
> https://patchwork.sourceware.org/project/gcc/list/?series=31075
>
> Thanks.
>
> > > >
> > > > gcc/
> > > >
> > > > * configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 1
> > > > if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > > * config.in: Regenerated.
> > > > * configure: Likewise.
> > > > * config/i386/predicates.md (apx_ndd_add_memory_operand): Allow
> > > > UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > * gcc.target/i386/apx-ndd-tls-1b.c: New test.
> > > > * lib/target-supports.exp
> > > > (check_effective_target_code_6_gottpoff_reloc): New.
> > > > ---
> > > >  gcc/config.in |  7 +++
> > > >  gcc/config/i386/predicates.md |  6 +-
> > > >  gcc/configure | 62 +++
> > > >  gcc/configure.ac  | 37 +++
> > > >  .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
> > > >  gcc/testsuite/lib/target-supports.exp | 48 ++
> > > >  6 files changed, 168 insertions(+), 1 deletion(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c
> > > >
> > > > diff --git a/gcc/config.in b/gcc/config.in
> > > > index ce1d073833f..f3de4ba6776 100644
> > > > --- a/gcc/config.in
> > > > +++ b/gcc/config.in
> > > > @@ -737,6 +737,13 @@
> > > >  #endif
> > > >
> > > >
> > > > +/* Define 0/1 if your assembler and linker support 
> > > > R_X86_64_CODE_6_GOTTPOFF.
> > > > +   */
> > > > +#ifndef USED_FOR_TARGET
> > > > +#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
> > > > +#endif
> > > > +
> > > > +
> > > >  /* Define if your assembler supports relocs needed by -fpic. */
> > > >  #ifndef USED_FOR_TARGET
> > > >  #undef HAVE_AS_SMALL_PIC_RELOCS
> > > > diff --git a/gcc/config/i386/predicates.md 
> > > > b/gcc/config/i386/predicates.md
> > > > index 4c1aedd7e70..391f108c360 100644
> > > > --- a/gcc/config/i386/predicates.md
> > > > +++ b/gcc/config/i386/predicates.md
> > > > @@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
> > > >
> > > >  ;; Return true if OP is a memory operand which can be used in APX NDD
> > > >  ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
> > > > -;; isn't allowed with APX NDD ADD.
> > > > +;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF works.
> > > >  (define_predicate "apx_ndd_add_memory_operand"
> > > >(match_operand 0 "memory_operand")
> > > >  {
> > > > +  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
> > > > +  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
> > > > +return true;
> > > > +
> > > >op = XEXP (op, 0);
> > > >
> > > >/* Disallow APX NDD ADD with UNSPEC_GOTNTPOFF.  */
> > > > diff --git a/gcc/configure b/gcc/configure
> > > > index 41b978b0380..c59c971862c 100755
> > > > --- a/gcc/configure
> > > > +++ b/gcc/configure
> > > > @@ -29834,6 +29834,68 @@ cat >>confdefs.h <<_ACEOF
> > > >  _ACEOF
> > > >
> > > >
> > > > +if echo "$ld_ver" | grep GNU > /dev/null; then
> > > > +  if $gcc_cv_ld -V 2>/dev/null | grep elf_x86_64_sol2 > /dev/null; 
> > > > then
> > > > +ld_ix86_gld_64_opt="-melf_x86_64_sol2"
> > > > +  else
> > > > +ld_ix86_gld_64_opt="-melf_x86_64"
> > > > +  fi
> > > > +fi
> > > > +conftest_s='
> > > > +   .text
> > > > +   .globl  _start
> > > > +   .type _start, @function
> > > > +_start:
> > > > +   addq%r23,foo@GOTTPOFF(%rip), %r15
> > > > +   .section .tdata,"awT",@progbits
> > > > +   .type foo, @object
> > > > +foo:
> > > > +   .quad 0'
> > > > +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
> > > > R_X86_64_CODE_6_GOTTPOFF reloc" >&5
> > > > +$as_echo_n "checking assembler for R_X86_64_CODE_6_GOTTPOFF reloc... " 
> > > > >&6; }
> > > > +if ${gcc_cv_as_x86_64_code_6_gottpoff+:} false; then :
> > > > +  $as_echo_n "(cached) " >&6
> > > > +else
> > > > +  gcc_cv_as_x86_64_code_6_gottpoff=no
> > > > +  if test x$gcc_cv_as != x; then
> > > > +$as_echo "$conftest_s" > conftest.s
> > > > +if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s 
> > > > >&5'
> > > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > > +  (eva

Pushed: [PATCH] LoongArch: Don't falsely claim gold supported in toplevel configure

2024-02-23 Thread Xi Ruoyao
On Fri, 2024-02-23 at 11:37 +0800, chenglulu wrote:
> 
> 在 2024/2/23 上午11:27, Xi Ruoyao 写道:
> > On Fri, 2024-02-23 at 11:16 +0800, chenglulu wrote:
> > > 在 2024/2/22 下午5:17, Xi Ruoyao 写道:
> > > > The gold linker has never been ported to LoongArch (and it seems
> > > > unlikely to be ported in the future as the new architectures are
> > > > focusing on lld and/or mold for fast linkers).
> > > > 
> > > > ChangeLog:
> > > > 
> > > >     * configure.ac (ENABLE_GOLD): Remove loongarch*-*-* from target
> > > >     list.
> > > >     * configure: Regenerate.
> > > > ---
> > > > 
> > > > Ok for GCC trunk (to get synced into Binutils later)?
> > > I have no problem. But I have a question. Is this modification simply
> > > because we don’t
> > > 
> > > support it or is there an error somewhere?
> > If a user specifies --enable-gold when building Binutils, with loongarch in
> > this list the build system will attempt to build gold and fail.  If
> > loongarch is removed from the list, the build system will ignore
> > --enable-gold.
> > 
> Okay, I understand.

Pushed r14-9149 and the Binutils maintainer will pick it up before the
next Binutils release (AFAIK).

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Pushed: [GCC 13 PATCH] LoongArch: Don't default to -mno-explicit-relocs if -mno-relax

2024-02-23 Thread Xi Ruoyao
On Thu, 2024-02-22 at 19:09 +0800, chenglulu wrote:
> 
> 在 2024/2/22 下午6:20, Xi Ruoyao 写道:
> > To improve Binutils compatibility we've had to backport relaxation
> > support.  But if a user just updates to GCC 13.3 and sticks with
> > Binutils 2.41, there is no reason to use -mno-explicit-relocs as the
> > default because we are turning off relaxation for Binutils 2.41 (it
> > lacks conditional branch relaxation support) anyway.
> > 
> > So like GCC 14, make the default of -m[no-]explicit-relocs depend on
> > -m[no-]relax instead of HAVE_AS_MRELAX_OPTION.  Also update the doc
> > to
> > reflect the behavior change.
> > 
> > gcc/ChangeLog:
> > 
> > * config/loongarch/genopts/loongarch.opt.in
> > (TARGET_EXPLICIT_RELOCS): Init to M_OPTION_NOT_SEEN.
> > * config/loongarch/loongarch.opt: Regenerate.
> > * config/loongarch/loongarch.cc
> > (loongarch_option_override_internal): Set the default of
> > TARGET_EXPLICIT_RELOCS to HAVE_AS_EXPLICIT_RELOCS
> > && !loongarch_mrelax.
> > * doc/invoke.texi (-m[no-]explicit-relocs): Update for
> > LoongArch.
> > ---
> > 
> > Ok for releases/gcc-13?
> 
> LGTM!

Pushed r13-8357.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH v10 2/2] Add gcov MC/DC tests for GDC

2024-02-23 Thread Jørgen Kvalsvik
This is a mostly straight port from the gcov-19.c tests from the C test
suite. The only notable differences from C to D are that D flips the
true/false outcomes for loop headers, and the D front end ties loop and
ternary conditions to a slightly different locus.

The test for >64 conditions warning is disabled as it either needs
support from the testing framework or something similar to #pragma GCC
diagnostic push to not cause a test failure from detecting a warning.

gcc/testsuite/ChangeLog:

* gdc.dg/gcov.exp: New test.
* gdc.dg/gcov1.d: New test.
---
 gcc/testsuite/gdc.dg/gcov.exp |   44 +
 gcc/testsuite/gdc.dg/gcov1.d  | 1712 +
 2 files changed, 1756 insertions(+)
 create mode 100644 gcc/testsuite/gdc.dg/gcov.exp
 create mode 100644 gcc/testsuite/gdc.dg/gcov1.d

diff --git a/gcc/testsuite/gdc.dg/gcov.exp b/gcc/testsuite/gdc.dg/gcov.exp
new file mode 100644
index 000..4218771b208
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/gcov.exp
@@ -0,0 +1,44 @@
+#   Copyright (C) 1997-2023 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# .
+
+# Gcov test driver.
+
+# Load support procs.
+load_lib gdc-dg.exp
+load_lib gcov.exp
+
+global GDC_UNDER_TEST
+
+# For now find gcov in the same directory as $GDC_UNDER_TEST.
+if { ![is_remote host] && [string match "*/*" [lindex $GDC_UNDER_TEST 0]] } {
+set GCOV [file dirname [lindex $GDC_UNDER_TEST 
0]]/[gcc-transform-out-of-tree gcov]
+} else {
+set GCOV [gcc-transform-out-of-tree gcov]
+}
+
+# Initialize harness.
+dg-init
+
+# Delete old .gcda files.
+set files [glob -nocomplain gcov*.gcda]
+if { $files != "" } {
+eval "remote_file build delete $files"
+}
+
+# Main loop.
+gdc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/gcov*.d]] "" ""
+
+dg-finish
diff --git a/gcc/testsuite/gdc.dg/gcov1.d b/gcc/testsuite/gdc.dg/gcov1.d
new file mode 100644
index 000..10ffa4a0e30
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/gcov1.d
@@ -0,0 +1,1712 @@
+/* { dg-options "-fcondition-coverage -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* Some side effect to stop branches from being pruned.  */
+int x = 0;
+
+int id  (int x) { return  x; }
+int inv (int x) { return !x; }
+
+/* || works.  */
+void
+mcdc001a (int a, int b)
+{
+if (a || b) /* conditions(1/4) true(0) false(0 1) */
+   /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc001b (int a, int b)
+{
+if (a || b) /* conditions(3/4) true(0) */
+   /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc001c (int a, int b)
+{
+if (a || b) /* conditions(4/4) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc001d (int a, int b, int c)
+{
+if (a || b || c) /* conditions(2/6) false(0 1 2) true(2) */
+/* conditions(end) */
+   x = 1;
+}
+
+/* && works */
+void
+mcdc002a (int a, int b)
+{
+if (a && b) /* conditions(1/4) true(0 1) false(0) */
+   /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc002b (int a, int b)
+{
+if (a && b) /* conditions(3/4) false(0) */
+   /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc002c (int a, int b)
+{
+if (a && b) /* conditions(4/4) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc002d (int a, int b, int c)
+{
+if (a && b && c) /* conditions(4/6) false(0 2) */
+/* conditions(end) */
+   x = 1;
+}
+
+/* Negation works.  */
+void
+mcdc003a (int a, int b)
+{
+if (!a || !b) /* conditions(2/4) false(0 1) */
+ /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+/* Single conditionals with and without else.  */
+void
+mcdc004a (int a)
+{
+if (a) /* conditions(1/2) true(0) */
+  /* conditions(end) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc004b (int a)
+{
+if (a) /* conditions(2/2) */
+   x = 1;
+else
+   x = 2;
+}
+
+void
+mcdc004c (int a)
+{
+if (a) /* conditions(1/2) false(0) */
+  /* conditions(end) */
+   x = 1;
+}
+
+void
+mcdc004d (int a, int b, int c)
+{
+if (a)  /* conditions(2/2) */
+{
+   if (b || c) /* conditions(1/4) true(1) false(0 1) */
+   x = a + b + c;
+}
+}
+
+void
+mcdc004e (int a, int b, int c)
+{
+if (a) 

[PATCH V3 0/2] aarch64: Place target independent and dependent changed code in one file.

2024-02-23 Thread Ajit Agarwal
Hello Richard/Alex/Segher:

This patch adds the changed code for the target-independent and
target-dependent parts of load/store fusion.

The common infrastructure of load/store pair fusion is
divided into target-independent and target-dependent
code.

The target-independent code is the generic code, with
pure virtual functions as the interface between the
target-independent and target-dependent code.

The target-dependent code is the implementation of those
pure virtual functions for the aarch64 target and the calls
into the target-independent code.

Bootstrapped for aarch64-linux-gnu.

Thanks & Regards
Ajit

aarch64: Place target independent and dependent changed code in one file.

Common infrastructure of load/store pair fusion is
divided into target-independent and target-dependent
code.

Target-independent code is the generic code, with
pure virtual functions as the interface between the
target-independent and target-dependent code.

Target-dependent code is the implementation of the pure
virtual functions for the aarch64 target and the calls
into the target-independent code.

2024-02-23  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc: Place target
independent and dependent changed code.
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 437 ---
 1 file changed, 305 insertions(+), 132 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 22ed95eb743..2ef22ff1e96 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -40,10 +40,10 @@
 
 using namespace rtl_ssa;
 
-static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
-static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1));
-static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1;
-static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1;
+static constexpr HOST_WIDE_INT PAIR_MEM_IMM_BITS = 7;
+static constexpr HOST_WIDE_INT PAIR_MEM_IMM_SIGN_BIT = (1 << 
(PAIR_MEM_IMM_BITS - 1));
+static constexpr HOST_WIDE_INT PAIR_MEM_MAX_IMM = PAIR_MEM_IMM_SIGN_BIT - 1;
+static constexpr HOST_WIDE_INT PAIR_MEM_MIN_IMM = -PAIR_MEM_MAX_IMM - 1;
 
 // We pack these fields (load_p, fpsimd_p, and size) into an integer
 // (LFS) which we use as part of the key into the main hash tables.
@@ -138,8 +138,18 @@ struct alt_base
   poly_int64 offset;
 };
 
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+  virtual bool conflict_p (int &budget) const = 0;
+  virtual insn_info *insn () const = 0;
+  virtual bool valid () const  = 0;
+  virtual void advance () = 0;
+};
+
+
 // State used by the pass for a given basic block.
-struct ldp_bb_info
+struct pair_fusion
 {
   using def_hash = nofree_ptr_hash;
   using expr_key_t = pair_hash>;
@@ -161,13 +171,13 @@ struct ldp_bb_info
   static const size_t obstack_alignment = sizeof (void *);
   bb_info *m_bb;
 
-  ldp_bb_info (bb_info *bb) : m_bb (bb), m_emitted_tombstone (false)
+  pair_fusion (bb_info *bb) : m_bb (bb), m_emitted_tombstone (false)
   {
 obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
obstack_alignment, obstack_chunk_alloc,
obstack_chunk_free);
   }
-  ~ldp_bb_info ()
+  ~pair_fusion ()
   {
 obstack_free (&m_obstack, nullptr);
 
@@ -177,10 +187,50 @@ struct ldp_bb_info
bitmap_obstack_release (&m_bitmap_obstack);
   }
   }
+  void track_access (insn_info *, bool load, rtx mem);
+  void transform ();
+  void cleanup_tombstones ();
+  virtual void set_multiword_subreg (insn_info *i1, insn_info *i2,
+bool load_p) = 0;
+  virtual rtx gen_load_store_pair (rtx *pats,  rtx writeback,
+  bool load_p) = 0;
+  void merge_pairs (insn_list_t &, insn_list_t &,
+   bool load_p, unsigned access_size);
+  virtual void transform_for_base (int load_size, access_group &group) = 0;
+
+  bool try_fuse_pair (bool load_p, unsigned access_size,
+insn_info *i1, insn_info *i2);
+
+  bool fuse_pair (bool load_p, unsigned access_size,
+ int writeback,
+ insn_info *i1, insn_info *i2,
+ base_cand &base,
+ const insn_range_info &move_range);
+
+  void do_alias_analysis (insn_info *alias_hazards[4],
+ alias_walker *walkers[4],
+ bool load_p);
+
+  void track_tombstone (int uid);
+
+  bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
+
+  virtual bool is_fpsimd_op_p (rtx reg_op, machine_mode mem_mode,
+  bool load_p) = 0;
+
+  virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+  virtual bool pair_trailing_writeback_p () = 0;
+  virtual bool pair_check_register_operand (bool load_p, rtx reg_op,
+   machine_mode mem_mode) = 0;
+  virtual int pair_mem_alias_check_limit () = 0;

Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-23 Thread Alexandre Oliva
On Feb 21, 2024, Jason Merrill  wrote:

> So indeed I guess we still
> want both prev and current libgcc directories in RPATH to handle the
> case where we've removed the previous stage, as below.

*nod*, thanks

> I can't think of why we would need to depend on the current stage
> target libraries,

> If bootstrap doesn't actually need the target libraries.

ISTM we may be miscommunicating.

I'm not so worried about bootstrap itself as I am about post-bootstrap
host tools.  Those are unusual in that, after native bootstraps, they're
built using the just-built (last-stage) compiler and target libraries,
rather than the host compiler and system libraries.  While configuring
them, we need LD_LIBRARY_PATH (or similar) set up so that native
execution tests can pass, at the very least; while building them, we may
need LD_LIBRARY_PATH set up so that dependent libraries are found and
link correctly.

> I'm hoping for a fix that doesn't require individual users to know
> about a workaround.

I'm sure we'd all appreciate that, but AFAICT the conflicting
requirements in scenarios of different freshness of libraries (system vs
build) and both system tools and host tools dependencies on them would
require a lot more intelligence in our build system to detect and react
to the circumstances, deciding whether or not native tools used behind
the scenes by the build will run with the just-built libraries, and
whether or not the post-bootstrap host tools linked with the just-built
libraries will run with the system libraries.  There may even be
scenarios in which these conflicting requirements would paint users into
an inescapable corner AFAICT.

The most worrying aspect is not libstdc++, but libgcc_s; it's needed as
a dynamic library by languages that support exceptions, and depending on
relative freshness of the toolchain being built and system libraries,
newer symbols may be required by system programs and by just-built host
tools.  There doesn't seem to be an easy way around that.  You have to
either privilege one scenario over the other, like we do now, or
introduce cleverness to detect and cope with such conflicting
requirements, which we're not even trying to do.

The approach in your patch changes the situation from privileging one
scenario to privileging a mixed scenario (just-built libgcc but system
libstdc++), and I have a pretty strong hunch that this mixed approach is likely
to break down the road, even if not right now.

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH 0/2 V2] aarch64: Place target independent and dependent code in one file.

2024-02-23 Thread Ajit Agarwal
Hello Richard:

On 23/02/24 1:19 am, Richard Sandiford wrote:
> Ajit Agarwal  writes:
>> Hello Alex/Richard:
>>
>> I have placed target indpendent and target dependent code in
>> aarch64-ldp-fusion for load store fusion.
>>
>> Common infrastructure of load store pair fusion is divided into
>> target independent and target dependent code.
>>
>> Target independent code is the Generic code with pure virtual
>> function to interface betwwen target independent and dependent
>> code.
>>
>> Target dependent code is the implementation of pure virtual
>> function for aarch64 target and the call to target independent
>> code.
> 
> Thanks for the update.  This is still quite hard to review though.
> Sorry to ask for another round, but could you split it up further?
> The ideal thing would be if patches that move code do nothing other
> than move code, and if patches that change code do those changes
> in-place.
> 

As per your suggestion I have submitted a new patch with the above changes.
Sorry for the inconvenience caused.

Thanks & Regards
Ajit


> Richard
> 
>>
>> Bootstrapped in aarch64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> aarch64: Place target independent and dependent code in one file.
>>
>> Common infrastructure of load store pair fusion is divided into
>> target independent and target dependent code.
>>
>> Target independent code is the Generic code with pure virtual
>> function to interface betwwen target independent and dependent
>> code.
>>
>> Target dependent code is the implementation of pure virtual
>> function for aarch64 target and the call to target independent
>> code.
>>
>> 2024-02-15  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>>  * config/aarch64/aarch64-ldp-fusion.cc: Place target
>>  independent and dependent code.
>> ---
>>  gcc/config/aarch64/aarch64-ldp-fusion.cc | 3513 --
>>  1 file changed, 1842 insertions(+), 1671 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
>> b/gcc/config/aarch64/aarch64-ldp-fusion.cc
>> index 22ed95eb743..0ab842e2bbb 100644
>> --- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
>> +++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
>> @@ -17,6 +17,7 @@
>>  // along with GCC; see the file COPYING3.  If not see
>>  // .
>>  
>> +
>>  #define INCLUDE_ALGORITHM
>>  #define INCLUDE_FUNCTIONAL
>>  #define INCLUDE_LIST
>> @@ -37,13 +38,12 @@
>>  #include "tree-hash-traits.h"
>>  #include "print-tree.h"
>>  #include "insn-attr.h"
>> -
>>  using namespace rtl_ssa;
>>  
>> -static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
>> -static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1));
>> -static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1;
>> -static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1;
>> +static constexpr HOST_WIDE_INT PAIR_MEM_IMM_BITS = 7;
>> +static constexpr HOST_WIDE_INT PAIR_MEM_IMM_SIGN_BIT = (1 << 
>> (PAIR_MEM_IMM_BITS - 1));
>> +static constexpr HOST_WIDE_INT PAIR_MEM_MAX_IMM = PAIR_MEM_IMM_SIGN_BIT - 1;
>> +static constexpr HOST_WIDE_INT PAIR_MEM_MIN_IMM = -PAIR_MEM_MAX_IMM - 1;
>>  
>>  // We pack these fields (load_p, fpsimd_p, and size) into an integer
>>  // (LFS) which we use as part of the key into the main hash tables.
>> @@ -138,8 +138,144 @@ struct alt_base
>>poly_int64 offset;
>>  };
>>  
>> +// Class that implements a state machine for building the changes needed to 
>> form
>> +// a store pair instruction.  This allows us to easily build the changes in
>> +// program order, as required by rtl-ssa.
>> +struct stp_change_builder
>> +{
>> +  enum class state
>> +  {
>> +FIRST,
>> +INSERT,
>> +FIXUP_USE,
>> +LAST,
>> +DONE
>> +  };
>> +
>> +  enum class action
>> +  {
>> +TOMBSTONE,
>> +CHANGE,
>> +INSERT,
>> +FIXUP_USE
>> +  };
>> +
>> +  struct change
>> +  {
>> +action type;
>> +insn_info *insn;
>> +  };
>> +
>> +  bool done () const { return m_state == state::DONE; }
>> +
>> +  stp_change_builder (insn_info *insns[2],
>> +  insn_info *repurpose,
>> +  insn_info *dest)
>> +: m_state (state::FIRST), m_insns { insns[0], insns[1] },
>> +  m_repurpose (repurpose), m_dest (dest), m_use (nullptr) {}
>> +
>> +  change get_change () const
>> +  {
>> +switch (m_state)
>> +  {
>> +  case state::FIRST:
>> +return {
>> +  m_insns[0] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
>> +  m_insns[0]
>> +};
>> +  case state::LAST:
>> +return {
>> +  m_insns[1] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
>> +  m_insns[1]
>> +};
>> +  case state::INSERT:
>> +return { action::INSERT, m_dest };
>> +  case state::FIXUP_USE:
>> +return { action::FIXUP_USE, m_use->insn () };
>> +  case state::DONE:
>> +break;
>> +  }
>> +
>> +gcc_unreachable ();
>> +  }
>> +
>> +  // Transition to the next state.
>> +  void advance ()
>> +  {
>> +switch (m_state)
>> +  {
>> +

[Patch] Fortran/Openmp: Use OPT_Wopenmp for gfc_match_omp_depobj warning

2024-02-23 Thread Tobias Burnus
When checking something else, I noticed that there was one warning in 
openmp.cc that did not use OPT_Wopenmp.


I intend to commit the attached patch later today as obvious.

Tobias
Fortran/Openmp: Use OPT_Wopenmp for gfc_match_omp_depobj warning

gcc/fortran/ChangeLog:

	* openmp.cc (gfc_match_omp_depobj): Use OPT_Wopenmp
	as warning category in gfc_warning.

 gcc/fortran/openmp.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 77f6e1732f9..38de60238c0 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -4768,8 +4768,8 @@ gfc_match_omp_depobj (void)
   if (gfc_match (" ( %v ) ", &destroyobj) == MATCH_YES)
 	{
 	  if (destroyobj->symtree != depobj->symtree)
-	gfc_warning (0, "The same depend object should be used as DEPOBJ "
-			 "argument at %L and as DESTROY argument at %L",
+	gfc_warning (OPT_Wopenmp, "The same depend object should be used as"
+			 " DEPOBJ argument at %L and as DESTROY argument at %L",
 			 &depobj->where, &destroyobj->where);
 	  gfc_free_expr (destroyobj);
 	}


[committed] arm: fix ICE with vectorized reciprocal division [PR108120]

2024-02-23 Thread Richard Earnshaw

The expand pattern for reciprocal division was enabled for all math
optimization modes, but the patterns it was generating were not
enabled unless -funsafe-math-optimizations was enabled; this leads to
an ICE when the pattern we generate cannot be recognized.

Fixed by only enabling vector division when doing unsafe math.

gcc:

PR target/108120
* config/arm/neon.md (div<VCVTF:mode>3): Rename from div<mode>3.
Gate with ARM_HAVE_NEON_<MODE>_ARITH.

gcc/testsuite:
PR target/108120
* gcc.target/arm/neon-recip-div-1.c: New file.
---
 gcc/config/arm/neon.md  |  4 ++--
 gcc/testsuite/gcc.target/arm/neon-recip-div-1.c | 16 
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/neon-recip-div-1.c

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 17c90f436c6..fa4a7aeda35 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -553,11 +553,11 @@ (define_insn "*mul3_neon"
Enabled with -funsafe-math-optimizations -freciprocal-math
and disabled for -Os since it increases code size .  */
 
-(define_expand "div<mode>3"
+(define_expand "div<VCVTF:mode>3"
   [(set (match_operand:VCVTF 0 "s_register_operand")
 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
 		  (match_operand:VCVTF 2 "s_register_operand")))]
-  "TARGET_NEON && !optimize_size
+  "ARM_HAVE_NEON__ARITH && !optimize_size
&& flag_reciprocal_math"
   {
    rtx rec = gen_reg_rtx (<MODE>mode);
diff --git a/gcc/testsuite/gcc.target/arm/neon-recip-div-1.c b/gcc/testsuite/gcc.target/arm/neon-recip-div-1.c
new file mode 100644
index 000..e15c3ca5fe9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-recip-div-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3 -freciprocal-math -fno-unsafe-math-optimizations -save-temps" } */
+/* { dg-add-options arm_neon } */
+
+int *a;
+int n;
+void b() {
+  int c;
+  for (c = 0; c < 10; c++)
+a[c] = (float)c / n;
+}
+/* We should not ICE, or get a vectorized reciprocal instruction when unsafe
+   math optimizations are disabled.  */
+/* { dg-final { scan-assembler-not "vrecpe\\.f32\\t\[qd\].*" } } */
+/* { dg-final { scan-assembler-not "vrecps\\.f32\\t\[qd\].*" } } */


Re: [PATCH v10 2/2] Add gcov MC/DC tests for GDC

2024-02-23 Thread Iain Buclaw
Excerpts from Jørgen Kvalsvik's message of February 23, 2024 12:18 pm:
> This is a mostly straight port from the gcov-19.c tests from the C test
> suite. The only notable differences from C to D are that D flips the
> true/false outcomes for loop headers, and the D front end ties loop and
> ternary conditions to slightly different locus.
> 
> The test for >64 conditions warning is disabled as it either needs
> support from the testing framework or a something similar to #pragma GCC
> diagnostic push to not cause a test failure from detecting a warning.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gdc.dg/gcov.exp: New test.
>   * gdc.dg/gcov1.d: New test.
> ---
>  gcc/testsuite/gdc.dg/gcov.exp |   44 +
>  gcc/testsuite/gdc.dg/gcov1.d  | 1712 +
>  2 files changed, 1756 insertions(+)
>  create mode 100644 gcc/testsuite/gdc.dg/gcov.exp
>  create mode 100644 gcc/testsuite/gdc.dg/gcov1.d
> 

I think I said this before in the previous series: no objections to
adding more tests to the gdc testsuite.

OK.

Thanks,
Iain.


Re: [Patch] Fortran/Openmp: Use OPT_Wopenmp for gfc_match_omp_depobj warning

2024-02-23 Thread Jakub Jelinek
On Fri, Feb 23, 2024 at 12:25:54PM +0100, Tobias Burnus wrote:
> When checking something else, I noticed that there was one warning in
> openmp.cc that did not use OPT_Wopenmp.
> 
> I intent to commit the attached patch later today as obvious.
> 
> Tobias

> Fortran/Openmp: Use OPT_Wopenmp for gfc_match_omp_depobj warning
> 
> gcc/fortran/ChangeLog:
> 
>   * openmp.cc (gfc_match_omp_depobj): Use OPT_Wopenmp
>   as warning category in gfc_warning.

LGTM.

Jakub



RE: [PATCH V4 2/5] RISC-V: Add vector related pipelines

2024-02-23 Thread Li, Pan2
Hi Edwin,

Looks like 6ec84c45a19403d3435b2affe4ec60e518fc1f97 results in a number of rvv.exp
asm check failures upstream (I list some, but not all, of them below).
Could you please help to double-check it? Ping me if any more information
is needed. Thanks.

   = Summary of gcc testsuite =
| # of unexpected case / # of unique unexpected case
|  gcc |  g++ | gfortran |
 rv64imafdcv/  lp64d/ medlow |  237 /68 |1 / 1 |  - |
make: *** [Makefile:1067: report-gcc-newlib] Error 1

FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e8,\\s*mf4,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e8,\\s*mf8,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O3 -g   
scan-assembler-times vsetvli 3
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O3 -g   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e8,\\s*mf2,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O3 -g   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e8,\\s*mf4,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c   -O3 -g   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e8,\\s*mf8,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-19.c   -O3 -g   
scan-assembler-times vsetvli 15
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-19.c   -O3 -g   
scan-assembler-times 
vsetvli\\s+zero,\\s*zero,\\s*e64,\\s*m1,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_single_block-19.c   -O3 -g   
scan-assembler-times 
vsetvli\\s+zero,\\s*zero,\\s*e64,\\s*m2,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2   
scan-assembler-times vsetvli 9
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e16,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 3
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e16,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 3
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e16,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 3
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c   -O2   
scan-assembler-times vsetvli 7
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none   scan-assembler-times vsetvli 7
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times vsetvli 7
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c   -O2   
scan-assembler-times vsetvli 9
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none   scan-assembler-times vsetvli 9
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects   scan-assembler-times vsetvli 9
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-4.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e16,\\s*mf2,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-4.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-5.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e16,\\s*mf2,\\s*t[au],\\s*m[au] 1
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-5.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c   -O2   
scan-assembler-times vsetvli 7
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c   -O2   
scan-assembler-times 
vsetvli\\s+[a-x0-9]+,\\s*zero,\\s*e32,\\s*mf2,\\s*t[au],\\s*m[au] 2
FAIL: gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c   -O2 -flto 
-fno-use-linker-plugin -flto-partiti

[PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Andrew Stubbs
This is a follow-up to the previous patch to ensure that integer vector
bit-masks do not have excess bits set. It fixes a bug, observed on
amdgcn, in which the mask could be incorrectly set to zero, resulting in
wrong-code.

The mask was broken when nunits==32. The patched version will probably
be broken for nunits==64, but I don't think any current targets have
masks with more than 64 bits.
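
A self-contained C sketch of the failure mode (plain C, not GCC internals; the
names are invented): when the shift is done in a 32-bit int, the mask computed
for nunits==32 collapses to zero on typical hosts.

#include <stdio.h>

int
main (void)
{
  volatile unsigned int nunits = 32;

  /* Shifting a 32-bit int by 32 is undefined; on common hosts the shift
     count is masked, so 1 << 32 yields 1 and the mask becomes 0 instead
     of the intended all-ones value.  */
  unsigned long long bad  = (1 << nunits) - 1;
  /* Doing the shift in a 64-bit type gives the intended mask.  */
  unsigned long long good = (1ULL << nunits) - 1;

  printf ("bad = %#llx, good = %#llx\n", bad, good);
  return 0;
}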

OK for mainline?

Andrew

gcc/ChangeLog:

* expr.cc (store_constructor): Use 64-bit shifts.
---
 gcc/expr.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index e23880e..90de5decee3 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7879,7 +7879,7 @@ store_constructor (tree exp, rtx target, int cleared, 
poly_int64 size,
auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
if (maybe_ne (GET_MODE_PRECISION (mode), nunits))
  tmp = expand_binop (mode, and_optab, tmp,
- GEN_INT ((1 << nunits) - 1), target,
+ GEN_INT ((1UL << nunits) - 1), target,
  true, OPTAB_WIDEN);
if (tmp != target)
  emit_move_insn (target, tmp);
-- 
2.41.0



Re: [PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Jakub Jelinek
On Fri, Feb 23, 2024 at 12:58:53PM +, Andrew Stubbs wrote:
> This is a follow-up to the previous patch to ensure that integer vector
> bit-masks do not have excess bits set. It fixes a bug, observed on
> amdgcn, in which the mask could be incorrectly set to zero, resulting in
> wrong-code.
> 
> The mask was broken when nunits==32. The patched version will probably
> be broken for nunits==64, but I don't think any current targets have
> masks with more than 64 bits.
> 
> OK for mainline?
> 
> Andrew
> 
> gcc/ChangeLog:
> 
>   * expr.cc (store_constructor): Use 64-bit shifts.

No, this isn't 64-bit shift on all hosts.
Use HOST_WIDE_INT_1U instead.
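
The point is host portability rather than the shift width alone: GCC also
builds on LLP64 hosts such as 64-bit Windows, where unsigned long is still
32 bits, so 1UL << 32 is as undefined as 1 << 32.  A small, hedged C
illustration (not GCC internals):

#include <stdio.h>

int
main (void)
{
  /* On LP64 hosts unsigned long is 8 bytes and 1UL << 32 happens to work;
     on LLP64 hosts (e.g. 64-bit Windows) it is 4 bytes and the shift is
     undefined again.  GCC's HOST_WIDE_INT_1U constant is 64-bit on every
     supported host, which is why it is suggested here.  */
  printf ("unsigned long:      %zu bytes\n", sizeof (unsigned long));
  printf ("unsigned long long: %zu bytes\n", sizeof (unsigned long long));
  return 0;
}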

> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -7879,7 +7879,7 @@ store_constructor (tree exp, rtx target, int cleared, 
> poly_int64 size,
>   auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
>   if (maybe_ne (GET_MODE_PRECISION (mode), nunits))
> tmp = expand_binop (mode, and_optab, tmp,
> -   GEN_INT ((1 << nunits) - 1), target,
> +   GEN_INT ((1UL << nunits) - 1), target,
> true, OPTAB_WIDEN);
>   if (tmp != target)
> emit_move_insn (target, tmp);
> -- 
> 2.41.0

Jakub



Re: PING: [PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-23 Thread H.J. Lu
On Fri, Feb 23, 2024 at 11:12:41AM +0100, Uros Bizjak wrote:
> On Fri, Feb 23, 2024 at 3:45 AM H.J. Lu  wrote:
> >
> > On Thu, Feb 22, 2024 at 6:39 PM Hongtao Liu  wrote:
> > >
> > > On Thu, Feb 22, 2024 at 10:33 PM H.J. Lu  wrote:
> > > >
> > > > On Sun, Feb 18, 2024 at 8:02 AM H.J. Lu  wrote:
> > > > >
> > > > > If assembler and linker supports
> > > > >
> > > > > add %reg1, name@gottpoff(%rip), %reg2
> > > > >
> > > > > with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of
> > > > >
> > > > > mov name@gottpoff(%rip), %reg2
> > > > > add %reg1, %reg2
> > > x86 part LGTM, but I'm not familiar with the changes in config related 
> > > files.
> >
> > Jakub, Uros, Alexandre, can you review the configure.ac change in this 
> > patch?
> >
> > https://patchwork.sourceware.org/project/gcc/list/?series=31075
> >
> > Thanks.
> >
> > > > >
> > > > > gcc/
> > > > >
> > > > > * configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 
> > > > > 1
> > > > > if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > > > * config.in: Regenerated.
> > > > > * configure: Likewise.
> > > > > * config/i386/predicates.md (apx_ndd_add_memory_operand): 
> > > > > Allow
> > > > > UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > > * gcc.target/i386/apx-ndd-tls-1b.c: New test.
> > > > > * lib/target-supports.exp
> > > > > (check_effective_target_code_6_gottpoff_reloc): New.
> > > > > ---
> > > > >  gcc/config.in |  7 +++
> > > > >  gcc/config/i386/predicates.md |  6 +-
> > > > >  gcc/configure | 62 
> > > > > +++
> > > > >  gcc/configure.ac  | 37 +++
> > > > >  .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
> > > > >  gcc/testsuite/lib/target-supports.exp | 48 ++
> > > > >  6 files changed, 168 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c
> > > > >
> > > > > diff --git a/gcc/config.in b/gcc/config.in
> > > > > index ce1d073833f..f3de4ba6776 100644
> > > > > --- a/gcc/config.in
> > > > > +++ b/gcc/config.in
> > > > > @@ -737,6 +737,13 @@
> > > > >  #endif
> > > > >
> > > > >
> > > > > +/* Define 0/1 if your assembler and linker support 
> > > > > R_X86_64_CODE_6_GOTTPOFF.
> > > > > +   */
> > > > > +#ifndef USED_FOR_TARGET
> > > > > +#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
> > > > > +#endif
> > > > > +
> > > > > +
> > > > >  /* Define if your assembler supports relocs needed by -fpic. */
> > > > >  #ifndef USED_FOR_TARGET
> > > > >  #undef HAVE_AS_SMALL_PIC_RELOCS
> > > > > diff --git a/gcc/config/i386/predicates.md 
> > > > > b/gcc/config/i386/predicates.md
> > > > > index 4c1aedd7e70..391f108c360 100644
> > > > > --- a/gcc/config/i386/predicates.md
> > > > > +++ b/gcc/config/i386/predicates.md
> > > > > @@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
> > > > >
> > > > >  ;; Return true if OP is a memory operand which can be used in APX NDD
> > > > >  ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
> > > > > -;; isn't allowed with APX NDD ADD.
> > > > > +;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF 
> > > > > works.
> > > > >  (define_predicate "apx_ndd_add_memory_operand"
> > > > >(match_operand 0 "memory_operand")
> > > > >  {
> > > > > +  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
> > > > > +  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
> > > > > +return true;
> > > > > +
> > > > >op = XEXP (op, 0);
> > > > >
> > > > >/* Disallow APX NDD ADD with UNSPEC_GOTNTPOFF.  */
> > > > > diff --git a/gcc/configure b/gcc/configure
> > > > > index 41b978b0380..c59c971862c 100755
> > > > > --- a/gcc/configure
> > > > > +++ b/gcc/configure
> > > > > @@ -29834,6 +29834,68 @@ cat >>confdefs.h <<_ACEOF
> > > > >  _ACEOF
> > > > >
> > > > >
> > > > > +if echo "$ld_ver" | grep GNU > /dev/null; then
> > > > > +  if $gcc_cv_ld -V 2>/dev/null | grep elf_x86_64_sol2 > 
> > > > > /dev/null; then
> > > > > +ld_ix86_gld_64_opt="-melf_x86_64_sol2"
> > > > > +  else
> > > > > +ld_ix86_gld_64_opt="-melf_x86_64"
> > > > > +  fi
> > > > > +fi
> > > > > +conftest_s='
> > > > > +   .text
> > > > > +   .globl  _start
> > > > > +   .type _start, @function
> > > > > +_start:
> > > > > +   addq%r23,foo@GOTTPOFF(%rip), %r15
> > > > > +   .section .tdata,"awT",@progbits
> > > > > +   .type foo, @object
> > > > > +foo:
> > > > > +   .quad 0'
> > > > > +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
> > > > > R_X86_64_CODE_6_GOTTPOFF reloc" >&5
> > > > > +$as_echo_n "checking assembler for R_X86_64_CODE_6_GOTTPOFF reloc... 
> > > > > " >&6; }
> > > > > +if ${gcc_cv_as_x86_64_code_6_gottpoff+:} false; then :
> > > > > +  $as_echo_n "(c

[PATCH v2] Do not emulate vectors containing floats.

2024-02-23 Thread Juergen Christ
The emulation via word mode tries to perform integer arithmetic on floating
point values instead of floating point arithmetic.  This leads to
mis-compilations.
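
A hedged, self-contained C sketch of why this matters (an invented example,
not one of the failing tests): doing the "word mode" emulation means operating
on the raw IEEE bit patterns with integer instructions, which is not the same
as element-wise floating-point arithmetic.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  float a = 1.5f, b = 2.25f;

  /* Element-wise floating-point arithmetic.  */
  float fp_sum = a + b;                       /* 3.75 */

  /* "Word mode" emulation: add the raw bit patterns as integers.  */
  uint32_t ia, ib, iw;
  memcpy (&ia, &a, sizeof ia);
  memcpy (&ib, &b, sizeof ib);
  iw = ia + ib;
  float word_sum;
  memcpy (&word_sum, &iw, sizeof word_sum);   /* garbage, not 3.75 */

  printf ("float add: %g, integer-bit add: %g\n", fp_sum, word_sum);
  return 0;
}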

Failure occurred on s390x on these existing test cases:
gcc.dg/vect/tsvc/vect-tsvc-s112.c
gcc.dg/vect/tsvc/vect-tsvc-s113.c
gcc.dg/vect/tsvc/vect-tsvc-s119.c
gcc.dg/vect/tsvc/vect-tsvc-s121.c
gcc.dg/vect/tsvc/vect-tsvc-s131.c
gcc.dg/vect/tsvc/vect-tsvc-s132.c
gcc.dg/vect/tsvc/vect-tsvc-s2233.c
gcc.dg/vect/tsvc/vect-tsvc-s421.c
gcc.dg/vect/vect-alias-check-14.c
gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_operation): Don't emulate floating
  point vectors

Signed-off-by: Juergen Christ 
---
 gcc/tree-vect-stmts.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 09749ae38174..f95ff2c2aa34 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6756,7 +6756,8 @@ vectorizable_operation (vec_info *vinfo,
 those through even when the mode isn't word_mode.  For
 ops we have to lower the lowering code assumes we are
 dealing with word_mode.  */
-  if (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
+ || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
|| !target_support_p)
   && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
  /* Check only during analysis.  */
-- 
2.39.3



Re: [PATCH] c++: Fix ICE due to folding a call to constructor on cdtor_returns_this arches (aka arm32) [PR113083]

2024-02-23 Thread Christophe Lyon
On Fri, 23 Feb 2024 at 10:13, Christophe Lyon
 wrote:
>
> On Fri, 23 Feb 2024 at 09:42, Jakub Jelinek  wrote:
> >
> > Hi!
> >
> > When targetm.cxx.cdtor_returns_this () (aka on arm32 TARGET_AAPCS_BASED)
> > constructor is supposed to return this pointer, but when we cp_fold such
> > a call, we don't take that into account and just INIT_EXPR the object,
> > so we can later ICE during gimplification, because the expression doesn't
> > have the right type.
> >
> > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux and
> > tested with a cross to armv7-linux-gnueabi on the testcase, but
> > unfortunately there are no 32-bit arm boxes in cfarm and arm32 is gone from
> > Fedora for quite some time as well, so I have no easy way to test this.
> > Christophe, do you think you could test this?  Thanks.
>
> Hi Jakub,
>
> Sadly our precommit CI could not apply your patch automatically (as
> you can see in patchwork).
>
> I'll test your patch manually.
>

I can now confirm that the new test passes on arm (native
armv8l-unknown-linux-gnueabihf), with no regressions.

Thanks,

Christophe

> Thanks,
>
> Christophe
>
> >
> > 2024-02-23  Jakub Jelinek  
> >
> > PR c++/113083
> > * cp-gimplify.cc (cp_fold): For targetm.cxx.cdtor_returns_this ()
> > wrap r into a COMPOUND_EXPR and return folded CALL_EXPR_ARG (x, 0).
> >
> > * g++.dg/cpp0x/constexpr-113083.C: New test.
> >
> > --- gcc/cp/cp-gimplify.cc.jj2024-02-22 21:45:09.663430066 +0100
> > +++ gcc/cp/cp-gimplify.cc   2024-02-22 22:30:23.481428242 +0100
> > @@ -3412,9 +3412,15 @@ cp_fold (tree x, fold_flags_t flags)
> > if (DECL_CONSTRUCTOR_P (callee))
> >   {
> > loc = EXPR_LOCATION (x);
> > -   tree s = build_fold_indirect_ref_loc (loc,
> > - CALL_EXPR_ARG (x, 0));
> > +   tree a = CALL_EXPR_ARG (x, 0);
> > +   bool return_this = targetm.cxx.cdtor_returns_this ();
> > +   if (return_this)
> > + a = cp_save_expr (a);
> > +   tree s = build_fold_indirect_ref_loc (loc, a);
> > r = cp_build_init_expr (s, r);
> > +   if (return_this)
> > + r = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (x), r,
> > + fold_convert_loc (loc, TREE_TYPE (x), a));
> >   }
> > x = r;
> > break;
> > --- gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C.jj2024-01-13 
> > 00:05:00.077372302 +0100
> > +++ gcc/testsuite/g++.dg/cpp0x/constexpr-113083.C   2024-02-22 
> > 22:20:20.622618992 +0100
> > @@ -0,0 +1,16 @@
> > +// PR c++/113083
> > +// { dg-do compile { target c++11 } }
> > +// { dg-options "-Os" }
> > +
> > +struct A { constexpr A (); };
> > +
> > +void
> > +foo ()
> > +{
> > +  A b;
> > +}
> > +
> > +constexpr
> > +A::A ()
> > +{
> > +}
> >
> > Jakub
> >


Re: [PATCH v2] Do not emulate vectors containing floats.

2024-02-23 Thread Sam James


Juergen Christ  writes:

> The emulation via word mode tries to perform integer arithmetic on floating
> point values instead of floating point arithmetic.  This leads to
> mis-compilations.

Is the bug ref + test missing?

>
> Failure occured on s390x on these existing test cases:
> gcc.dg/vect/tsvc/vect-tsvc-s112.c
> gcc.dg/vect/tsvc/vect-tsvc-s113.c
> gcc.dg/vect/tsvc/vect-tsvc-s119.c
> gcc.dg/vect/tsvc/vect-tsvc-s121.c
> gcc.dg/vect/tsvc/vect-tsvc-s131.c
> gcc.dg/vect/tsvc/vect-tsvc-s132.c
> gcc.dg/vect/tsvc/vect-tsvc-s2233.c
> gcc.dg/vect/tsvc/vect-tsvc-s421.c
> gcc.dg/vect/vect-alias-check-14.c
> gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
> gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
> gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
>
> gcc/ChangeLog:
>
>   * tree-vect-stmts.cc (vectorizable_operation): Don't emulate floating
>   point vectors
>
> Signed-off-by: Juergen Christ 
> ---
>  gcc/tree-vect-stmts.cc | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 09749ae38174..f95ff2c2aa34 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6756,7 +6756,8 @@ vectorizable_operation (vec_info *vinfo,
>those through even when the mode isn't word_mode.  For
>ops we have to lower the lowering code assumes we are
>dealing with word_mode.  */
> -  if (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
> +   || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
>   || !target_support_p)
>  && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
> /* Check only during analysis.  */



Re: [PATCH v2] Do not emulate vectors containing floats.

2024-02-23 Thread Juergen Christ
Am Fri, Feb 23, 2024 at 01:57:12PM + schrieb Sam James:
> 
> Juergen Christ  writes:
> 
> > The emulation via word mode tries to perform integer arithmetic on floating
> > point values instead of floating point arithmetic.  This leads to
> > mis-compilations.
> 
> Is the bug ref + test missing?

Sorry, forgot to add the "bootstrapped and tested on s390x and x86_64".

Not sure how to reference a bugzilla here.  There is 114075 that
should be solved with this, too.

> >
> > Failure occured on s390x on these existing test cases:
> > gcc.dg/vect/tsvc/vect-tsvc-s112.c
> > gcc.dg/vect/tsvc/vect-tsvc-s113.c
> > gcc.dg/vect/tsvc/vect-tsvc-s119.c
> > gcc.dg/vect/tsvc/vect-tsvc-s121.c
> > gcc.dg/vect/tsvc/vect-tsvc-s131.c
> > gcc.dg/vect/tsvc/vect-tsvc-s132.c
> > gcc.dg/vect/tsvc/vect-tsvc-s2233.c
> > gcc.dg/vect/tsvc/vect-tsvc-s421.c
> > gcc.dg/vect/vect-alias-check-14.c
> > gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
> > gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
> > gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-stmts.cc (vectorizable_operation): Don't emulate floating
> >   point vectors
> >
> > Signed-off-by: Juergen Christ 
> > ---
> >  gcc/tree-vect-stmts.cc | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index 09749ae38174..f95ff2c2aa34 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -6756,7 +6756,8 @@ vectorizable_operation (vec_info *vinfo,
> >  those through even when the mode isn't word_mode.  For
> >  ops we have to lower the lowering code assumes we are
> >  dealing with word_mode.  */
> > -  if (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
> > +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
> > + || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
> > || !target_support_p)
> >&& maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
> >   /* Check only during analysis.  */
> 


[PATCH v1 01/13] Introduce aarch64-w64-mingw32 target

2024-02-23 Thread Evgeny Karpov
Hi Andrew and Richard,

Thank you for pointing out there's no need for a 64-bit ISA and the big-endian 
target.
These changes will be addressed in v2.

Regards,
Evgeny

-Original Message-
Thursday, February 22, 2024 12:33 PM 
Richard Earnshaw (lists)  wrote:
>
+aarch64*-*-mingw*)

Other targets are a bit inconsistent here as well, but, as Andrew mentioned, if 
you don't want to handle big-endian, it might be better to match 
aarch64-*-mingw* here.


-Original Message-
Wednesday, February 21, 2024 7:23 PM 
Andrew Pinski wrote:

> need_64bit_isa=yes

This is not needed in the patch as it is only used for x86_64 targets.

Should you make sure nobody specifies the big-endian target
aarch64_be-w64-mingw32?



Re: [PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Richard Biener



> On 23.02.2024 at 14:03, Jakub Jelinek wrote:
> 
> On Fri, Feb 23, 2024 at 12:58:53PM +, Andrew Stubbs wrote:
>> This is a follow-up to the previous patch to ensure that integer vector
>> bit-masks do not have excess bits set. It fixes a bug, observed on
>> amdgcn, in which the mask could be incorrectly set to zero, resulting in
>> wrong-code.
>> 
>> The mask was broken when nunits==32. The patched version will probably
>> be broken for nunits==64, but I don't think any current targets have
>> masks with more than 64 bits.
>> 
>> OK for mainline?
>> 
>> Andrew
>> 
>> gcc/ChangeLog:
>> 
>>* expr.cc (store_constructor): Use 64-bit shifts.
> 
> No, this isn't 64-bit shift on all hosts.
> Use HOST_WIDE_INT_1U instead.

I think there are now two other similar places recently added that need 
adjustment as well.

Richard 

>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -7879,7 +7879,7 @@ store_constructor (tree exp, rtx target, int cleared, 
>> poly_int64 size,
>>auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
>>if (maybe_ne (GET_MODE_PRECISION (mode), nunits))
>>  tmp = expand_binop (mode, and_optab, tmp,
>> -  GEN_INT ((1 << nunits) - 1), target,
>> +  GEN_INT ((1UL << nunits) - 1), target,
>>  true, OPTAB_WIDEN);
>>if (tmp != target)
>>  emit_move_insn (target, tmp);
>> --
>> 2.41.0
> 
>Jakub
> 


[pushed] aarch64: Spread out FPR usage between RA regions [PR113613]

2024-02-23 Thread Richard Sandiford
early-ra already had code to do regrename-style "broadening"
of the allocation, to promote scheduling freedom.  However,
the pass divides the function into allocation regions
and this broadening only worked within a single region.
This meant that if a basic block contained one subblock
of FPR use, followed by a point at which no FPRs were live,
followed by another subblock of FPR use, the two subblocks
would tend to reuse the same registers.  This in turn meant
that it wasn't possible to form LDP/STP pairs between them.

The failure to form LDPs and STPs in the testcase was a
regression from GCC 13.

The patch adds a simple heuristic to prefer less recently
used registers in the event of a tie.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
PR target/113613
* config/aarch64/aarch64-early-ra.cc
(early_ra::m_current_region): New member variable.
(early_ra::m_fpr_recency): Likewise.
(early_ra::start_new_region): Bump m_current_region.
(early_ra::allocate_colors): Prefer less recently used registers
in the event of a tie.  Add a comment to explain why we prefer(ed)
higher-numbered registers.
(early_ra::find_oldest_color): Prefer less recently used registers
here too.
(early_ra::finalize_allocation): Update recency information for
allocated registers.
(early_ra::process_blocks): Initialize m_current_region and
m_fpr_recency.

gcc/testsuite/
PR target/113613
* gcc.target/aarch64/pr113613.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc  | 55 +
 gcc/testsuite/gcc.target/aarch64/pr113613.c | 13 +
 2 files changed, 59 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113613.c

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc 
b/gcc/config/aarch64/aarch64-early-ra.cc
index 9ac9ec1bb0d..8530b0ae41e 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -532,6 +532,12 @@ private:
   // The set of FPRs that are currently live.
   unsigned int m_live_fprs;
 
+  // A unique one-based identifier for the current region.
+  unsigned int m_current_region;
+
+  // The region in which each FPR was last used, or 0 if none.
+  unsigned int m_fpr_recency[32];
+
   // --
 
   // A mask of the FPRs that have already been allocated.
@@ -1305,6 +1311,7 @@ early_ra::start_new_region ()
   m_allocated_fprs = 0;
   m_call_preserved_fprs = 0;
   m_allocation_successful = true;
+  m_current_region += 1;
 }
 
 // Create and return an allocno group of size SIZE for register REGNO.
@@ -2819,19 +2826,30 @@ early_ra::allocate_colors ()
candidates &= ~(m_allocated_fprs >> i);
   unsigned int best = INVALID_REGNUM;
   int best_weight = 0;
+  unsigned int best_recency = 0;
   for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
{
  if ((candidates & (1U << fpr)) == 0)
continue;
  int weight = color->fpr_preferences[fpr];
+ unsigned int recency = 0;
  // Account for registers that the current function must preserve.
  for (unsigned int i = 0; i < color->group->size; ++i)
-   if (m_call_preserved_fprs & (1U << (fpr + i)))
- weight -= 1;
- if (best == INVALID_REGNUM || best_weight <= weight)
+   {
+ if (m_call_preserved_fprs & (1U << (fpr + i)))
+   weight -= 1;
+ recency = MAX (recency, m_fpr_recency[fpr + i]);
+   }
+ // Prefer higher-numbered registers in the event of a tie.
+ // This should tend to keep lower-numbered registers free
+ // for allocnos that require V0-V7 or V0-V15.
+ if (best == INVALID_REGNUM
+ || best_weight < weight
+ || (best_weight == weight && recency <= best_recency))
{
  best = fpr;
  best_weight = weight;
+ best_recency = recency;
}
}
 
@@ -2888,19 +2906,27 @@ early_ra::find_oldest_color (unsigned int first_color,
 {
   color_info *best = nullptr;
   unsigned int best_start_point = ~0U;
+  unsigned int best_recency = 0;
   for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
 {
   auto *color = m_colors[ci];
-  if (fpr_conflicts & (1U << (color->hard_regno - V0_REGNUM)))
+  unsigned int fpr = color->hard_regno - V0_REGNUM;
+  if (fpr_conflicts & (1U << fpr))
continue;
-  if (!color->group)
-   return color;
-  auto chain_head = color->group->chain_heads ()[0];
-  auto start_point = m_allocnos[chain_head]->start_point;
-  if (!best || best_start_point > start_point)
+  unsigned int start_point = 0;
+  if (color->group)
+   {
+ auto chain_head = color->group->chain_heads ()[0];
+ start_point = m_allocnos[chain_head]->start

[pushed] aarch64: Tighten early-ra chain test for wide registers [PR113295]

2024-02-23 Thread Richard Sandiford
Most code in early-ra used is_chain_candidate to check whether we
should chain two allocnos.  This included both tests that matter
for correctness and tests for certain heuristics.

Once that test passes for one pair of allocnos, we test whether
it's safe to chain the containing groups (which might contain
multiple allocnos for x2, x3 and x4 modes).  This test used an
inline test for correctness only, deliberately skipping the
heuristics.  However, this instance of the test was missing
some handling of equivalent allocnos.

This patch fixes things by making is_chain_candidate take a
strictness parameter: correctness only, or correctness + heuristics.
It then makes the group-chaining test use the correctness version
rather than trying to replicate it inline.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
PR target/113295
* config/aarch64/aarch64-early-ra.cc
(early_ra::test_strictness): New enum.
(early_ra::is_chain_candidate): Add a strictness parameter to
control whether only correctness matters, or whether both correctness
and heuristics should be used.  Handle multiple levels of equivalence.
(early_ra::find_related_start): Update call accordingly.
(early_ra::strided_polarity_pref): Likewise.
(early_ra::form_chains): Likewise.
(early_ra::try_to_chain_allocnos): Use is_chain_candidate in
correctness mode rather than trying to inline the test.

gcc/testsuite/
PR target/113295
* gcc.target/aarch64/pr113295-2.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc| 48 
 gcc/testsuite/gcc.target/aarch64/pr113295-2.c | 57 +++
 2 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113295-2.c

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 58ae5a49913..9ac9ec1bb0d 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -95,6 +95,10 @@ public:
   void execute ();
 
 private:
+  // Whether to test only things that are required for correctness,
+  // or whether to take optimization heuristics into account as well.
+  enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
+
   static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
   using operand_mask = uint32_t;
 
@@ -452,7 +456,7 @@ private:
 
   template
   static int cmp_increasing (const void *, const void *);
-  bool is_chain_candidate (allocno_info *, allocno_info *);
+  bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
   int rate_chain (allocno_info *, allocno_info *);
   static int cmp_chain_candidates (const void *, const void *);
   void chain_allocnos (unsigned int &, unsigned int &);
@@ -1588,7 +1592,7 @@ early_ra::find_related_start (allocno_info *dest_allocno,
return res;
 
   auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
-  if (!is_chain_candidate (dest_allocno, next_allocno))
+  if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
return res;
 
   dest_allocno = next_allocno;
@@ -2011,7 +2015,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
   if (allocno1->offset + 1 < allocno1->group_size
   && allocno2->offset + 1 < allocno2->group_size)
 {
-  if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
+  if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
return 1;
   else
return -1;
@@ -2019,7 +2023,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
 
   if (allocno1->offset > 0 && allocno2->offset > 0)
 {
-  if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
+  if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
return 1;
   else
return -1;
@@ -2215,38 +2219,37 @@ early_ra::cmp_increasing (const void *allocno1_ptr, 
const void *allocno2_ptr)
 }
 
 // Return true if we should consider chaining ALLOCNO1 onto the head
-// of ALLOCNO2.  This is just a local test of the two allocnos; it doesn't
-// guarantee that chaining them would give a self-consistent system.
+// of ALLOCNO2.  STRICTNESS says whether we should take copy-elision
+// heuristics into account, or whether we should just consider things
+// that matter for correctness.
+//
+// This is just a local test of the two allocnos; it doesn't guarantee
+// that chaining them would give a self-consistent system.
 bool
-early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
+early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
+ test_strictness strictness)
 {
   if (allocno2->is_shared ())
 return false;
 
-  if (allocno1->is_equiv)
+  while (allocno1->is_equiv)
 allocno1 = m_allocnos[allocno1->related_allocno];
 
   if (allocno2->start_point >= allocno1->end_point
   && !allocno2->is_equiv_to (allocno

[PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Evgeny Karpov
The calling ABI enum definition has been done following a similar convention in 
https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/i386-opts.h;h=ef2825803b32001b9632769bdff196d1e43d27ba;hb=refs/heads/master#l41

MS_ABI is used in gcc/config/i386/mingw32.h and gcc/config/i386/winnt-d.cc
https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/mingw32.h;h=58304fc55f629648e47490fd3c0f3db3858e4fd8;hb=refs/heads/master#l22

These files are moved to the mingw folder in the patch series.
https://gcc.gnu.org/pipermail/gcc-patches/attachments/20240221/5e75c464/attachment.txt

What do you think about this change for v2?

+/* Available call ABIs.  */
+enum aarch64_calling_abi
+{
+  AARCH64_CALLING_ABI_EABI,
+  AARCH64_CALLING_ABI_MS,
+  MS_ABI = AARCH64_CALLING_ABI_MS
+};
+

Regards,
Evgeny


Thursday, February 22, 2024 12:40 PM
Richard Earnshaw (lists) wrote:

>
+/* Available call ABIs.  */
+enum calling_abi
+{
+  AARCH64_EABI = 0,
+  MS_ABI = 1
+};
+

The convention in this file seems to be that all enum types to start with 
aarch64.  Also, the enumeration values should start with the name of the 
enumeration type in upper case, so:

enum aarch64_calling_abi
{
  AARCH64_CALLING_ABI_EABI,
  AARCH64_CALLING_ABI_MS
};

or something very much like that.

R.


Re: [PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Andrew Stubbs

On 23/02/2024 13:02, Jakub Jelinek wrote:

On Fri, Feb 23, 2024 at 12:58:53PM +, Andrew Stubbs wrote:

This is a follow-up to the previous patch to ensure that integer vector
bit-masks do not have excess bits set. It fixes a bug, observed on
amdgcn, in which the mask could be incorrectly set to zero, resulting in
wrong-code.

The mask was broken when nunits==32. The patched version will probably
be broken for nunits==64, but I don't think any current targets have
masks with more than 64 bits.

OK for mainline?

Andrew

gcc/ChangeLog:

* expr.cc (store_constructor): Use 64-bit shifts.


No, this isn't 64-bit shift on all hosts.
Use HOST_WIDE_INT_1U instead.


OK, I did wonder if there was a proper way to do it. :)

How about this?

Andrew
vect: Fix integer overflow calculating mask

The mask was broken when nunits==32 on hosts where int is 32-bit.

gcc/ChangeLog:

* expr.cc (store_constructor): Use 64-bit shifts.

diff --git a/gcc/expr.cc b/gcc/expr.cc
index e23880e..6bd16ac7f49 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7879,8 +7879,8 @@ store_constructor (tree exp, rtx target, int cleared, 
poly_int64 size,
auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
if (maybe_ne (GET_MODE_PRECISION (mode), nunits))
  tmp = expand_binop (mode, and_optab, tmp,
- GEN_INT ((1 << nunits) - 1), target,
- true, OPTAB_WIDEN);
+ GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
+ target, true, OPTAB_WIDEN);
if (tmp != target)
  emit_move_insn (target, tmp);
break;
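
(Illustrative sketch, not part of the patch: the overflow reproduced
outside GCC.  On a host where int is 32 bits, shifting 1 by nunits == 32
is undefined and in practice produced a zero mask; shifting a 64-bit
unsigned 1, which is effectively what HOST_WIDE_INT_1U is, keeps the
shift well defined.)

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  unsigned int nunits = 32;
  /* Broken shape: (1 << nunits) - 1 is undefined for a 32-bit int.  */
  /* Fixed shape: shift a 64-bit unsigned 1 instead.  */
  uint64_t mask = (UINT64_C (1) << nunits) - 1;
  printf ("%#llx\n", (unsigned long long) mask);  /* prints 0xffffffff */
  return 0;
}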


Re: [PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Jakub Jelinek
On Fri, Feb 23, 2024 at 02:22:19PM +, Andrew Stubbs wrote:
> On 23/02/2024 13:02, Jakub Jelinek wrote:
> > On Fri, Feb 23, 2024 at 12:58:53PM +, Andrew Stubbs wrote:
> > > This is a follow-up to the previous patch to ensure that integer vector
> > > bit-masks do not have excess bits set. It fixes a bug, observed on
> > > amdgcn, in which the mask could be incorrectly set to zero, resulting in
> > > wrong-code.
> > > 
> > > The mask was broken when nunits==32. The patched version will probably
> > > be broken for nunits==64, but I don't think any current targets have
> > > masks with more than 64 bits.
> > > 
> > > OK for mainline?
> > > 
> > > Andrew
> > > 
> > > gcc/ChangeLog:
> > > 
> > >   * expr.cc (store_constructor): Use 64-bit shifts.
> > 
> > No, this isn't 64-bit shift on all hosts.
> > Use HOST_WIDE_INT_1U instead.
> 
> OK, I did wonder if there was a proper way to do it. :)
> 
> How about this?

If you change the other two GEN_INT ((1 << nunits) - 1) occurrences in
expr.cc the same way, then LGTM.

Jakub



Re: [PATCH] libgccjit: Make new_array_type take unsigned long

2024-02-23 Thread Antoni Boucher
I had forgotten to add the doc since there is now a new API.
Here it is.

On Wed, 2024-02-21 at 19:45 -0500, Antoni Boucher wrote:
> Thanks for the review.
> 
> Here's the updated patch.
> 
> On Thu, 2023-12-07 at 20:04 -0500, David Malcolm wrote:
> > On Thu, 2023-12-07 at 17:29 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patches update gcc_jit_context_new_array_type to take the
> > > size
> > > as
> > > an unsigned long instead of a int, to allow creating bigger array
> > > types.
> > > 
> > > I haven't written the ChangeLog yet because I wasn't sure it's
> > > allowed
> > > to change the type of a function like that.
> > > If it isn't, what would you suggest?
> > 
> > We've kept ABI compatibility all the way back to the version in GCC
> > 5,
> > so it seems a shame to break ABI.
> > 
> > How about a new API entrypoint:
> >   gcc_jit_context_new_array_type_unsigned_long
> > whilst keeping the old one.
> > 
> > Then everything internally can use "unsigned long"; we just keep
> > the
> > old entrypoint accepting int (which internally promotes the arg to
> > unsigned long, if positive, sharing all the implementation).
> > 
> > Alternatively, I think there may be a way to do this with symbol
> > versioning:
> >   https://gcc.gnu.org/wiki/SymbolVersioning
> > see e.g. Section 3.7 of Ulrich Drepper's "How To Write Shared
> > Libraries", but I'm a bit wary of cross-platform compatibility with
> > that.
> > 
> > Dave
> > 
> > 
> 

From 00156914f0805788190706935962fe65ab5fd7cb Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Sat, 4 Mar 2023 00:44:49 -0500
Subject: [PATCH] libgccjit: Add gcc_jit_context_new_array_type_unsigned_long

gcc/jit/ChangeLog:

	* docs/topics/compatibility.rst (LIBGCCJIT_ABI_28): New ABI tag.
	* docs/topics/types.rst: Document
	gcc_jit_context_new_array_type_unsigned_long.
	* jit-playback.cc (new_array_type): Change num_elements type to
	unsigned long.
	* jit-playback.h (new_array_type): Change num_elements type to
	unsigned long.
	* jit-recording.cc (recording::context::new_array_type): Change
	num_elements type to unsigned long.
	(recording::array_type::make_debug_string): Use unsigned long
	format.
	(recording::array_type::write_reproducer): Switch to
	gcc_jit_context_new_array_type_unsigned_long.
	* jit-recording.h (class array_type): Change num_elements type
	to unsigned long.
	(new_array_type): Change num_elements type to unsigned long.
	(num_elements): Change return type to unsigned long.
	* libgccjit.cc (gcc_jit_context_new_array_type_unsigned_long):
	New function.
	* libgccjit.h (gcc_jit_context_new_array_type_unsigned_long):
	New function.
	* libgccjit.map: New function.

gcc/testsuite/ChangeLog:

	* jit.dg/all-non-failing-tests.h: Add test-arrays-unsigned-long.c.
	* jit.dg/test-arrays-unsigned-long.c: New test.
---
 gcc/jit/docs/topics/compatibility.rst |   7 +
 gcc/jit/docs/topics/types.rst |  18 ++
 gcc/jit/jit-playback.cc   |   2 +-
 gcc/jit/jit-playback.h|   2 +-
 gcc/jit/jit-recording.cc  |  12 +-
 gcc/jit/jit-recording.h   |   8 +-
 gcc/jit/libgccjit.cc  |  12 +-
 gcc/jit/libgccjit.h   |  14 ++
 gcc/jit/libgccjit.map |   5 +
 gcc/testsuite/jit.dg/all-non-failing-tests.h  |  10 ++
 .../jit.dg/test-arrays-unsigned-long.c| 165 ++
 11 files changed, 242 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-arrays-unsigned-long.c

diff --git a/gcc/jit/docs/topics/compatibility.rst b/gcc/jit/docs/topics/compatibility.rst
index 9cfb054f653..a8c8cf45630 100644
--- a/gcc/jit/docs/topics/compatibility.rst
+++ b/gcc/jit/docs/topics/compatibility.rst
@@ -397,3 +397,10 @@ on functions and variables:
 
 ``LIBGCCJIT_ABI_27`` covers the addition of
 :func:`gcc_jit_context_new_sizeof`
+
+.. _LIBGCCJIT_ABI_28:
+
+``LIBGCCJIT_ABI_28``
+
+``LIBGCCJIT_ABI_28`` covers the addition of
+:func:`gcc_jit_context_new_array_type_unsigned_long`
diff --git a/gcc/jit/docs/topics/types.rst b/gcc/jit/docs/topics/types.rst
index bb51f037b7e..d83ecd3cf13 100644
--- a/gcc/jit/docs/topics/types.rst
+++ b/gcc/jit/docs/topics/types.rst
@@ -160,6 +160,24 @@ Pointers, `const`, and `volatile`
 
Given non-`void` type "T", get type "T[N]" (for a constant N).
 
+.. function::  gcc_jit_type *\
+   gcc_jit_context_new_array_type_unsigned_long (gcc_jit_context *ctxt, \
+ gcc_jit_location *loc, \
+ gcc_jit_type *element_type, \
+ unsigned long num_elements)
+
+   Given non-`void` type "T", get type "T[N]" (for a constant N).
+
+   This is the same as gcc_jit_context_new_array_type, but the type of
+   ``num_elements`` different and thus allows creating bigger 
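
(Illustrative sketch, not part of the patch: a minimal use of the new
entrypoint, relying only on the signature documented above plus the
existing gcc_jit_context_get_type.  The helper name and element count
are made up for the example.)

#include <libgccjit.h>

/* Build a char[3UL << 30] array type; the element count does not fit
   in a 32-bit int, which is the point of the new entrypoint.  */
static gcc_jit_type *
make_big_array_type (gcc_jit_context *ctxt)
{
  gcc_jit_type *char_type
    = gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_CHAR);
  return gcc_jit_context_new_array_type_unsigned_long (ctxt,
                                                        NULL /* loc */,
                                                        char_type,
                                                        3UL << 30);
}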

Re: [PATCH] vect: Fix integer overflow calculating mask

2024-02-23 Thread Richard Biener
On Fri, 23 Feb 2024, Jakub Jelinek wrote:

> On Fri, Feb 23, 2024 at 02:22:19PM +, Andrew Stubbs wrote:
> > On 23/02/2024 13:02, Jakub Jelinek wrote:
> > > On Fri, Feb 23, 2024 at 12:58:53PM +, Andrew Stubbs wrote:
> > > > This is a follow-up to the previous patch to ensure that integer vector
> > > > bit-masks do not have excess bits set. It fixes a bug, observed on
> > > > amdgcn, in which the mask could be incorrectly set to zero, resulting in
> > > > wrong-code.
> > > > 
> > > > The mask was broken when nunits==32. The patched version will probably
> > > > be broken for nunits==64, but I don't think any current targets have
> > > > masks with more than 64 bits.
> > > > 
> > > > OK for mainline?
> > > > 
> > > > Andrew
> > > > 
> > > > gcc/ChangeLog:
> > > > 
> > > > * expr.cc (store_constructor): Use 64-bit shifts.
> > > 
> > > No, this isn't 64-bit shift on all hosts.
> > > Use HOST_WIDE_INT_1U instead.
> > 
> > OK, I did wonder if there was a proper way to do it. :)
> > 
> > How about this?
> 
> If you change the other two GEN_INT ((1 << nunits) - 1) occurrences in
> expr.cc the same way, then LGTM.

There's also two in dojump.cc

Richard.


Re: [PATCH] ARM: Fix conditional execution [PR113915]

2024-02-23 Thread Wilco Dijkstra
Hi Richard,

> This bit isn't.  The correct fix here is to fix the pattern(s) concerned to 
> add the missing predicate.
>
> Note that builtin-bswap.x explicitly mentions predicated mnemonics in the 
> comments.

I fixed the patterns in v2. There are likely some more, plus we could likely 
merge many t1 and t2
patterns where the only difference is predication. But those cleanups are for 
another time...

Cheers,
Wilco

v2: Add predicable to the rev patterns.

By default most patterns can be conditionalized on Arm targets.  However
Thumb-2 predication requires the "predicable" attribute be explicitly
set to "yes".  Most patterns are shared between Arm and Thumb(-2) and are
marked with "predicable".  Given this sharing, it does not make sense to
use a different default for Arm.  So only consider conditional execution
of instructions that have the predicable attribute set to yes.  This ensures
that patterns not explicitly marked as such are never conditionally executed.
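
(Illustrative sketch, not part of the patch: the sort of source the
builtin-bswap tests cover.  With -O2 on a Thumb-2 target the byte
reverse in the taken arm can only be if-converted into a predicated
rev/revsh when the corresponding pattern is marked predicable;
otherwise the compiler has to keep a branch.)

/* Whether these end up as revne/revshne depends on the rev patterns
   carrying the predicable attribute.  */
unsigned int
cond_rev32 (unsigned int x, int flip)
{
  return flip ? __builtin_bswap32 (x) : x;
}

short
cond_rev16s (short x, int flip)
{
  return flip ? (short) __builtin_bswap16 ((unsigned short) x) : x;
}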

Passes regress and bootstrap, OK for commit?

gcc/ChangeLog:
PR target/113915
* config/arm/arm.md (NOCOND): Improve comment.
(arm_rev*) Add predicable.
* config/arm/arm.cc (arm_final_prescan_insn): Add check for
PREDICABLE_YES.

gcc/testsuite/ChangeLog:
PR target/113915
* gcc.target/arm/builtin-bswap-1.c: Fix test.

---

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 1cd69268ee986a0953cc85ab259355d2191250ac..6a35fe44138135998877a9fb74c2a82a7f99dcd5 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -25613,11 +25613,12 @@ arm_final_prescan_insn (rtx_insn *insn)
  break;
 
case INSN:
- /* Instructions using or affecting the condition codes make it
-fail.  */
+ /* Check the instruction is explicitly marked as predicable.
+Instructions using or affecting the condition codes are not.  
*/
  scanbody = PATTERN (this_insn);
  if (!(GET_CODE (scanbody) == SET
|| GET_CODE (scanbody) == PARALLEL)
+ || get_attr_predicable (this_insn) != PREDICABLE_YES
  || get_attr_conds (this_insn) != CONDS_NOCOND)
fail = TRUE;
  break;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5816409f86f1106b410c5e21d77e599b485f85f2..81237a61d4a2ebcfb77e47c2bd29137aba28a521 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -307,6 +307,8 @@
 ;
 ; NOCOND means that the instruction does not use or alter the condition
 ;   codes but can be converted into a conditionally exectuted instruction.
+;   Given that NOCOND is the default for most instructions if omitted,
+;   the attribute predicable must be set to yes as well.
 
 (define_attr "conds" "use,set,clob,unconditional,nocond"
(if_then_else
@@ -12547,6 +12549,7 @@
   revsh%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12560,6 +12563,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12584,6 +12588,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12619,6 +12624,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
index c1e7740d14d3ca4e93a71e38b12f82c19791a204..1a311a6a5af647d40abd553e5d0ba1273c76d288 100644
--- a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
+++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
@@ -5,14 +5,11 @@
of the instructions.  Add an -mtune option known to facilitate that.  */
 /* { dg-additional-options "-O2 -mtune=cortex-a53" } */
 /* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
-/* { dg-final { scan-assembler-times "revsh\\t" 1 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "revshne\\t" 1 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "revsh\\t" 2 { target { ! arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 1 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "rev16ne\\t" 1 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 2 { target { ! arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "rev\\t" 2 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "revne\\t" 2 { target { arm_nothumb } } } }  */
-/* { dg-final { scan-assembler-times "rev\\t" 4 { target { ! arm_nothumb } } } }  */
+/* { dg-final { scan-assembler-times "revsh\\t" 1 } }  */
+/* { dg-

Re: [PATCH v1 03/13] aarch64: Mark x18 register as a fixed register for MS ABI

2024-02-23 Thread Jacek Caban

On 22.02.2024 18:45, Andrew Pinski wrote:

On Thu, Feb 22, 2024 at 3:56 AM Richard Earnshaw (lists)
 wrote:

On 21/02/2024 18:30, Evgeny Karpov wrote:
+/* X18 reserved for the TEB on Windows.  */
+#ifdef TARGET_ARM64_MS_ABI
+# define FIXED_X18 1
+# define CALL_USED_X18 0
+#else
+# define FIXED_X18 0
+# define CALL_USED_X18 1
+#endif

I'm not overly keen on ifdefs like this (and the one below), it can get quite 
confusing if we have to support more than a couple of ABIs.  Perhaps we could 
create a couple of new headers, one for the EABI (which all existing targets 
would then need to include) and one for the MS ABI.  Then the mingw port would 
use that instead of the EABI header.

An alternative is to make all this dynamic, based on the setting of the 
aarch64_calling_abi enum and to make the adjustments in 
aarch64_conditional_register_usage.

Dynamically might be needed also if we want to support ms_abi
attribute and/or -mabi=ms to support the wine folks.



Wine no longer needs ms_abi, it was needed for PE-in-ELF modules in the 
past. We use use proper PE files now, so we need a cross compiler, but 
no special attributes. aarch64-w64-mingw32 is already well supported by 
Wine when using llvm-mingw, so as soon as GCC properly supports the ABI, 
Wine should just work with it, in theory. I didn't try it, but I don't 
see things like vararg support in this patchset nor in the repo, so I 
assume it won't work yet.



Thanks for the work!

Jacek



[PATCH] aarch64: Fix costing of manual bfi instructions

2024-02-23 Thread Andrew Pinski
This fixes the cost model for BFI instructions which don't
directly use zero_extract on the LHS.
aarch64_bfi_rtx_p does the heavy lifting by matching
the patterns.

Note this alone does not fix PR 107270; it is a step in the right
direction.  There we get a zero_extend for the non-shifted part,
which we don't currently match.
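
(Illustrative sketch, not part of the patch: the C shape of a "manual"
bit-field insert.  Merging the low byte of b into bits 8..15 of a has
the (reg & mask1) | ((reg << shift) & mask2) form that
aarch64_bfi_rtx_p matches, and with masks like these it can become a
single bfi such as bfi w0, w1, 8, 8.)

unsigned int
manual_bfi (unsigned int a, unsigned int b)
{
  /* Keep everything in a except bits 8..15, then insert b's low byte
     there.  */
  return (a & ~0xff00u) | ((b << 8) & 0xff00u);
}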

Built and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_bfi_rtx_p): New function.
(aarch64_rtx_costs): For IOR, try calling aarch64_bfi_rtx_p.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64.cc | 94 +++
 1 file changed, 94 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3d8341c17fe..dc5c5c23cb3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13776,6 +13776,90 @@ aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
   return false;
 }
 
+/* Return true iff X is an rtx that will match an bfi instruction
+   i.e. as described in the *aarch64_bfi5 family of patterns.
+   OP0 and OP1 will be set to the operands of the insert involved
+   on success and will be NULL_RTX otherwise.  */
+
+static bool
+aarch64_bfi_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
+{
+  rtx op0, op1;
+  scalar_int_mode mode;
+
+  *res_op0 = NULL_RTX;
+  *res_op1 = NULL_RTX;
+  if (!is_a  (GET_MODE (x), &mode))
+return false;
+
+  if (GET_CODE (x) != IOR)
+return false;
+
+  unsigned HOST_WIDE_INT mask1;
+  unsigned HOST_WIDE_INT shft_amnt;
+  unsigned HOST_WIDE_INT mask2;
+  rtx shiftop;
+
+  rtx iop0 = XEXP (x, 0);
+  rtx iop1 = XEXP (x, 1);
+
+  if (GET_CODE (iop0) == AND
+  && CONST_INT_P (XEXP (iop0, 1))
+  && GET_CODE (XEXP (iop0, 0)) != ASHIFT)
+{
+  op0 = XEXP (iop0, 0);
+  mask1 = UINTVAL (XEXP (iop0, 1));
+  shiftop = iop1;
+}
+  else if (GET_CODE (iop1) == AND
+  && CONST_INT_P (XEXP (iop1, 1))
+  && GET_CODE (XEXP (iop1, 0)) != ASHIFT)
+{
+  op0 = XEXP (iop1, 0);
+  mask1 = UINTVAL (XEXP (iop1, 1));
+  shiftop = iop0;
+}
+  else
+return false;
+
+  /* Shifted with no mask. */
+  if (GET_CODE (shiftop) == ASHIFT
+  && CONST_INT_P (XEXP (shiftop, 1)))
+{
+  shft_amnt = UINTVAL (XEXP (shiftop, 1));
+  mask2 = HOST_WIDE_INT_M1U << shft_amnt;
+  op1 = XEXP (shiftop, 0);
+}
+   else if (GET_CODE (shiftop) == AND
+   && CONST_INT_P (XEXP (shiftop, 1)))
+{
+  mask2 = UINTVAL (XEXP (shiftop, 1));
+  if (GET_CODE (XEXP (shiftop, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (shiftop, 0), 1)))
+   {
+ op1 = XEXP (XEXP (shiftop, 0), 0);
+ shft_amnt = UINTVAL (XEXP (XEXP (shiftop, 0), 1));
+   }
+  else
+   {
+ op1 = XEXP (shiftop, 0);
+ shft_amnt = 0;
+   }
+}
+  else
+return false;
+
+  if (shft_amnt >= GET_MODE_BITSIZE (mode))
+return false;
+
+  if (!aarch64_masks_and_shift_for_bfi_p (mode, mask1, shft_amnt, mask2))
+return false;
+
+  *res_op0 = op0;
+  *res_op1 = op1;
+  return true;
+}
+
 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
storing it in *COST.  Result is true if the total cost of the operation
has now been calculated.  */
@@ -14662,6 +14746,16 @@ cost_plus:
  return true;
 }
 
+  if (aarch64_bfi_rtx_p (x, &op0, &op1))
+   {
+ *cost += rtx_cost (op0, mode, IOR, 0, speed);
+ *cost += rtx_cost (op0, mode, IOR, 1, speed);
+ if (speed)
+   *cost += extra_cost->alu.bfi;
+
+ return true;
+   }
+
   if (aarch64_extr_rtx_p (x, &op0, &op1))
 {
  *cost += rtx_cost (op0, mode, IOR, 0, speed);
-- 
2.43.0



[PATCH] libatomic: Fix build for --disable-gnu-indirect-function [PR113986]

2024-02-23 Thread Wilco Dijkstra

Fix libatomic build to support --disable-gnu-indirect-function on AArch64.
Always build atomic_16.S and add aliases to the __atomic_* functions if
!HAVE_IFUNC.
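
(Illustrative sketch, not part of the patch: the difference, in plain
GNU C, between the ifunc dispatch used when HAVE_IFUNC and the plain
aliases added for the !HAVE_IFUNC build.  The function names are made
up; this only shows the mechanism, not libatomic's actual symbols.)

/* Two implementations of the same operation.  */
int impl_lse (void)   { return 1; }
int impl_basic (void) { return 0; }

/* With ifunc support the exported symbol is resolved at load time.  */
typedef int (*op_fn) (void);
static op_fn
resolve_op (void)
{
  return impl_basic;  /* a real resolver would check the HWCAPs */
}
int op_ifunc (void) __attribute__ ((ifunc ("resolve_op")));

/* Without ifunc support the exported symbol is just an alias of the
   implementation chosen at build time.  */
int op_alias (void) __attribute__ ((alias ("impl_basic")));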

Passes regress and bootstrap, OK for commit?

libatomic:
PR target/113986
* Makefile.in: Regenerated.
* Makefile.am: Make atomic_16.S not depend on HAVE_IFUNC.
Remove predefine of HAVE_FEAT_LSE128.
* config/linux/aarch64/atomic_16.S: Add __atomic_ aliases if 
!HAVE_IFUNC.   
* config/linux/aarch64/host-config.h: Correctly handle !HAVE_IFUNC.

---

diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index d49c44c7d5fbe83061fddd1f8ef4813a39eb1b8b..980677f353345c050f6cef2d57090360216c56cf 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -130,12 +130,8 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix 
_$(s)_.lo,$(SIZEOBJS)))
 ## On a target-specific basis, include alternates to be selected by IFUNC.
 if HAVE_IFUNC
 if ARCH_AARCH64_LINUX
-if ARCH_AARCH64_HAVE_LSE128
-AM_CPPFLAGS = -DHAVE_FEAT_LSE128
-endif
 IFUNC_OPTIONS   = -march=armv8-a+lse
 libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix 
_$(s)_1_.lo,$(SIZEOBJS)))
-libatomic_la_SOURCES += atomic_16.S
 
 endif
 if ARCH_ARM_LINUX
@@ -155,6 +151,10 @@ libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
 endif
 endif
 
+if ARCH_AARCH64_LINUX
+libatomic_la_SOURCES += atomic_16.S
+endif
+
 libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
 libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
 
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 11c8ec7ba15ba7da5ef55e90bd836317bc270061..d9d529bc502d4ce7b9997640d5f40f5d5cc1232c 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -90,17 +90,17 @@ build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach 
s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS)) \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ tas_1_2_.lo
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix 
_8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix 
_16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix 
_8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix 
_16_1_.lo,$(SIZEOBJS)) \
 @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@   $(addsuffix 
_16_2_.lo,$(SIZEOBJS))
 
+@ARCH_AARCH64_LINUX_TRUE@am__append_5 = atomic_16.S
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -156,8 +156,7 @@ am__uninstall_files_from_dir = { \
   }
 am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
 LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
-@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 =  \
-@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ atomic_16.lo
+@ARCH_AARCH64_LINUX_TRUE@am__objects_1 = atomic_16.lo
 am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
$(am__objects_1)
@@ -425,7 +424,7 @@ libatomic_la_LDFLAGS = $(libatomic_version_info) 
$(libatomic_version_script) \
$(lt_host_flags) $(libatomic_darwin_rpath)
 
 libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
-   init.c fenv.c fence.c flag.c $(am__append_2)
+   init.c fenv.c fence.c flag.c $(am__append_5)
 SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
 EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
 libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -451,9 +450,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard 
$(dir)/*.c))
 # Then sort through them to find the one we want, and select the first.
 M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
 libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
-   _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
-   $(am__append_4) $(am__append_5)
-@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS
 = -DHAVE_FEAT_LSE128
+   _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
+   $(am__append_3) $(am__append_4)
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp 
-DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
diff --git a/libatomic/config/linux/aarch64/atomic_16.

Re: [PATCH v1 03/13] aarch64: Mark x18 register as a fixed register for MS ABI

2024-02-23 Thread Richard Sandiford
"Richard Earnshaw (lists)"  writes:
> On 21/02/2024 18:30, Evgeny Karpov wrote:
>> 
> +/* X18 reserved for the TEB on Windows.  */
> +#ifdef TARGET_ARM64_MS_ABI
> +# define FIXED_X18 1
> +# define CALL_USED_X18 0
> +#else
> +# define FIXED_X18 0
> +# define CALL_USED_X18 1
> +#endif
>
> I'm not overly keen on ifdefs like this (and the one below), it can get quite 
> confusing if we have to support more than a couple of ABIs.  Perhaps we could 
> create a couple of new headers, one for the EABI (which all existing targets 
> would then need to include) and one for the MS ABI.  Then the mingw port 
> would use that instead of the EABI header.
>
> An alternative is to make all this dynamic, based on the setting of the 
> aarch64_calling_abi enum and to make the adjustments in 
> aarch64_conditional_register_usage.

Agreed FWIW.

> +# define CALL_USED_X18 0
>
> Is that really correct?  If the register is really reserved, but some code 
> modifies it anyway, this will cause the compiler to restore the old value at 
> the end of a function; generally, for a reserved register, code that knows 
> what it's doing would want to make permanent changes to this value.

I don't think it would do that for fixed registers.  For those this
is more whether calls are allowed to change the value of x18 or whether
x18 is supposed to remain fixed (e.g. set at the start of the thread and
not changed thereafter).

How does the MS ABI use this register?  Same question for Darwin I suppose.

Thanks,
Richard

>
> +#ifdef TARGET_ARM64_MS_ABI
> +# define STATIC_CHAIN_REGNUM R17_REGNUM
> +#else
> +# define STATIC_CHAIN_REGNUM R18_REGNUM
> +#endif
>
> If we went the enum way, we'd want something like
>
> #define STATIC_CHAIN_REGNUM (calling_abi == AARCH64_CALLING_ABI_MS ? 
> R17_REGNUM : R18_REGNUM)
>
> R.


Re: [PATCH v1 04/13] aarch64: Add aarch64-w64-mingw32 COFF

2024-02-23 Thread Richard Sandiford
Evgeny Karpov  writes:
> From 55fd2a63afa9abb3543d714b6f5925efd2682e08 Mon Sep 17 00:00:00 2001
> From: Zac Walker 
> Date: Wed, 21 Feb 2024 12:20:46 +0100
> Subject: [PATCH v1 04/13] aarch64: Add aarch64-w64-mingw32 COFF
>
> Define ASM specific for COFF format on AArch64.
>
> gcc/ChangeLog:
>
>   * config.gcc: Add COFF format support definitions.
>   * config/aarch64/aarch64-coff.h: New file.

The only surprising thing here to me was:

> ---
>  gcc/config.gcc|  1 +
>  gcc/config/aarch64/aarch64-coff.h | 92 +++
>  2 files changed, 93 insertions(+)
>  create mode 100644 gcc/config/aarch64/aarch64-coff.h
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 2a9e4c44f50..34c7be72fb6 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1264,6 +1264,7 @@ aarch64*-*-linux*)
>   TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
>   ;;
>  aarch64*-*-mingw*)
> + tm_file="${tm_file} aarch64/aarch64-coff.h"
>   tmake_file="${tmake_file} aarch64/t-aarch64"
>   case ${enable_threads} in
> "" | yes | win32)
> diff --git a/gcc/config/aarch64/aarch64-coff.h 
> b/gcc/config/aarch64/aarch64-coff.h
> new file mode 100644
> index 000..d91bc36b67b
> --- /dev/null
> +++ b/gcc/config/aarch64/aarch64-coff.h
> @@ -0,0 +1,92 @@
> +/* Machine description for AArch64 architecture.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   .  */
> +
> +#ifndef GCC_AARCH64_COFF_H
> +#define GCC_AARCH64_COFF_H
> +
> +#include "aarch64.h"
> +
> +#ifndef LOCAL_LABEL_PREFIX
> +# define LOCAL_LABEL_PREFIX  ""
> +#endif
> +
> +/* Using long long breaks -ansi and -std=c90, so these will need to be
> +   made conditional for an LLP64 ABI.  */
> +#undef SIZE_TYPE
> +#define SIZE_TYPE"long long unsigned int"
> +
> +#undef PTRDIFF_TYPE
> +#define PTRDIFF_TYPE "long long int"
> +
> +#define TARGET_64BIT 1

...this.  Does some code that is shared between x86 and aarch64 rely
on this definition?  It might be worth identifying the code in a comment
if so.

Thanks,
Richard

> +#undef LONG_TYPE_SIZE
> +#define LONG_TYPE_SIZE 32
> +
> +#ifndef ASM_GENERATE_INTERNAL_LABEL
> +# define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM)  \
> +  sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned 
> int)(NUM))
> +#endif
> +
> +#define ASM_OUTPUT_ALIGN(STREAM, POWER)  \
> +  fprintf (STREAM, "\t.align\t%d\n", (int)POWER)
> +
> +/* Output a common block.  */
> +#ifndef ASM_OUTPUT_COMMON
> +# define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED)  \
> +{\
> +  fprintf (STREAM, "\t.comm\t"); \
> +  assemble_name (STREAM, NAME);  \
> +  asm_fprintf (STREAM, ", %d, %d\n", \
> +  (int)(ROUNDED), (int)(SIZE));  \
> +}
> +#endif
> +
> +/* Output a local common block.  /bin/as can't do this, so hack a
> +   `.space' into the bss segment.  Note that this is *bad* practice,
> +   which is guaranteed NOT to work since it doesn't define STATIC
> +   COMMON space but merely STATIC BSS space.  */
> +#ifndef ASM_OUTPUT_ALIGNED_LOCAL
> +# define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
> +{
> \
> +  switch_to_section (bss_section);   
> \
> +  ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT)); \
> +  ASM_OUTPUT_LABEL (STREAM, NAME);   
> \
> +  fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE));   
> \
> +}
> +#endif
> +
> +#define ASM_OUTPUT_SKIP(STREAM, NBYTES)  \
> +  fprintf (STREAM, "\t.space\t%d  // skip\n", (int) (NBYTES))
> +
> +#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE)
> +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL)
> +
> +#define TEXT_SECTION_ASM_OP  "\t.text"
> +#define DATA_SECTION_ASM_OP  "\t.data"
> +#define BSS_SECTION_ASM_OP   "\t.bss"
> +
> +#define CTORS_SECTION_ASM_OP "\t.section\t.ctors, \"aw\""
> +#define DTORS_SECTION_ASM_OP "\t.section\t.dtors, \"aw\""
> +
> +#define GLOBAL_ASM_OP "\t.global\t"
> +
> +#undef SUPPORTS_INIT_PRIORITY
> +#define SUPPORTS_INIT_PRIORITY 0
> 

Re: [PATCH v1 08/13] aarch64: Add Cygwin and MinGW environments for AArch64

2024-02-23 Thread Richard Sandiford
Evgeny Karpov  writes:
> From 1ea6efa6f88d131884ecef21c4b5d2ecbab14ea7 Mon Sep 17 00:00:00 2001
> From: Zac Walker 
> Date: Tue, 20 Feb 2024 18:06:36 +0100
> Subject: [PATCH v1 08/13] aarch64: Add Cygwin and MinGW environments for
>  AArch64
>
> Define Cygwin and MinGW environment such as types, SEH definitions,
> shared libraries, etc.
>
> gcc/ChangeLog:
>
>   * config.gcc: Add Cygwin and MinGW difinitions.
>   * config/aarch64/aarch64-protos.h
>   (mingw_pe_maybe_record_exported_symbol): Declare functions
>   which are used in Cygwin and MinGW environment.
>   (mingw_pe_section_type_flags): Likewise.
>   (mingw_pe_unique_section): Likewise.
>   (mingw_pe_encode_section_info): Likewise.
>   * config/aarch64/cygming.h: New file.
> ---
>  gcc/config.gcc  |   1 +
>  gcc/config/aarch64/aarch64-protos.h |   5 +
>  gcc/config/aarch64/cygming.h| 178 
>  3 files changed, 184 insertions(+)
>  create mode 100644 gcc/config/aarch64/cygming.h
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 498ee702607..96e3508eb30 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1265,6 +1265,7 @@ aarch64*-*-linux*)
>   ;;
>  aarch64*-*-mingw*)
>   tm_file="${tm_file} aarch64/aarch64-coff.h"
> + tm_file="${tm_file} aarch64/cygming.h"
>   tm_file="${tm_file} mingw/mingw32.h"
>   tm_file="${tm_file} mingw/mingw-stdint.h"
>   tmake_file="${tmake_file} aarch64/t-aarch64"
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index a0b142e0b94..6d85452b0f6 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1109,6 +1109,11 @@ extern void aarch64_output_patchable_area (unsigned 
> int, bool);
>  
>  extern void aarch64_adjust_reg_alloc_order ();
>  
> +extern void mingw_pe_maybe_record_exported_symbol (tree, const char *, int);
> +extern unsigned int mingw_pe_section_type_flags (tree, const char *, int);
> +extern void mingw_pe_unique_section (tree, int);
> +extern void mingw_pe_encode_section_info (tree, rtx, int);
> +
>  bool aarch64_optimize_mode_switching (aarch64_mode_entity);
>  void aarch64_restore_za (rtx);
>  
> diff --git a/gcc/config/aarch64/cygming.h b/gcc/config/aarch64/cygming.h
> new file mode 100644
> index 000..cf47184eb66
> --- /dev/null
> +++ b/gcc/config/aarch64/cygming.h
> @@ -0,0 +1,178 @@
> +/* Operating system specific defines to be used when targeting GCC for
> +   hosting on Windows32, using a Unix style C library and tools.
> +   Copyright (C) 1995-2024 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +.  */
> +
> +#ifndef GCC_AARCH64_CYGMING_H
> +#define GCC_AARCH64_CYGMING_H
> +
> +#undef PREFERRED_DEBUGGING_TYPE
> +#define PREFERRED_DEBUGGING_TYPE DINFO_TYPE_NONE
> +
> +#define FASTCALL_PREFIX '@'
> +
> +#define print_reg(rtx, code, file)
> +
> +#define SYMBOL_FLAG_DLLIMPORT 0
> +#define SYMBOL_FLAG_DLLEXPORT 0
> +
> +#define SYMBOL_REF_DLLEXPORT_P(X) \
> + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
> +
> +#undef TARGET_SEH
> +#define TARGET_SEH 0
> +
> +#define SSE_REGNO_P(N) 0
> +#define GENERAL_REGNO_P(N) 0

Could you add a comment to explain how these two macros are consumed?
What is the effect of saying that everything is neither a general
register nor an SSE register?

> +#define SEH_MAX_FRAME_SIZE 0
> +
> +#undef DEFAULT_ABI
> +#define DEFAULT_ABI MS_ABI
> +
> +#undef TARGET_PECOFF
> +#define TARGET_PECOFF 1
> +
> +#include 
> +#ifdef __MINGW32__
> +#include 
> +#endif
> +
> +extern void mingw_pe_asm_named_section (const char *, unsigned int, tree);
> +extern void mingw_pe_declare_function_type (FILE *file, const char *name,
> + int pub);
> +
> +#define TARGET_ASM_NAMED_SECTION  mingw_pe_asm_named_section
> +
> +/* Select attributes for named sections.  */
> +#define TARGET_SECTION_TYPE_FLAGS  mingw_pe_section_type_flags
> +
> +#define TARGET_ASM_UNIQUE_SECTION mingw_pe_unique_section
> +#define TARGET_ENCODE_SECTION_INFO  mingw_pe_encode_section_info
> +
> +/* Declare the type properly for any external libcall.  */
> +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
> +  mingw_pe_declare_function_type (FILE, XSTR (FUN, 0), 1)
> +
> +#define TARGET_OS_CPP_BUILTINS() \
> +  do  

Re: [PATCH] RISC-V: Point our Python scripts at python3

2024-02-23 Thread Palmer Dabbelt

On Thu, 22 Feb 2024 20:29:37 PST (-0800), Kito Cheng wrote:

I guess Palmer is too busy, so committed to trunk :P


Thanks, I got distracted with some work stuff ;)



On Tue, Feb 13, 2024 at 11:55 PM Jeff Law  wrote:




On 2/9/24 09:53, Palmer Dabbelt wrote:
> This builds for me, and I frequently have python-is-python3 type
> packages installed so I think I've been implicitly testing it for a
> while.  Looks like Kito's tested similar configurations, and the
> bugzilla indicates we should be moving over.
>
> gcc/ChangeLog:
>
>   PR 109668
>   * config/riscv/arch-canonicalize: Move to python3
>   * config/riscv/multilib-generator: Likewise
Just to summarize from the coordination call this morning.  We've agreed
this should go forward.  While there is minor risk (this code is rarely
run), it's something we're prepared to handle if there is fallout.

Jeff


Re: [PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Richard Sandiford
Evgeny Karpov  writes:
> The calling ABI enum definition has been done following a similar convention 
> in 
> https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/i386-opts.h;h=ef2825803b32001b9632769bdff196d1e43d27ba;hb=refs/heads/master#l41
>
> MS_ABI is used in gcc/config/i386/mingw32.h and gcc/config/i386/winnt-d.cc
> https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/mingw32.h;h=58304fc55f629648e47490fd3c0f3db3858e4fd8;hb=refs/heads/master#l22
>
> These files are moved to the mingw folder in the patch series.
> https://gcc.gnu.org/pipermail/gcc-patches/attachments/20240221/5e75c464/attachment.txt
>
> What do you think about this change for v2?
>
> +/* Available call ABIs.  */
> +enum aarch64_calling_abi
> +{
> +  AARCH64_CALLING_ABI_EABI,
> +  AARCH64_CALLING_ABI_MS,
> +  MS_ABI = AARCH64_CALLING_ABI_MS
> +};
> +

How is MS_ABI used in practice?  When I apply locally, it looks like
the two non-x86 uses are in:

gcc/config/mingw/mingw32.h:  if (TARGET_64BIT && ix86_abi == MS_ABI)
\
gcc/config/mingw/winnt-d.cc:  if (TARGET_64BIT && ix86_abi == MS_ABI)

But these should fail to build if used, because AFAICT there's no
definition of ix86_abi on aarch64.

The first match is in EXTRA_OS_CPP_BUILTINS, but I couldn't see any uses
of that in aarch64 code, which would explain why everything builds OK.
The winnt-d.cc occurence looks like it would break the build with the
D frontend enabled though.

Are there two distinct ABIs for aarch64-*-mingw*?  Or are these
distinctions ignored on aarch64 and just retained for compatibility?

If there are two distinct ABIs then we should probably add them to
aarch64_arm_pcs.  But if there is only a single ABI, we should probably
avoid adding calling_abi altogether and instead provide a macro like
TARGET_IS_MS_ABI that aarch64 and x86 can define differently.

(To be clear, I don't think the different handling of x18 matters
for the PCS classification.  That's an orthogonal platform property
that applies to all PCS variants equally.  No-one had suggested
otherwise, just wanted to say in case. :-) )

Thanks,
Richard

>
> Regards,
> Evgeny
>
>
> Thursday, February 22, 2024 12:40 PM
> Richard Earnshaw (lists) wrote:
>
>>
> +/* Available call ABIs.  */
> +enum calling_abi
> +{
> +  AARCH64_EABI = 0,
> +  MS_ABI = 1
> +};
> +
>
> The convention in this file seems to be that all enum types to start with 
> aarch64.  Also, the enumeration values should start with the name of the 
> enumeration type in upper case, so:
>
> enum aarch64_calling_abi
> {
>   AARCH64_CALLING_ABI_EABI,
>   AARCH64_CALLING_ABI_MS
> };
>
> or something very much like that.
>
> R.


Re: [PATCH] c++: Fix ICE due to folding a call to constructor on cdtor_returns_this arches (aka arm32) [PR113083]

2024-02-23 Thread Jason Merrill

On 2/23/24 08:53, Christophe Lyon wrote:

On Fri, 23 Feb 2024 at 10:13, Christophe Lyon
 wrote:


On Fri, 23 Feb 2024 at 09:42, Jakub Jelinek  wrote:


Hi!

When targetm.cxx.cdtor_returns_this () (aka on arm32 TARGET_AAPCS_BASED)
constructor is supposed to return this pointer, but when we cp_fold such
a call, we don't take that into account and just INIT_EXPR the object,
so we can later ICE during gimplification, because the expression doesn't
have the right type.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux and
tested with a cross to armv7-linux-gnueabi on the testcase, but
unfortunately there are no 32-bit arm boxes in cfarm and arm32 is gone from
Fedora for quite some time as well, so I have no easy way to test this.
Christophe, do you think you could test this?  Thanks.


Hi Jakub,

Sadly our precommit CI could not apply your patch automatically (as
you can see in patchwork).

I'll test your patch manually.



I can now confirm that the new test passes on arm (native
armv8l-unknown-linux-gnueabihf), and no regression.


The patch is OK.

Jason



[PATCH] middle-end: update vuses out of loop which use a vdef that's moved [PR114068]

2024-02-23 Thread Tamar Christina
Hi All,

In certain cases we can have a situation where the merge block has a vUSE
virtual PHI and the exits do not.  In this case for instance the exits lead
to an abort so they have no virtual PHIs.  If we have a store before the first
exit and we move it to a later block during vectorization we update the vUSE
chain.

However the merge block is not an exit and is not visited by the update code.

This patch fixes it by checking, while moving, whether there are any out of loop
uses of the vDEF that is the last_seen_vuse.  Normally there wouldn't be any and
the update is skipped, but if there are, they are updated to the last vDEF in
the exit block.

Bootstrapped Regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimizations/114068
* tree-vect-loop.cc (move_early_exit_stmts): Update vUSE chain in merge
block.

gcc/testsuite/ChangeLog:

PR tree-optimizations/114068
* gcc.dg/vect/vect-early-break_118-pr114068.c: New test.
* gcc.dg/vect/vect-early-break_119-pr114068.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
new file mode 100644
index 
..b462a464b6603e718c5a283513ea586fc13e37ce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+if (a.f)
+  __builtin_unreachable();
+if (o > 1)
+  __builtin_unreachable();
+*(&k.b + o) = 1;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
new file mode 100644
index 
..a65ef7b8c4901b2ada585f38fda436dc07d1e1de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int c;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+if (a.f)
+  __builtin_unreachable();
+if (o > 1)
+  __builtin_unreachable();
+*(&k.b + o) = 1;
+*(&k.c + o*m) = 2;
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 35f1f8c7d4245135ace740ff9be548919587..44bd8032b55b1ef84fdf4fa9d6117304b7709d6f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11837,6 +11837,27 @@ move_early_exit_stmts (loop_vec_info loop_vinfo)
   update_stmt (p);
 }
 
+  /* last_seen_vuse should now be the PHI in the loop header.  Check for
+ any out of loop uses and update them to the vUSE on the loop latch.  */
+  auto vuse_stmt =  loop_vinfo->lookup_def (last_seen_vuse);
+  gphi *vuse_def;
+  if (vuse_stmt
+  && (vuse_def = dyn_cast  (STMT_VINFO_STMT (vuse_stmt
+{
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  gimple *use_stmt;
+  auto loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree vuse = PHI_ARG_DEF_FROM_EDGE (vuse_def, loop_latch_edge (loop));
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, last_seen_vuse)
+   {
+ if (flow_bb_inside_loop_p (loop, use_stmt->bb))
+   continue;
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+   SET_USE (use_p, vuse);
+   }
+}
+
   /* And update the LC PHIs on exits.  */
   for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP  (loop_vinfo)))
 if (!dominated_by_p (CDI_DOMINATORS, e->src, dest_bb))





Re: [PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Andrew Pinski
On Fri, Feb 23, 2024 at 9:51 AM Richard Sandiford
 wrote:
>
> Evgeny Karpov  writes:
> > The calling ABI enum definition has been done following a similar 
> > convention in
> > https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/i386-opts.h;h=ef2825803b32001b9632769bdff196d1e43d27ba;hb=refs/heads/master#l41
> >
> > MS_ABI is used in gcc/config/i386/mingw32.h and gcc/config/i386/winnt-d.cc
> > https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/i386/mingw32.h;h=58304fc55f629648e47490fd3c0f3db3858e4fd8;hb=refs/heads/master#l22
> >
> > These files are moved to the mingw folder in the patch series.
> > https://gcc.gnu.org/pipermail/gcc-patches/attachments/20240221/5e75c464/attachment.txt
> >
> > What do you think about this change for v2?
> >
> > +/* Available call ABIs.  */
> > +enum aarch64_calling_abi
> > +{
> > +  AARCH64_CALLING_ABI_EABI,
> > +  AARCH64_CALLING_ABI_MS,
> > +  MS_ABI = AARCH64_CALLING_ABI_MS
> > +};
> > +
>
> How is MS_ABI used in practice?  When I apply locally, it looks like
> the two non-x86 uses are in:
>
> gcc/config/mingw/mingw32.h:  if (TARGET_64BIT && ix86_abi == MS_ABI)  
>   \
> gcc/config/mingw/winnt-d.cc:  if (TARGET_64BIT && ix86_abi == MS_ABI)
>
> But these should fail to build if used, because AFAICT there's no
> definition of ix86_abi on aarch64.
>
> The first match is in EXTRA_OS_CPP_BUILTINS, but I couldn't see any uses
> of that in aarch64 code, which would explain why everything builds OK.
> The winnt-d.cc occurence looks like it would break the build with the
> D frontend enabled though.
>
> Are there two distinct ABIs for aarch64-*-mingw*?  Or are these
> distinctions ignored on aarch64 and just retained for compatibility?

There is arm64ec ABI defined for aarch64 windows which is a different
ABI from the standard windows aarch64 ABI, though I am not sure if it
supported with the patches here.
It is documented at
https://learn.microsoft.com/en-us/cpp/build/arm64ec-windows-abi-conventions?view=msvc-170
.

Thanks,
Andrew

>
> If there are two distinct ABIs then we should probably add them to
> aarch64_arm_pcs.  But if there is only a single ABI, we should probably
> avoid adding calling_abi altogether and instead provide a macro like
> TARGET_IS_MS_ABI that aarch64 and x86 can define differently.
>
> (To be clear, I don't think the different handling of x18 matters
> for the PCS classification.  That's an orthogonal platform property
> that applies to all PCS variants equally.  No-one had suggested
> otherwise, just wanted to say in case. :-) )
>
> Thanks,
> Richard
>
> >
> > Regards,
> > Evgeny
> >
> >
> > Thursday, February 22, 2024 12:40 PM
> > Richard Earnshaw (lists) wrote:
> >
> >>
> > +/* Available call ABIs.  */
> > +enum calling_abi
> > +{
> > +  AARCH64_EABI = 0,
> > +  MS_ABI = 1
> > +};
> > +
> >
> > The convention in this file seems to be that all enum types to start with 
> > aarch64.  Also, the enumeration values should start with the name of the 
> > enumeration type in upper case, so:
> >
> > enum aarch64_calling_abi
> > {
> >   AARCH64_CALLING_ABI_EABI,
> >   AARCH64_CALLING_ABI_MS
> > };
> >
> > or something very much like that.
> >
> > R.


Re: Repost [PATCH 1/6] Add -mcpu=future

2024-02-23 Thread Segher Boessenkool
On Tue, Feb 20, 2024 at 06:35:34PM +0800, Kewen.Lin wrote:
> on 2024/2/8 03:58, Michael Meissner wrote:
> $ grep -r "define PROCESSOR_DEFAULT" gcc/config/rs6000/
> gcc/config/rs6000/aix71.h:#define PROCESSOR_DEFAULT PROCESSOR_POWER7
> gcc/config/rs6000/aix71.h:#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
> gcc/config/rs6000/aix72.h:#define PROCESSOR_DEFAULT PROCESSOR_POWER7
> gcc/config/rs6000/aix72.h:#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
> gcc/config/rs6000/aix73.h:#define PROCESSOR_DEFAULT PROCESSOR_POWER8
> gcc/config/rs6000/aix73.h:#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
> gcc/config/rs6000/darwin.h:#define PROCESSOR_DEFAULT  PROCESSOR_PPC7400
> gcc/config/rs6000/darwin.h:#define PROCESSOR_DEFAULT64  PROCESSOR_POWER4
> gcc/config/rs6000/freebsd64.h:#define PROCESSOR_DEFAULT PROCESSOR_PPC7450
> gcc/config/rs6000/freebsd64.h:#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
> gcc/config/rs6000/linux64.h:#define PROCESSOR_DEFAULT PROCESSOR_POWER7
> gcc/config/rs6000/linux64.h:#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
> gcc/config/rs6000/rs6000.h:#define PROCESSOR_DEFAULT   PROCESSOR_PPC603
> gcc/config/rs6000/rs6000.h:#define PROCESSOR_DEFAULT64 PROCESSOR_RS64A
> gcc/config/rs6000/vxworks.h:#define PROCESSOR_DEFAULT PROCESSOR_PPC604
> 
> , and they are unlikely to be updated later, no?

In most cases that would be an ABI change.  Almost never an acceptable
thing to do.


Segher


Re: [PATCH v1 00/13] Add aarch64-w64-mingw32 target

2024-02-23 Thread Richard Sandiford
"Richard Earnshaw (lists)"  writes:
> On 21/02/2024 17:47, Evgeny Karpov wrote:
>> Hello,
>> 
>> We would like to take your attention to the review of changes for the
>> new GCC target, aarch64-w64-mingw32. The new target will be
>> supported, tested, added to CI, and maintained by Linaro. This marks
>> the first of three planned patch series contributing to the GCC C
>> compiler's support for Windows Arm64.
>> 
>> 1. Minimal aarch64-w64-mingw32 C implementation to cross-compile
>> hello-world with libgcc for Windows Arm64 using MinGW.
>> 2. Extension of the aarch64-w64-mingw32 C implementation to
>> cross-compile OpenSSL, OpenBLAS, FFmpeg, and libjpeg-turbo. All
>> packages successfully pass tests.
>> 3. Addition of call stack support for debugging, resolution of
>> optimization issues in the C compiler, and DLL export/import for the
>> aarch64-w64-mingw32 target.
>> 
>> This patch series introduces the 1st point, which involves building
>> hello-world for the aarch64-w64-mingw32 target. The patch depends on
>> the binutils changes for the aarch64-w64-mingw32 target that have
>> already been merged.
>> 
>> The binutils should include recent relocation fixes.
>> f87eaf8ff3995a5888c6dc4996a20c770e6bcd36
>> aarch64: Add new relocations and limit COFF AArch64 relocation offsets
>> 
>> The series is structured in a way to trivially show that it should not
>> affect any other targets.
>> 
>> In this patch, several changes have been made to support the
>> aarch64-w64-mingw32 target for GCC. The modifications include the
>> definition of the MS ABI for aarch64, adjustments to FIXED_REGISTERS
>> and STATIC_CHAIN_REGNUM for different ABIs, and specific definitions
>> for COFF format on AArch64. Additionally, the patch reuses MinGW
>>  types and definitions from i386, relocating them to a new
>> mingw folder for shared usage between both targets.
>> 
>> MinGW-specific options have been introduced for AArch64, along with
>> override options for aarch64-w64-mingw32. Builtin stack probing for
>> AArch64 has been enabled as an alternative for chkstk. Symbol name
>> encoding and section information handling for aarch64-w64-mingw32 have
>> been incorporated, and the MinGW environment has been added, which
>> will also be utilized for defining the Cygwin environment in the
>> future.
>> 
>> The patch includes renaming "x86 Windows Options" to "Cygwin and MinGW
>> Options," which now encompasses AArch64 as well. AArch64-specific
>> Cygwin and MinGW Options have been introduced for the unique
>> requirements of the AArch64 architecture.
>> 
>> Function type declaration and named sections support have been added.
>> The necessary objects for Cygwin and MinGW have been built for the
>> aarch64-w64-mingw32 target, and relevant files such as msformat-c.cc
>> and winnt-d.cc have been moved to the mingw folder for reuse in
>> AArch64.
>> 
>> Furthermore, the aarch64-w64-mingw32 target has been included in both
>> libatomic and libgcc, ensuring support for the AArch64 architecture
>> within these libraries. These changes collectively enhance the
>> capabilities of GCC for the specified target.
>> 
>> Coauthors: Zac Walker ,
>> Mark Harmstone   and
>> Ron Riddle 
>> 
>> Refactored, prepared, and validated by 
>> Radek Barton  and 
>> Evgeny Karpov 
>> 
>> Special thanks to the Linaro GNU toolchain team for internal review
>> and assistance in preparing the patch series!
>> 
>> Regards,
>> Evgeny
>
> Thanks for posting this.

Seconded. :)  Thanks also for the very clear organisation of the series,
and for commonising code rather than cut-&-pasting it.

FWIW, I agree with all the comments posted so far, and just sent
some other comments too.  I think my main high-level comments are:

- Could you double-check that all the code in the common files are
  used on both aarch64 and x86?  I think it's OK to move code outside
  of x86 even if aarch64 doesn't want to use it, provided that it makes
  conceptual target-independent sense.  But it's not clear whether
  unused code is deliberate or not (e.g. the EXTRA_OS_CPP_BUILTINS
  thing I mentioned in the part 2 review).

- Could you test with all languages enabled, and say what languages
  are supported?  Some languages require an existing compiler for
  the same language and so are more difficult to bootstrap for
  a new port.  I suppose you'd need a cross-host build first,
  then use the cross-compiled compilers to bootstrap.

Thanks,
Richard


Re: [PATCH v2] Do not emulate vectors containing floats.

2024-02-23 Thread Jakub Jelinek
On Fri, Feb 23, 2024 at 02:43:45PM +0100, Juergen Christ wrote:
> The emulation via word mode tries to perform integer arithmetic on floating
> point values instead of floating point arithmetic.  This leads to
> mis-compilations.
> 
> Failure occurred on s390x on these existing test cases:
> gcc.dg/vect/tsvc/vect-tsvc-s112.c
> gcc.dg/vect/tsvc/vect-tsvc-s113.c
> gcc.dg/vect/tsvc/vect-tsvc-s119.c
> gcc.dg/vect/tsvc/vect-tsvc-s121.c
> gcc.dg/vect/tsvc/vect-tsvc-s131.c
> gcc.dg/vect/tsvc/vect-tsvc-s132.c
> gcc.dg/vect/tsvc/vect-tsvc-s2233.c
> gcc.dg/vect/tsvc/vect-tsvc-s421.c
> gcc.dg/vect/vect-alias-check-14.c
> gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
> gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
> gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
> 
> gcc/ChangeLog:
> 

Please add
PR tree-optimization/114075
above the * tree-vect-stmts line.
>   * tree-vect-stmts.cc (vectorizable_operation): Don't emulate floating
>   point vectors

This line should be tab indented like the first one, and end with .
And given what the patch does, perhaps say non-integral instead of floating
point.

As for testcase, I'll handle it separately, given that it already
fixes some pre-existing tests.

> Signed-off-by: Juergen Christ 
> ---
>  gcc/tree-vect-stmts.cc | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 09749ae38174..f95ff2c2aa34 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6756,7 +6756,8 @@ vectorizable_operation (vec_info *vinfo,
>those through even when the mode isn't word_mode.  For
>ops we have to lower the lowering code assumes we are
>dealing with word_mode.  */
> -  if (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
> +   || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
>   || !target_support_p)
>  && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
> /* Check only during analysis.  */

LGTM, but please wait until Monday evening so that Richi or Richard
have a chance to chime in.

Jakub



Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-23 Thread Jason Merrill

On 2/23/24 06:23, Alexandre Oliva wrote:


I'm not so worried about bootstrap itself as I am about post-bootstrap
host tools.  Those are unusual in that, after native bootstraps, they're
built using the just-built (last-stage) compiler and target libraries,
rather than the host compiler and system libraries.  While configuring
them, we need LD_LIBRARY_PATH (or similar) set up so that native
execution tests can pass, at the very least; while building them, we may
need LD_LIBRARY_PATH set up so that dependent libraries are found and
link correctly.


So, like gdb?  Sure, if we built a stage3 gdb with the stage2 gcc and 
linked against the stage2 libstdc++.so, it would want to be able to find 
the stage2 libstdc++.so, or the stage3 in case of bootstrap-lean.


The problem, as you say, comes when you want to both bootstrap and build 
tools that aren't involved in the bootstrap process.  To support that 
perhaps we want POSTBOOTSTRAP_HOST_EXPORTS for host modules without the 
bootstrap tag, and add the TARGET_LIB_PATH directories there?


Jason



Re: [PATCH] combine: Don't simplify high part of paradoxical-SUBREG-of-MEM on machines that sign-extend loads [PR113010]

2024-02-23 Thread Greg McGary



On 2/22/24 2:08 PM, Jakub Jelinek wrote:

On Thu, Feb 22, 2024 at 12:59:18PM -0800, Greg McGary wrote:

The sign bit of a sign-extending load cannot be known until runtime,
so don't attempt to simplify it in the combiner.

2024-02-22  Greg McGary  

 PR rtl-optimization/113010
 * combine.cc (simplify_comparison): Don't simplify high part
of paradoxical-SUBREG-of-MEM on machines that sign-extend loads

 * gcc.c-torture/execute/pr113010.c: New test.
---
  gcc/combine.cc | 10 --
  gcc/testsuite/gcc.c-torture/execute/pr113010.c |  9 +
  2 files changed, 17 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr113010.c

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 812553c091e..736206242e1 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -12550,9 +12550,15 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
}
  
   	  /* If the inner mode is narrower and we are extracting the low part,
-	     we can treat the SUBREG as if it were a ZERO_EXTEND.  */
+	     we can treat the SUBREG as if it were a ZERO_EXTEND ...  */
  if (paradoxical_subreg_p (op0))
-   ;
+   {
+ /* ... except we can't treat as ZERO_EXTEND when a machine
+automatically sign-extends loads. */
+ if (MEM_P (SUBREG_REG (op0)) && WORD_REGISTER_OPERATIONS
+ && load_extend_op (inner_mode) == SIGN_EXTEND)
+   break;

That doesn't feel sufficient.  Like in the PR112758 patch, I believe
for WORD_REGISTER_OPERATIONS you should treat it as a ZERO_EXTEND only
if MEM_P (SUBREG_REG (op0)) && load_extend_op (inner_mode) == ZERO_EXTEND
or if GET_MODE_PRECISION (inner_mode) is known to be >= BITS_PER_WORD.

Jakub


The gist of your comment is that my patch was missing the test for width
of inner_mode vs. BITS_PER_WORD. Here's a revision. Looks good to you?

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 812553c091e..4626f2edae9 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -12550,9 +12550,17 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)

    }

  /* If the inner mode is narrower and we are extracting the low part,

-    we can treat the SUBREG as if it were a ZERO_EXTEND. */
+    we can treat the SUBREG as if it were a ZERO_EXTEND ...  */
  if (paradoxical_subreg_p (op0))
-   ;
+   {
+ /* ... except we can't do that for loads on machines
+    that don't automatically zero-extend loads. */
+ if (WORD_REGISTER_OPERATIONS
+ && GET_MODE_PRECISION (inner_mode) < BITS_PER_WORD
+ && MEM_P (SUBREG_REG (op0))
+ && load_extend_op (inner_mode) != ZERO_EXTEND)
+   break;
+   }
  else if (subreg_lowpart_p (op0)
   && GET_MODE_CLASS (mode) == MODE_INT
   && is_int_mode (GET_MODE (SUBREG_REG (op0)), &inner_mode)


Re: [PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread Vineet Gupta
+CC Greg who might also have some bits in flight here.

On 2/23/24 01:23, Li, Pan2 wrote:
>
> > I would prefer to only keep zvl and scalable or zvl only, since I
>
> > don't see too much value in specifying a value which different from
>
> > zvl*b, that's a legacy option used before zvl*b option was introduced,
>

+1

> > and the reason to add that is that could used for compatible with
>
> > clang/LLVM for riscv_rvv_vector_bits attribute I think?
>
>  
>
> Yes, exactly to be compatible with clang/llvm. Just take zvl is good
> enough IMO, and update in v2 once we have alignment.
>

+1

It seems you would also want to implement feature macro
__riscv_v_fixed_vlen which llvm does and downstream projects such as
xsimd rely on.
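
Roughly the pattern downstream code uses today with clang (a sketch based on
clang's documented __riscv_v_fixed_vlen macro and riscv_rvv_vector_bits
attribute, not something this patch provides yet):

#include <riscv_vector.h>

#if defined (__riscv_v_fixed_vlen)
/* The fixed-length type has a compile-time size, so it can be used in
   structs, arrays and sizeof, unlike the sizeless vint32m1_t.  */
typedef vint32m1_t fixed_vint32m1_t
  __attribute__ ((riscv_rvv_vector_bits (__riscv_v_fixed_vlen)));

struct vec_pair { fixed_vint32m1_t lo, hi; };
_Static_assert (sizeof (fixed_vint32m1_t) == __riscv_v_fixed_vlen / 8,
                "fixed RVV type matches the advertised vector length");
#endif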

>  
>
> > And if we want this (I'm not sure), it really feels like it ought to
> defer to gcc-15.
>
> > But I'd like to CC more RISC-V GCC folks to see the votes.
>
> > If most of the people don't want this in GCC-14 and defer it to
> GCC-15, I won't insist on it.
>
>  
>
> Sure, let’s wait for a while.
>

Sure it is late in cycle, but I DO agree to gcc-14 inclusion. And that's
because it is related to end user experience: gcc is merely catching up
to what llvm already has.  Rivos folks working on some downstream
projects have brought up this disparity internally. If we don't now, the
projects will have to carry that for posterity. For that reason I'd
consider this as *fix* category such as a VSETVL fix.

P.S. Some of this is captured in PR/112817 and it would be nice to
update stuff there too.

But to me what is more important under same umbrella, for gcc-14 still,
is *attribute riscv_rvv_vector_bits* for VLS codegen (also discussed in
same PR/112817).
Again this is from the same devs for downstream projects complaining that gcc
is not up to par with llvm there - and this is no longer just syntactic
sugar tucked away in a makefile. They actively need #ifdef ugliness in
their code to handle llvm vs. gcc. Granted this part of work might (or
not) be trivial, specially this late, but I'm just putting it out there
for consideration.

Thx,
-Vineet



>  
>
> Pan
>
>  
>
> *From:*juzhe.zh...@rivai.ai 
> *Sent:* Friday, February 23, 2024 4:38 PM
> *To:* jeffreyalaw ; kito.cheng
> ; Li, Pan2 
> *Cc:* gcc-patches ; Wang, Yanzhang
> ; Robin Dapp ; palmer
> ; vineetg ; Patrick O'Neill
> ; Edwin Lu 
> *Subject:* Re: Re: [PATCH v1] RISC-V: Introduce gcc option
> mrvv-vector-bits for RVV
>
>  
>
> I personally think it's better to has VLS compile option and attribute
> in GCC-14.
>
> Since there are many people porting different libraries
> (eigen/highway/xnnpack/openBLAS,...) with VLS feature,
>
> they test them with Clang.
>
>  
>
> If we don't support it, we will end up with Clang can compile those
> lib but GCC-14 can't which will make RISC-V
>
> folks think GCC is still pretty far behind than Clang.
>
>  
>
> Besides, VLS compile option and attribute are pretty safe code; I
> would be surprised if it caused issues on current RVV support.
>
>  
>
> So, +1 from my side to support VLS compile option and attribute on GCC-14.
>
>  
>
> But I'd like to CC more RISC-V GCC folks to see the votes. 
>
> If most of the people don't want this in GCC-14 and defer it to
> GCC-15, I won't insist on it.
>
>  
>
> Thanks.
>
>  
>
> 
>
> juzhe.zh...@rivai.ai
>
>  
>
> *From:* Jeff Law 
>
> *Date:* 2024-02-23 16:29
>
> *To:* Kito Cheng ; pan2.li
> 
>
> *CC:* gcc-patches ; juzhe.zhong
> ; yanzhang.wang
> 
>
> *Subject:* Re: [PATCH v1] RISC-V: Introduce gcc option
> mrvv-vector-bits for RVV
>
>  
>
>  
>
> On 2/23/24 01:22, Kito Cheng wrote:
>
> > I would prefer to only keep zvl and scalable or zvl only, since I
>
> > don't see too much value in specifying a value which different from
>
> > zvl*b, that's a legacy option used before zvl*b option was
> introduced,
>
> > and the reason to add that is that could used for compatible with
>
> > clang/LLVM for riscv_rvv_vector_bits attribute I think?
>
> And if we want this (I'm not sure), it really feels like it ought to
>
> defer to gcc-15.
>
>  
>
> jeff
>
>  
>
>  
>



Re: [PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Martin Storsjö

On Fri, 23 Feb 2024, Richard Sandiford wrote:

Are there two distinct ABIs for aarch64-*-mingw*?  Or are these 
distinctions ignored on aarch64 and just retained for compatibility?


On Windows on AArch64, the calling convention normally matches regular 
AAPCS64 - so the ms_abi attribute normally has no effect. However, for 
variadic functions, the calling convention differs, so the ms_abi 
attribute could be used to implement functions with the Windows vararg 
calling convention on Linux.


(As far as I know, the correct Windows vararg calling convention is not 
yet implemented in this patch series, but would be a later addition.)


Clang/LLVM does implement the Windows AArch64 vararg calling convention, 
and it used to be necessary for Wine on AArch64 before, but as Jacek 
mentioned, it's no longer needed by Wine.


ARM64EC is an entirely different thing though, both out of scope for this 
patchset, and also a much bigger thing than an MS_ABI attribute.


// Martin



Re: [PATCH] RISC-V: Fix vec_init for simple sequences [PR114028].

2024-02-23 Thread Robin Dapp
> +/* { dg-final { scan-assembler-times "vmv\.v\.i\tv\[0-9\],0" 0 } } */
> 
> I think you should use "scan-assembler-not"

Thanks, going to commit with that change.

Regards
 Robin


Re: [PATCH v1 05/13] Reuse MinGW from i386 for AArch64

2024-02-23 Thread Bernhard Reutner-Fischer
On Thu, 22 Feb 2024 15:56:46 +
Evgeny Karpov  wrote:

> A ChangeLog template using "Moved... ...here" has been generated by 
> contrib/mklog.py.
> It seems that it needs modification.
> 
> Regards,
> Evgeny
> 
> -Original Message-
> Thursday, February 22, 2024 12:11 PM
> Richard Earnshaw (lists) wrote:
> 
> > The ChangeLog has to be expressed in present tense, as mandated by the 
> > standard; s/Moved/Move/g etc.  
> 
> Agreed, but that's a detail that we can get to once the patch has been 
> properly reviewed.
> 

Excellent, two patches for the price of one :)

diff --git a/contrib/mklog.py b/contrib/mklog.py
index d764fb41f99..7d8d554b15e 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -277,7 +277,7 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False,
 # it used to be path.source_file[2:]
 relative_path = get_rel_path_if_prefixed(file.source_file[2:],
  changelog)
-out = append_changelog_line(out, relative_path, 'Moved to...')
+out = append_changelog_line(out, relative_path, 'Move to...')
 new_path = get_rel_path_if_prefixed(file.target_file[2:],
 changelog)
 out += f'\t* {new_path}: ...here.\n'


cheers


[PATCH, v2] Fix fortran/PR114024

2024-02-23 Thread Harald Anlauf

Hi Steve, all,

here's an updated patch with an enhanced testcase that also
checks MOLD= besides SOURCE=.

Regtested on x86_64-pc-linux-gnu.  Is it OK for mainline?

Cheers,
Harald

On 2/22/24 22:32, Harald Anlauf wrote:

On 2/22/24 22:01, Steve Kargl wrote:

BTW, my patch and I suspect your improved patch also
fixes 'allocate(x,mold=z%re)'.  Consider,

    complex z(3)
    real, allocatable :: x(:)
    z = 42ha
    allocate(x, mold=z%re)
    print *, size(x)
    end

% gfortran13 -o z a.f90
a.f90:9:25:

 9 |    allocate(x, mold=z%re)
   | 1
internal compiler error: in retrieve_last_ref, at
fortran/trans-array.cc:6070
0x247d7a679 __libc_start1
 /usr/src/lib/libc/csu/libc_start1.c:157

% gfcx -o z a.f90 && ./z
    3



Nice!  I completely forgot about MOLD...

So the only missing pieces are a really comprehensive testcase
and successful regtests...
From a176c2f44f812d82aeb430fadf23ab4b6dd5bd65 Mon Sep 17 00:00:00 2001
From: Steve Kargl 
Date: Fri, 23 Feb 2024 22:05:04 +0100
Subject: [PATCH] Fortran: ALLOCATE statement, SOURCE/MOLD expressions with
 subrefs [PR114024]

	PR fortran/114024

gcc/fortran/ChangeLog:

	* trans-stmt.cc (gfc_trans_allocate): When a source expression has
	substring references, part-refs, or %re/%im inquiries, wrap the
	entity in parentheses to force evaluation of the expression.

gcc/testsuite/ChangeLog:

	* gfortran.dg/allocate_with_source_27.f90: New test.
	* gfortran.dg/allocate_with_source_28.f90: New test.

Co-Authored-By: Harald Anlauf 
---
 gcc/fortran/trans-stmt.cc | 10 ++-
 .../gfortran.dg/allocate_with_source_27.f90   | 20 +
 .../gfortran.dg/allocate_with_source_28.f90   | 90 +++
 3 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
 create mode 100644 gcc/testsuite/gfortran.dg/allocate_with_source_28.f90

diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 5247d3d39d7..e09828e218b 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -6355,8 +6355,14 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate)
 	vtab_needed = (al->expr->ts.type == BT_CLASS);
 
   gfc_init_se (&se, NULL);
-  /* When expr3 is a variable, i.e., a very simple expression,
-	 then convert it once here.  */
+  /* When expr3 is a variable, i.e., a very simple expression, then
+	 convert it once here.  If one has a source expression that has
+	 substring references, part-refs, or %re/%im inquiries, wrap the
+	 entity in parentheses to force evaluation of the expression.  */
+  if (code->expr3->expr_type == EXPR_VARIABLE
+	  && is_subref_array (code->expr3))
+	code->expr3 = gfc_get_parentheses (code->expr3);
+
   if (code->expr3->expr_type == EXPR_VARIABLE
 	  || code->expr3->expr_type == EXPR_ARRAY
 	  || code->expr3->expr_type == EXPR_CONSTANT)
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90 b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
new file mode 100644
index 000..d0f0f3c4a84
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
@@ -0,0 +1,20 @@
+!
+! { dg-do run }
+!
+! fortran/PR114024
+! https://github.com/fujitsu/compiler-test-suite
+! Modified from Fortran/0093/0093_0130.f90
+!
+program foo
+   implicit none
+   complex :: cmp(3)
+   real, allocatable :: xx(:), yy(:), zz(:)
+   cmp = (3., 6.78)
+   allocate(xx, source = cmp%re)  ! This caused an ICE.
+   allocate(yy, source = cmp(1:3)%re) ! This caused an ICE.
+   allocate(zz, source = (cmp%re))
+   if (any(xx /= [3., 3., 3.])) stop 1
+   if (any(yy /= [3., 3., 3.])) stop 2
+   if (any(zz /= [3., 3., 3.])) stop 3
+end program foo
+
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90 b/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90
new file mode 100644
index 000..976c567cf22
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90
@@ -0,0 +1,90 @@
+! { dg-do run }
+!
+! PR fortran/114024
+
+program foo
+  implicit none
+  complex :: cmp(3) = (3.,4.)
+  type ci   ! pseudo "complex integer" type
+ integer :: re
+ integer :: im
+  end type ci
+  type cr   ! pseudo "complex" type
+ real :: re
+ real :: im
+  end type cr
+  type u
+ type(ci) :: ii(3)
+ type(cr) :: rr(3)
+  end type u
+  type(u) :: cc
+
+  cc% ii% re = nint (cmp% re)
+  cc% ii% im = nint (cmp% im)
+  cc% rr% re = cmp% re
+  cc% rr% im = cmp% im
+ 
+ call test_substring ()
+  call test_int_real ()
+  call test_poly ()
+
+contains
+
+  subroutine test_substring ()
+character(4)  :: str(3) = ["abcd","efgh","ijkl"]
+character(:), allocatable :: ac(:)
+allocate (ac, source=str(1::2)(2:4))
+if (size (ac) /= 2 .or. len (ac) /= 3) stop 11
+if (ac(2) /= "jkl")stop 12
+deallocate (ac)
+allocate (ac, mold=str(1::2)(2:4))
+if (size (ac) /= 2 

Re: [PATCH, v2] Fix fortran/PR114024

2024-02-23 Thread rep . dot . nop
On 23 February 2024 22:15:17 CET, Harald Anlauf  wrote:
>Hi Steve, all,
>
>here's an updated patch with an enhanced testcase that also
>checks MOLD= besides SOURCE=.
>
>Regtested on x86_64-pc-linux-gnu.  Is it OK for mainline?

LGTM
cheers

>
>Cheers,
>Harald



[PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Evgeny Karpov
Hi Richard,

Thank you for your review!

The MS_ABI definition is for the x86/x64 MS ABI, and it's clear that it 
shouldn't be used on aarch64.

The AARCH64_CALLING_ABI_MS definition resolves the issue.
It just needs to be properly handled in mingw32.h.

The change below is sufficient to resolve the ABI usage in mingw.

Regards,
Evgeny

gcc/config.gcc
-   tm_defines="${tm_defines} TARGET_ARM64_MS_ABI=1"
+   tm_defines="${tm_defines} TARGET_AARCH64_MS_ABI=1"

config/aarch64/aarch64-opts.h

+/* Available call ABIs.  */
+enum aarch64_calling_abi
+{
+  AARCH64_CALLING_ABI_EABI,
+  AARCH64_CALLING_ABI_MS
+};
+

gcc/config/mingw/mingw32.h
@@ -19,7 +19,11 @@

-#define DEFAULT_ABI MS_ABI
+#if defined (TARGET_AARCH64_MS_ABI)
+# define DEFAULT_ABI AARCH64_CALLING_ABI_MS
+#else
+# define DEFAULT_ABI MS_ABI
+#endif



-Original Message-
Friday, February 23, 2024 6:50 PM
Richard Sandiford wrote:

> What do you think about this change for v2?
>
> +/* Available call ABIs.  */
> +enum aarch64_calling_abi
> +{
> +  AARCH64_CALLING_ABI_EABI,
> +  AARCH64_CALLING_ABI_MS,
> +  MS_ABI = AARCH64_CALLING_ABI_MS
> +};
> +

How is MS_ABI used in practice?  When I apply locally, it looks like the two 
non-x86 uses are in:

gcc/config/mingw/mingw32.h:  if (TARGET_64BIT && ix86_abi == MS_ABI) \
gcc/config/mingw/winnt-d.cc:  if (TARGET_64BIT && ix86_abi == MS_ABI)

But these should fail to build if used, because AFAICT there's no definition of 
ix86_abi on aarch64.

The first match is in EXTRA_OS_CPP_BUILTINS, but I couldn't see any uses of 
that in aarch64 code, which would explain why everything builds OK.
The winnt-d.cc occurence looks like it would break the build with the D 
frontend enabled though.

Are there two distinct ABIs for aarch64-*-mingw*?  Or are these distinctions 
ignored on aarch64 and just retained for compatibility?

If there are two distinct ABIs then we should probably add them to 
aarch64_arm_pcs.  But if there is only a single ABI, we should probably avoid 
adding calling_abi altogether and instead provide a macro like TARGET_IS_MS_ABI 
that aarch64 and x86 can define differently.
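
For instance (only a sketch of the idea; the macro name and the define used to
select it here are placeholders, not code from the posted series):

#ifdef TARGET_AARCH64_MS_ABI
/* aarch64-w64-mingw32 has a single calling convention, so the shared
   mingw code can treat this as always true.  */
# define TARGET_IS_MS_ABI 1
#else
/* x86 mingw keeps its existing condition, spelled through the shared
   macro instead of referring to ix86_abi directly.  */
# define TARGET_IS_MS_ABI (TARGET_64BIT && ix86_abi == MS_ABI)
#endif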

(To be clear, I don't think the different handling of x18 matters for the PCS 
classification.  That's an orthogonal platform property that applies to all PCS 
variants equally.  No-one had suggested otherwise, just wanted to say in case. 
:-) )

Thanks,
Richard



Re: [PATCH, v2] Fix fortran/PR114024

2024-02-23 Thread Steve Kargl
On Fri, Feb 23, 2024 at 10:15:17PM +0100, Harald Anlauf wrote:
> Hi Steve, all,
> 
> here's an updated patch with an enhanced testcase that also
> checks MOLD= besides SOURCE=.
> 
> Regtested on x86_64-pc-linux-gnu.  Is it OK for mainline?
> 

From my viewpoint, yes.

Thanks for finding a better solution than I had conjured.

-- 
Steve


[PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-23 Thread Evgeny Karpov
Hi Martin,

Thank you for the clarification regarding the vararg implementation.
It is correct. The work is still in progress and will be included in
a later patch series.

ARM64EC is a separate work, which is outside the scope of the current
contribution plan.

Regards,
Evgeny

-Original Message-
Friday, February 23, 2024 9:37 PM
Martin Storsjö wrote: 

On Fri, 23 Feb 2024, Richard Sandiford wrote:

> Are there two distinct ABIs for aarch64-*-mingw*?  Or are these 
> distinctions ignored on aarch64 and just retained for compatibility?

(As far as I know, the correct Windows vararg calling convention is not yet 
implemented in this patch series, but would be a later addition.)

ARM64EC is an entirely different thing though, both out of scope for this 
patchset, and also a much bigger thing than an MS_ABI attribute.

// Martin



Re: [PATCH v1 01/13] Introduce aarch64-w64-mingw32 target

2024-02-23 Thread Fangrui Song
+Martin who may have an opinion
(https://github.com/mstorsjo/llvm-mingw supports aarch64)

On Fri, Feb 23, 2024 at 6:15 AM Evgeny Karpov
 wrote:
>
> Hi Andrew and Richard,
>
> Thank you for pointing out there's no need for a 64-bit ISA and the 
> big-endian target.
> These changes will be addressed in v2.
>
> Regards,
> Evgeny
>
> -Original Message-
> Thursday, February 22, 2024 12:33 PM
> Richard Earnshaw (lists)  wrote:
> >
> +aarch64*-*-mingw*)
>
> Other targets are a bit inconsistent here as well, but, as Andrew mentioned, 
> if you don't want to handle big-endian, it might be better to match 
> aarch64-*-mingw* here.
>
>
> -Original Message-
> Wednesday, February 21, 2024 7:23 PM
> Andrew Pinski wrote:
>
> > need_64bit_isa=yes
>
> This is not needed in the patch as it is only used for x86_64 targets.
>
> Should you make sure nobody specifies the big-endian target:
> aarch64_be-w64-mingw32  ?
>


-- 
宋方睿


[PATCH] RISC-V: Update test expectancies with recent scheduler change

2024-02-23 Thread Edwin Lu
Given the recent change with adding the scheduler pipeline descriptions,
many scan-dump failures emerged. Relax the expected assembler output
conditions on the affected tests to reduce noise.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Bound testcase
assembly matching
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-1.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-2.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-3.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-4.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-5.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-6.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-7.c: Ditto
* gcc.target/riscv/rvv/base/vcreate.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-30.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-31.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-17.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-4.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-5.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-7.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-8.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-9.c: Ditto

Signed-off-by: Edwin Lu 
---
 .../costmodel/riscv/rvv/dynamic-lmul4-6.c |  3 +-
 .../costmodel/riscv/rvv/dynamic-lmul4-8.c |  3 +-
 .../gcc.target/riscv/rvv/base/pr108185-1.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-2.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-3.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-4.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-5.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-6.c| 25 +
 .../gcc.target/riscv/rvv/base/pr108185-7.c| 25 +
 .../gcc.target/riscv/rvv/base/vcreate.c   | 13 +++--
 .../riscv/rvv/vsetvl/vlmax_back_prop-30.c |  8 --
 .../riscv/rvv/vsetvl/vlmax_back_prop-31.c |  8 --
 .../riscv/rvv/vsetvl/vlmax_single_block-17.c  | 28 ++-
 .../riscv/rvv/vsetvl/vlmax_single_block-18.c  | 14 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-10.c  |  9 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-11.c  |  3 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-12.c  |  3 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-4.c   |  8 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-5.c   |  8 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-6.c   |  8 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-7.c   |  8 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-8.c   |  8 --
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-9.c   |  8 --
 23 files changed, 238 insertions(+), 77 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
index d2766f5984c..1cb0888f9d8 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
@@ -20,7 +20,8 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n)
 }
 
 /* { dg-final { scan-assembler {e8,m4} } } */
-/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-assembler-bound {csrr} >= 1 } } */
+/* { dg-final { scan-assembler-bound {csrr} <= 3 } } */
 /* Since we don't support VLA SLP for LMUL = 8, dynamic LMUL cost model start from LMUL = 4.  */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
index 362c49f1411..0d644fc69bf 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
@@ -29,7 +29,8 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n)
 }
 
 /* { dg-final { scan-assembler {e8,m4} } } */
-/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-assembler-bound {csrr} >= 1 } } */
+/* { dg-final { scan-assembler-bound {csrr} <= 3 } } */
 /* Since we don't support VLA SLP for LMUL = 8, dynamic LMUL cost model start from LMUL = 4.  */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-n

[PATCH 1/2] aarch64: Use fmov s/d/hN, FP_CST for some vector CST [PR113856]

2024-02-23 Thread Andrew Pinski
AArch64 has a way to form some floating point CSTs via the fmov instructions;
these instructions also zero out the upper parts of the register, so they can
be used for vector CSTs whose only non-zero element is the first one, provided
that element can be formed via fmov.

This implements this "small" optimization so these vector CSTs don't need to be
loaded from memory.
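
As a hand-written illustration (not one of the new testcase files), the kind
of constant this targets and the code one would expect with the patch is
roughly:

#include <arm_neon.h>

/* Only lane 0 is non-zero; fmov s0, <imm> writes lane 0 and zeroes the
   rest of the vector register, so no literal-pool load is needed.  */
float32x4_t
lane0_only (void)
{
  return (float32x4_t) { 1.0f, 0.0f, 0.0f, 0.0f };
  /* expected asm with the patch, roughly:  fmov  s0, 1.0e+0  */
}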

Built and tested on aarch64-linux-gnu with no regressions.

PR target/113856

gcc/ChangeLog:

* config/aarch64/aarch64.cc (struct simd_immediate_info):
Add FMOV_SDH to insn_type. For scalar_float_mode constructor
add insn_in.
(aarch64_simd_valid_immediate): Catch `{fp, 0...}` vector_cst
and return a simd_immediate_info which uses FMOV_SDH.
(aarch64_output_simd_mov_immediate): Support outputting
fmov for FMOV_SDH.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/fmov-zero-cst-1.c: New test.
* gcc.target/aarch64/fmov-zero-cst-2.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64.cc | 48 ++---
 .../gcc.target/aarch64/fmov-zero-cst-1.c  | 52 +++
 .../gcc.target/aarch64/fmov-zero-cst-2.c  | 19 +++
 3 files changed, 111 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-2.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5dd0814f198..c4386591a9b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -126,11 +126,11 @@ constexpr auto AARCH64_STATE_OUT = 1U << 2;
 /* Information about a legitimate vector immediate operand.  */
 struct simd_immediate_info
 {
-  enum insn_type { MOV, MVN, INDEX, PTRUE };
+  enum insn_type { MOV, FMOV_SDH, MVN, INDEX, PTRUE };
   enum modifier_type { LSL, MSL };
 
   simd_immediate_info () {}
-  simd_immediate_info (scalar_float_mode, rtx);
+  simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV);
   simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
   insn_type = MOV, modifier_type = LSL,
   unsigned int = 0);
@@ -145,7 +145,7 @@ struct simd_immediate_info
 
   union
   {
-/* For MOV and MVN.  */
+/* For MOV, FMOV_SDH and MVN.  */
 struct
 {
   /* The value of each element.  */
@@ -173,9 +173,10 @@ struct simd_immediate_info
 /* Construct a floating-point immediate in which each element has mode
ELT_MODE_IN and value VALUE_IN.  */
 inline simd_immediate_info
-::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
-  : elt_mode (elt_mode_in), insn (MOV)
+::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in)
+  : elt_mode (elt_mode_in), insn (insn_in)
 {
+  gcc_assert (insn_in == MOV || insn_in == FMOV_SDH);
   u.mov.value = value_in;
   u.mov.modifier = LSL;
   u.mov.shift = 0;
@@ -22932,6 +22933,35 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
  return true;
}
 }
+  /* See if we can use fmov d0/s0/h0 ... for the constant. */
+  if (n_elts >= 1
+  && (vec_flags & VEC_ADVSIMD)
+  && is_a <scalar_float_mode> (elt_mode, &elt_float_mode)
+  && !CONST_VECTOR_DUPLICATE_P (op))
+{
+  rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
+  if (aarch64_float_const_zero_rtx_p (elt)
+ || aarch64_float_const_representable_p (elt))
+   {
+ bool valid = true;
+ for (unsigned int i = 1; i < n_elts; i++)
+   {
+ rtx elt1 = CONST_VECTOR_ENCODED_ELT (op, i);
+ if (!aarch64_float_const_zero_rtx_p (elt1))
+   {
+ valid = false;
+ break;
+   }
+   }
+ if (valid)
+   {
+ if (info)
+   *info = simd_immediate_info (elt_float_mode, elt,
+simd_immediate_info::FMOV_SDH);
+ return true;
+   }
+   }
+}
 
   /* If all elements in an SVE vector have the same value, we have a free
  choice between using the element mode and using the container mode.
@@ -25121,7 +25151,8 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 
   if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
 {
-  gcc_assert (info.insn == simd_immediate_info::MOV
+  gcc_assert ((info.insn == simd_immediate_info::MOV
+  || info.insn == simd_immediate_info::FMOV_SDH)
  && info.u.mov.shift == 0);
   /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
 move immediate path.  */
@@ -25134,8 +25165,9 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
  real_to_decimal_for_mode (float_buf,
CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
buf_size, buf_size, 1, info

[PATCH 2/2] aarch64: Support `{1.0f, 1.0f, 0.0, 0.0}` CST forming with fmov with a smaller vector type.

2024-02-23 Thread Andrew Pinski
This enables construction of V4SF CST like `{1.0f, 1.0f, 0.0f, 0.0f}`
(and other fp enabled CSTs) by using `fmov v0.2s, 1.0` as the instruction
is designed to zero out the other bits.
This is a small extension on top of the code that creates fmov for the case
where all elements except the first are zero.
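
A hand-written sketch (not the new testcase) of the shape handled here:

#include <arm_neon.h>

/* The low half repeats one representable constant and the high half is
   zero, so an fmov on the narrower .2s view can materialize the whole
   vector.  */
float32x4_t
low_half_only (void)
{
  return (float32x4_t) { 1.0f, 1.0f, 0.0f, 0.0f };
  /* expected asm with the patch, roughly:  fmov  v0.2s, 1.0e+0  */
}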

Built and tested for aarch64-linux-gnu with no regressions.

PR target/113856

gcc/ChangeLog:

* config/aarch64/aarch64.cc (simd_immediate_info): Add bool to the
float mode constructor. Document modifier field for FMOV_SDH.
(aarch64_simd_valid_immediate): Recognize where the first half
of the const float vect is the same.
(aarch64_output_simd_mov_immediate): Handle the case where insn is
FMOV_SDH and modifier is MSL.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/fmov-zero-cst-3.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64.cc | 34 ---
 .../gcc.target/aarch64/fmov-zero-cst-3.c  | 28 +++
 2 files changed, 57 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c4386591a9b..89bd0c5e5a6 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -130,7 +130,7 @@ struct simd_immediate_info
   enum modifier_type { LSL, MSL };
 
   simd_immediate_info () {}
-  simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV);
+  simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV, bool = false);
   simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
   insn_type = MOV, modifier_type = LSL,
   unsigned int = 0);
@@ -153,6 +153,8 @@ struct simd_immediate_info
 
   /* The kind of shift modifier to use, and the number of bits to shift.
 This is (LSL, 0) if no shift is needed.  */
+  /* For FMOV_SDH, LSL says it is a single while MSL
+says if it is either .4h/.2s fmov. */
   modifier_type modifier;
   unsigned int shift;
 } mov;
@@ -173,12 +175,12 @@ struct simd_immediate_info
 /* Construct a floating-point immediate in which each element has mode
ELT_MODE_IN and value VALUE_IN.  */
 inline simd_immediate_info
-::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in)
+::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in, bool firsthalfsame)
   : elt_mode (elt_mode_in), insn (insn_in)
 {
   gcc_assert (insn_in == MOV || insn_in == FMOV_SDH);
   u.mov.value = value_in;
-  u.mov.modifier = LSL;
+  u.mov.modifier = firsthalfsame ? MSL : LSL;
   u.mov.shift = 0;
 }
 
@@ -22944,10 +22946,23 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
  || aarch64_float_const_representable_p (elt))
{
  bool valid = true;
+ bool firsthalfsame = false;
  for (unsigned int i = 1; i < n_elts; i++)
{
  rtx elt1 = CONST_VECTOR_ENCODED_ELT (op, i);
  if (!aarch64_float_const_zero_rtx_p (elt1))
+   {
+ if (i == 1)
+   firsthalfsame = true;
+ if (!firsthalfsame
+ || i >= n_elts/2
+ || !rtx_equal_p (elt, elt1))
+   {
+ valid = false;
+ break;
+   }
+   }
+ else if (firsthalfsame && i < n_elts/2)
{
  valid = false;
  break;
@@ -22957,7 +22972,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
{
  if (info)
*info = simd_immediate_info (elt_float_mode, elt,
-simd_immediate_info::FMOV_SDH);
+simd_immediate_info::FMOV_SDH,
+firsthalfsame);
  return true;
}
}
@@ -25165,8 +25181,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
  real_to_decimal_for_mode (float_buf,
CONST_DOUBLE_REAL_VALUE (info.u.mov.value),
buf_size, buf_size, 1, info.elt_mode);
- if (info.insn == simd_immediate_info::FMOV_SDH)
+ if (info.insn == simd_immediate_info::FMOV_SDH
+ && info.u.mov.modifier == simd_immediate_info::LSL)
snprintf (templ, sizeof (templ), "fmov\t%%%c0, %s", element_char, 
float_buf);
+ else if (info.insn == simd_immediate_info::FMOV_SDH
+ && info.u.mov.modifier == simd_immediate_info::MSL)
+   {
+ gcc_assert (element_char != 'd');
+ gcc_assert (lane_count > 2);
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", 
lane_count/2, element_char, float_

[PATCH] arm: Support -mfdpic for more targets

2024-02-23 Thread Fangrui Song
From: Fangrui Song 

Targets that are not arm*-*-uclinuxfdpiceabi can use -S -mfdpic, but -c
-mfdpic does not pass --fdpic to gas.  This is an unnecessary
restriction.  Just define the ASM_SPEC in bpabi.h.

Additionally, use armelf[b]_linux_fdpiceabi emulations for -mfdpic in
linux-eabi.h.  This will allow a future musl fdpic port to use the
desired BFD emulation.

gcc/ChangeLog:

* config/arm/bpabi.h (TARGET_FDPIC_ASM_SPEC): Transform -mfdpic.
* config/arm/linux-eabi.h (TARGET_FDPIC_LINKER_EMULATION): Define.
(SUBTARGET_EXTRA_LINK_SPEC): Use TARGET_FDPIC_LINKER_EMULATION
if -mfdpic.
---
 gcc/config/arm/bpabi.h  | 2 +-
 gcc/config/arm/linux-eabi.h | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index 7a279f3ed3c..6778be1a8bf 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -55,7 +55,7 @@
 #define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\
   "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
 
-#define TARGET_FDPIC_ASM_SPEC ""
+#define TARGET_FDPIC_ASM_SPEC "%{mfdpic: --fdpic}"
 
 #define BE8_LINK_SPEC  \
   "%{!r:%{!mbe32:%:be8_linkopt(%{mlittle-endian:little}"   \
diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
index eef791f6a02..0c5c58e4928 100644
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -46,12 +46,15 @@
 #undef  TARGET_LINKER_EMULATION
 #if TARGET_BIG_ENDIAN_DEFAULT
 #define TARGET_LINKER_EMULATION "armelfb_linux_eabi"
+#define TARGET_FDPIC_LINKER_EMULATION "armelfb_linux_fdpiceabi"
 #else
 #define TARGET_LINKER_EMULATION "armelf_linux_eabi"
+#define TARGET_FDPIC_LINKER_EMULATION "armelf_linux_fdpiceabi"
 #endif
 
 #undef  SUBTARGET_EXTRA_LINK_SPEC
-#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION
+#define SUBTARGET_EXTRA_LINK_SPEC " -m %{mfdpic: " \
+  TARGET_FDPIC_LINKER_EMULATION ";:" TARGET_LINKER_EMULATION "}"
 
 /* GNU/Linux on ARM currently supports three dynamic linkers:
- ld-linux.so.2 - for the legacy ABI
-- 
2.44.0.rc1.240.g4c46232300-goog



[PATCH] bitint: Handle VIEW_CONVERT_EXPRs between large/huge BITINT_TYPEs and VECTOR/COMPLEX_TYPE etc. [PR114073]

2024-02-23 Thread Jakub Jelinek
Hi!

The following patch implements support for VIEW_CONVERT_EXPRs from/to
large/huge _BitInt to/from vector or complex types or anything else but
integral/pointer types which doesn't need to live in memory.
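
For illustration only (this is not the new bitint-93.c test, just a sketch of
the kind of code that now lowers, compiled as C23 on x86_64), memcpy folding
between a huge _BitInt and a vector produces exactly such a VIEW_CONVERT_EXPR:

typedef int v8si __attribute__ ((vector_size (32)));

v8si
bitint_to_vec (_BitInt(256) x)
{
  v8si v;
  __builtin_memcpy (&v, &x, sizeof (v));  /* folded to a VIEW_CONVERT_EXPR */
  return v;
}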

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-24  Jakub Jelinek  

PR middle-end/114073
* gimple-lower-bitint.cc (bitint_large_huge::lower_stmt): Handle
VIEW_CONVERT_EXPRs between large/huge _BitInt and non-integer/pointer
types like vector or complex types.
(gimple_lower_bitint): Don't merge VIEW_CONVERT_EXPRs to non-integral
types.  Fix up VIEW_CONVERT_EXPR handling.  Allow merging
VIEW_CONVERT_EXPR from non-integral/pointer types with a store.

* gcc.dg/bitint-93.c: New test.

--- gcc/gimple-lower-bitint.cc.jj   2024-02-23 11:36:06.977015730 +0100
+++ gcc/gimple-lower-bitint.cc  2024-02-23 18:21:09.282751377 +0100
@@ -5305,27 +5305,21 @@ bitint_large_huge::lower_stmt (gimple *s
   else if (TREE_CODE (TREE_TYPE (rhs1)) == BITINT_TYPE
   && bitint_precision_kind (TREE_TYPE (rhs1)) >= bitint_prec_large
   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  || POINTER_TYPE_P (TREE_TYPE (lhs))))
+  || POINTER_TYPE_P (TREE_TYPE (lhs))
+  || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR))
{
  final_cast_p = true;
- if (TREE_CODE (TREE_TYPE (lhs)) == INTEGER_TYPE
- && TYPE_PRECISION (TREE_TYPE (lhs)) > MAX_FIXED_MODE_SIZE
+ if (((TREE_CODE (TREE_TYPE (lhs)) == INTEGER_TYPE
+   && TYPE_PRECISION (TREE_TYPE (lhs)) > MAX_FIXED_MODE_SIZE)
+  || (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+  && !POINTER_TYPE_P (TREE_TYPE (lhs))))
  && gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR)
{
  /* Handle VIEW_CONVERT_EXPRs to not generally supported
 huge INTEGER_TYPEs like uint256_t or uint512_t.  These
 are usually emitted from memcpy folding and backends
-support moves with them but that is usually it.  */
- if (TREE_CODE (rhs1) == INTEGER_CST)
-   {
- rhs1 = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
-rhs1);
- gcc_assert (rhs1 && TREE_CODE (rhs1) == INTEGER_CST);
- gimple_assign_set_rhs1 (stmt, rhs1);
- gimple_assign_set_rhs_code (stmt, INTEGER_CST);
- update_stmt (stmt);
- return;
-   }
+support moves with them but that is usually it.
+Similarly handle VCEs to vector/complex types etc.  */
  gcc_assert (TREE_CODE (rhs1) == SSA_NAME);
  if (SSA_NAME_IS_DEFAULT_DEF (rhs1)
  && (!SSA_NAME_VAR (rhs1) || VAR_P (SSA_NAME_VAR (rhs1))))
@@ -5376,6 +5370,18 @@ bitint_large_huge::lower_stmt (gimple *s
}
}
}
+  else if (TREE_CODE (TREE_TYPE (lhs)) == BITINT_TYPE
+  && bitint_precision_kind (TREE_TYPE (lhs)) >= bitint_prec_large
+  && !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+  && !POINTER_TYPE_P (TREE_TYPE (rhs1))
+  && gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR)
+   {
+ int part = var_to_partition (m_map, lhs);
+ gcc_assert (m_vars[part] != NULL_TREE);
+ lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs1), m_vars[part]);
+ insert_before (gimple_build_assign (lhs, rhs1));
+ return;
+   }
 }
   if (gimple_store_p (stmt))
 {
@@ -5411,6 +5417,28 @@ bitint_large_huge::lower_stmt (gimple *s
  case IMAGPART_EXPR:
lower_cplxpart_stmt (lhs, g);
goto handled;
+ case VIEW_CONVERT_EXPR:
+   {
+ tree rhs1 = gimple_assign_rhs1 (g);
+ rhs1 = TREE_OPERAND (rhs1, 0);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && !POINTER_TYPE_P (TREE_TYPE (rhs1)))
+   {
+ tree ltype = TREE_TYPE (rhs1);
+ addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (lhs));
+ ltype
+   = build_qualified_type (ltype,
+   TYPE_QUALS (TREE_TYPE (lhs))
+   | ENCODE_QUAL_ADDR_SPACE (as));
+ lhs = build1 (VIEW_CONVERT_EXPR, ltype, lhs);
+ gimple_assign_set_lhs (stmt, lhs);
+ gimple_assign_set_rhs1 (stmt, rhs1);
+ gimple_assign_set_rhs_code (stmt, TREE_CODE (rhs1));
+ update_stmt (stmt);
+ return;
+   }
+   }
+   break;
  default:
break;
  }
@@ -6235,6 +6263,14 

[PATCH] Use HOST_WIDE_INT_{C,UC,0,0U,1,1U} macros some more

2024-02-23 Thread Jakub Jelinek
Hi!

I've searched for some uses of (HOST_WIDE_INT) constant or (unsigned
HOST_WIDE_INT) constant and turned them into uses of the appropriate
macros.
There are quite a few cases in non-i386 backends but I've left that out
for now.
The only behavior change is in build_replicated_int_cst where the
left shift was done in HOST_WIDE_INT type but assigned to unsigned
HOST_WIDE_INT, which I've changed into unsigned HOST_WIDE_INT shift.
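
The practical difference only shows up when the shift reaches the sign bit:
with a 64-bit HOST_WIDE_INT, shifting a signed 1 into bit 63 is undefined
behaviour even if the result is then stored into an unsigned variable, while
the unsigned form that HOST_WIDE_INT_1U expands to is well defined.  A rough
stand-alone sketch, using long long in place of HOST_WIDE_INT:

unsigned long long
high_bit_signed (void)
{
  return (long long) 1 << 63;	/* undefined behaviour */
}

unsigned long long
high_bit_unsigned (void)
{
  return 1ULL << 63;		/* well defined: 0x8000000000000000 */
}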

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-24  Jakub Jelinek  

gcc/
* builtins.cc (fold_builtin_isascii): Use HOST_WIDE_INT_UC macro.
* combine.cc (make_field_assignment): Use HOST_WIDE_INT_1U macro.
* double-int.cc (double_int::mask): Use HOST_WIDE_INT_UC macros.
* genattrtab.cc (attr_alt_complement): Use HOST_WIDE_INT_1 macro.
(mk_attr_alt): Use HOST_WIDE_INT_0 macro.
* genautomata.cc (bitmap_set_bit, CLEAR_BIT): Use HOST_WIDE_INT_1
macros.
* ipa-strub.cc (can_strub_internally_p): Use HOST_WIDE_INT_1 macro.
* loop-iv.cc (implies_p): Use HOST_WIDE_INT_1U macro.
* pretty-print.cc (test_pp_format): Use HOST_WIDE_INT_C and
HOST_WIDE_INT_UC macros.
* rtlanal.cc (nonzero_bits1): Use HOST_WIDE_INT_UC macro.
* tree.cc (build_replicated_int_cst): Use HOST_WIDE_INT_1U macro.
* tree.h (DECL_OFFSET_ALIGN): Use HOST_WIDE_INT_1U macro.
* tree-ssa-structalias.cc (dump_varinfo): Use ~HOST_WIDE_INT_0U
macros.
* wide-int.cc (divmod_internal_2): Use HOST_WIDE_INT_1U macro.
* config/i386/constraints.md (define_constraint "L"): Use
HOST_WIDE_INT_C macro.
* config/i386/i386.md (movabsq split peephole2): Use HOST_WIDE_INT_C
macro.
(movl + movb peephole2): Likewise.
* config/i386/predicates.md (x86_64_zext_immediate_operand): Likewise.
(const_32bit_mask): Likewise.
gcc/objc/
* objc-encoding.cc (encode_array): Use HOST_WIDE_INT_0 macros.

--- gcc/builtins.cc.jj  2024-02-06 08:43:14.84351 +0100
+++ gcc/builtins.cc 2024-02-23 22:02:48.245611359 +0100
@@ -9326,7 +9326,7 @@ fold_builtin_isascii (location_t loc, tr
   /* Transform isascii(c) -> ((c & ~0x7f) == 0).  */
   arg = fold_build2 (BIT_AND_EXPR, integer_type_node, arg,
 build_int_cst (integer_type_node,
-   ~ (unsigned HOST_WIDE_INT) 0x7f));
+   ~ HOST_WIDE_INT_UC (0x7f)));
   return fold_build2_loc (loc, EQ_EXPR, integer_type_node,
  arg, integer_zero_node);
 }
--- gcc/combine.cc.jj   2024-01-03 11:51:34.028696534 +0100
+++ gcc/combine.cc  2024-02-23 22:03:36.895923405 +0100
@@ -9745,7 +9745,7 @@ make_field_assignment (rtx x)
   if (width >= HOST_BITS_PER_WIDE_INT)
ze_mask = -1;
   else
-   ze_mask = ((unsigned HOST_WIDE_INT)1 << width) - 1;
+   ze_mask = (HOST_WIDE_INT_1U << width) - 1;
 
   /* Complete overlap.  We can remove the source AND.  */
   if ((and_mask & ze_mask) == ze_mask)
--- gcc/double-int.cc.jj2024-01-03 11:51:42.086584698 +0100
+++ gcc/double-int.cc   2024-02-23 22:04:30.586164187 +0100
@@ -671,14 +671,14 @@ double_int::mask (unsigned prec)
   if (prec > HOST_BITS_PER_WIDE_INT)
 {
   prec -= HOST_BITS_PER_WIDE_INT;
-  m = ((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1;
+  m = (HOST_WIDE_INT_UC (2) << (prec - 1)) - 1;
   mask.high = (HOST_WIDE_INT) m;
   mask.low = ALL_ONES;
 }
   else
 {
   mask.high = 0;
-  mask.low = prec ? ((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1 : 0;
+  mask.low = prec ? (HOST_WIDE_INT_UC (2) << (prec - 1)) - 1 : 0;
 }
 
   return mask;
--- gcc/genattrtab.cc.jj2024-01-03 11:51:38.125639672 +0100
+++ gcc/genattrtab.cc   2024-02-23 22:05:38.043210294 +0100
@@ -2392,7 +2392,7 @@ static rtx
 attr_alt_complement (rtx s)
 {
   return attr_rtx (EQ_ATTR_ALT, XWINT (s, 0),
-   ((HOST_WIDE_INT) 1) - XWINT (s, 1));
+  HOST_WIDE_INT_1 - XWINT (s, 1));
 }
 
 /* Return EQ_ATTR_ALT expression representing set containing elements set
@@ -2401,7 +2401,7 @@ attr_alt_complement (rtx s)
 static rtx
 mk_attr_alt (alternative_mask e)
 {
-  return attr_rtx (EQ_ATTR_ALT, (HOST_WIDE_INT) e, (HOST_WIDE_INT) 0);
+  return attr_rtx (EQ_ATTR_ALT, (HOST_WIDE_INT) e, HOST_WIDE_INT_0);
 }
 
 /* Given an expression, see if it can be simplified for a particular insn
--- gcc/genautomata.cc.jj   2024-01-03 11:51:32.524717408 +0100
+++ gcc/genautomata.cc  2024-02-23 22:07:04.667985357 +0100
@@ -3416,13 +3416,13 @@ finish_alt_states (void)
 
 /* Set bit number bitno in the bit string.  The macro is not side
effect proof.  */
-#define bitmap_set_bit(bitstring, bitno)	  \
+#define bitmap_set_bit(bitstring, bitno) \
   ((bitstring)[(bitno) / (sizeof (*(bitstrin