[PATCH] RISC-V: Add vmsge vv C api tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vmsge_vv-1.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv-2.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv-3.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_m-1.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_m-2.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_m-3.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_mu-1.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_mu-2.c: New test.
* gcc.target/riscv/rvv/base/vmsge_vv_mu-3.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv-1.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv-2.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv-3.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_m-1.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_m-2.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_m-3.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_mu-1.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_mu-2.c: New test.
* gcc.target/riscv/rvv/base/vmsgeu_vv_mu-3.c: New test.

---
 .../gcc.target/riscv/rvv/base/vmsge_vv-1.c| 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv-2.c| 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv-3.c| 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_m-1.c  | 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_m-2.c  | 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_m-3.c  | 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_mu-1.c | 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_mu-2.c | 160 ++
 .../gcc.target/riscv/rvv/base/vmsge_vv_mu-3.c | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv-1.c   | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv-2.c   | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv-3.c   | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv_m-1.c | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv_m-2.c | 160 ++
 .../gcc.target/riscv/rvv/base/vmsgeu_vv_m-3.c | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-1.c   | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-2.c   | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-3.c   | 160 ++
 18 files changed, 2880 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_m-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_m-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_m-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_mu-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_mu-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv_mu-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_m-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_m-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_m-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_mu-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_mu-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmsgeu_vv_mu-3.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-1.c
new file mode 100644
index 000..a3cb8bd02ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vmsge_vv-1.c
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmsge_vv_i8mf8_b64(vbool64_t mask,vint8mf8_t 
op1,vint8mf8_t op2,size_t vl)
+{
+return __riscv_vmsge_vv_i8mf8_b64(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsge_vv_i8mf4_b32(vbool32_t mask,vint8mf4_t 
op1,vint8mf4_t op2,size_t vl)
+{
+return __riscv_vmsge_vv_i8mf4_b32(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsge_vv_i8mf2_b16(vbool16_t mask,vint8mf2_t 
op1,vint8mf2_t op2,size_t vl)
+{
+return __riscv_vmsge_vv_i8mf2_b16(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsge_vv_i8m1_b8(vbool8_t mask,vint8m1_t op1,vint8m1_t 
op2,size_t vl)
+{
+return __riscv_vmsge_vv_i8m1_b8(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsge_vv_i8m2_b4(vbool4_t mask,vint8m2_t op1,vint8m2_t 
op2

Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Biener via Gcc-patches
On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:

> Thanks for contributing this.
> Hi, Richard. Can you help us with this issue?
> In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
> (VNx2BImode), vbool64_t (VNx1BImode)
> Since we are using a 1-bit mask, each BOOL occupies 1 bit.
> According to the RVV ISA, we adjust these modes as follows:
> 
> VNx8BImode poly (8,8) NUNITS (each unit is a 1-bit mask)
> VNx4BImode poly (4,4) NUNITS (each unit is a 1-bit mask)
> VNx2BImode poly (2,2) NUNITS (each unit is a 1-bit mask)
> VNx1BImode poly (1,1) NUNITS (each unit is a 1-bit mask)

So how's VNx1BImode laid out for N == 2?  Is that still a single
byte and two consecutive bits?  I suppose so.

But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?

I'm not sure what GET_MODE_PRECISION of the vector mode itself
should be here, but then I wonder ...

> If we use GET_MODE_BITSIZE or GET_MODE_NUNITS to get a value, their values
> are different.
> However, if we use GET_MODE_SIZE on these modes, they are all the same
> (poly (1,1)).
> Such a scenario makes these modes tieable and produces wrong code generation,
> since their bit sizes are different.
> Consider the case as this:
> #include "riscv_vector.h"
> void foo5_3 (int32_t * restrict in, int32_t * restrict out, size_t n, int 
> cond)
> {
>   vint8m1_t v = *(vint8m1_t*)in;
>   *(vint8m1_t*)out = v;  vbool16_t v4 = *(vbool16_t *)in;
>   *(vbool16_t *)(out + 300) = v4;
>   vbool8_t v3 = *(vbool8_t*)in;
>   *(vbool8_t*)(out + 200) = v3;
> }
> The second vbool8_t load (vlm.v) is missing, since GCC generates "v3 =
> VIEW_CONVERT (vbool8_t) v4" in gimple.
> We failed to fix it in the RISC-V backend. Can you help us with this? Thanks.

... why for the loads the "padding" is not loaded?  The above testcase
is probably more complicated than necessary as well?

Thanks,
Richard.
 
>
> juzhe.zh...@rivai.ai
>  
> From: incarnation.p.lee
> Date: 2023-02-11 16:46
> To: gcc-patches
> CC: juzhe.zhong; kito.cheng; rguenther; Pan Li
> Subject: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
> From: Pan Li 
>  
> Fix the bug for mode tieable of the RVV bool types. The vbool*_t types
> cannot be tied because the actual load/store size is determined by
> the vl. The mode size of the RVV bool types is also adjusted for the
> underlying optimization passes. The RVV bool types are vbool*_t, aka
> vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, vbool32_t, and
> vbool64_t.
>  
> PR 108185
> PR 108654
>  
> gcc/ChangeLog:
>  
> * config/riscv/riscv-modes.def (ADJUST_BYTESIZE):
> * config/riscv/riscv.cc (riscv_v_adjust_bytesize):
> (riscv_modes_tieable_p):
> * config/riscv/riscv.h (riscv_v_adjust_bytesize):
> * machmode.h (VECTOR_BOOL_MODE_P):
> * tree-ssa-sccvn.cc (visit_reference_op_load):
>  
> gcc/testsuite/ChangeLog:
>  
> * gcc.target/riscv/pr108185-1.c: New test.
> * gcc.target/riscv/pr108185-2.c: New test.
> * gcc.target/riscv/pr108185-3.c: New test.
> * gcc.target/riscv/pr108185-4.c: New test.
> * gcc.target/riscv/pr108185-5.c: New test.
> * gcc.target/riscv/pr108185-6.c: New test.
> * gcc.target/riscv/pr108185-7.c: New test.
> * gcc.target/riscv/pr108185-8.c: New test.
>  
> Signed-off-by: Pan Li 
> ---
> gcc/config/riscv/riscv-modes.def| 14 ++--
> gcc/config/riscv/riscv.cc   | 34 -
> gcc/config/riscv/riscv.h|  2 +
> gcc/machmode.h  |  3 +
> gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
> gcc/tree-ssa-sccvn.cc   | 13 +++-
> 13 files changed, 608 insertions(+), 11 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>  
> diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..cc21d3c83a2 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -64,13 +64,13 @@ ADJUST_ALIGNMENT (VNx16BI, 1);
> ADJUST_ALIGNMENT (VNx32BI, 1);
> ADJUST_ALIGNMENT (VNx64BI, 1);
> -ADJUST_BYTESIZE (VNx1BI, riscv_vector_chunks * risc

Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread juzhe.zh...@rivai.ai
>> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
Yes, I think so.

Let's explain RVV more clearly.
Let's suppose we have a vector length of 64 bits in an RVV CPU.
VNx1BI is exactly 1 bit.
VNx2BI is exactly 2 consecutive bits.
VNx4BI is exactly 4 consecutive bits.
VNx8BI is exactly 8 consecutive bits.

For VNx1BI (vbool64_t), we load it with this asm:
vsetvl e8mf8
vlm.v

For VNx2BI (vbool32_t), we load it with this asm:
vsetvl e8mf4
vlm.v

For VNx4BI (vbool16_t), we load it with this asm:
vsetvl e8mf2
vlm.v

For VNx8BI (vbool8_t), we load it with this asm:
vsetvl e8m1
vlm.v

In case of this code sequence:
vbool16_t v4 = *(vbool16_t *)in;
vbool8_t v3 = *(vbool8_t*)in;

Since VNx4BI (vbool16_t) is smaller than VNx8BI (vbool8_t),
we can't simply use the data loaded as VNx4BI (vbool16_t) in VNx8BI (vbool8_t).
But we can use the data loaded as VNx8BI (vbool8_t) in VNx4BI (vbool16_t).

In this example, GCC thinks the data loaded for vbool8_t v3 can be replaced by
vbool16_t v4, which has already been loaded.
That is incorrect for RVV.

Maybe @kito can give us more information about the RVV ISA if I haven't
explained it clearly.
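
For reference, the direction the patch takes (going by its ChangeLog entries
for riscv.cc and machmode.h) is to make riscv_modes_tieable_p refuse such
ties.  A minimal sketch of that idea could look like the code below; it is
not the submitted implementation, and it assumes the VECTOR_BOOL_MODE_P
predicate the patch adds to machmode.h:

/* Illustrative sketch only.  Two RVV mask modes can share a byte size
   while defining different numbers of mask bits, so tying them lets
   passes reuse a value loaded in one mode for the other.  */
static bool
riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (VECTOR_BOOL_MODE_P (mode1) && VECTOR_BOOL_MODE_P (mode2))
    /* vbool8_t (VNx8BI) carries twice the mask bits of vbool16_t (VNx4BI),
       so only allow tying when the element counts match.  */
    return known_eq (GET_MODE_NUNITS (mode1), GET_MODE_NUNITS (mode2));

  /* Other mode pairs keep their usual handling (omitted here).  */
  return true;
}

Whether a backend-only check like this is enough (the patch also touches
tree-ssa-sccvn.cc) is exactly what the rest of this thread is about.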


juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-02-13 16:07
To: juzhe.zhong
CC: Pan Li; gcc-patches; kito.cheng; richard.sandiford; ams
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:
 
> Thanks for contributing this.
> Hi, Richard. Can you help us with this issue?
> In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
> (VNx2BImode), vbool64_t (VNx1BImode)
> Since we are using a 1-bit mask, each BOOL occupies 1 bit.
> According to the RVV ISA, we adjust these modes as follows:
> 
> VNx8BImode poly (8,8) NUNITS (each unit is a 1-bit mask)
> VNx4BImode poly (4,4) NUNITS (each unit is a 1-bit mask)
> VNx2BImode poly (2,2) NUNITS (each unit is a 1-bit mask)
> VNx1BImode poly (1,1) NUNITS (each unit is a 1-bit mask)
 
So how's VNx1BImode laid out for N == 2?  Is that still a single
byte and two consecutive bits?  I suppose so.
 
But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
 
I'm not sure what GET_MODE_PRECISION of the vector mode itself
should be here, but then I wonder ...
 
> If we use GET_MODE_BITSIZE or GET_MODE_NUNITS to get a value, their values
> are different.
> However, if we use GET_MODE_SIZE on these modes, they are all the same
> (poly (1,1)).
> Such a scenario makes these modes tieable and produces wrong code generation,
> since their bit sizes are different.
> Consider the case as this:
> #include "riscv_vector.h"
> void foo5_3 (int32_t * restrict in, int32_t * restrict out, size_t n, int 
> cond)
> {
>   vint8m1_t v = *(vint8m1_t*)in;
>   *(vint8m1_t*)out = v;  vbool16_t v4 = *(vbool16_t *)in;
>   *(vbool16_t *)(out + 300) = v4;
>   vbool8_t v3 = *(vbool8_t*)in;
>   *(vbool8_t*)(out + 200) = v3;
> }
> The second vbool8_t load (vlm.v) is missing, since GCC generates "v3 =
> VIEW_CONVERT (vbool8_t) v4" in gimple.
> We failed to fix it in the RISC-V backend. Can you help us with this? Thanks.
 
... why for the loads the "padding" is not loaded?  The above testcase
is probably more complicated than necessary as well?
 
Thanks,
Richard.
>
> juzhe.zh...@rivai.ai
>  
> From: incarnation.p.lee
> Date: 2023-02-11 16:46
> To: gcc-patches
> CC: juzhe.zhong; kito.cheng; rguenther; Pan Li
> Subject: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
> From: Pan Li 
>  
> Fix the bug for mode tieable of the RVV bool types. The vbool*_t types
> cannot be tied because the actual load/store size is determined by
> the vl. The mode size of the RVV bool types is also adjusted for the
> underlying optimization passes. The RVV bool types are vbool*_t, aka
> vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, vbool32_t, and
> vbool64_t.
>  
> PR 108185
> PR 108654
>  
> gcc/ChangeLog:
>  
> * config/riscv/riscv-modes.def (ADJUST_BYTESIZE):
> * config/riscv/riscv.cc (riscv_v_adjust_bytesize):
> (riscv_modes_tieable_p):
> * config/riscv/riscv.h (riscv_v_adjust_bytesize):
> * machmode.h (VECTOR_BOOL_MODE_P):
> * tree-ssa-sccvn.cc (visit_reference_op_load):
>  
> gcc/testsuite/ChangeLog:
>  
> * gcc.target/riscv/pr108185-1.c: New test.
> * gcc.target/riscv/pr108185-2.c: New test.
> * gcc.target/riscv/pr108185-3.c: New test.
> * gcc.target/riscv/pr108185-4.c: New test.
> * gcc.target/riscv/pr108185-5.c: New test.
> * gcc.target/riscv/pr108185-6.c: New test.
> * gcc.target/riscv/pr108185-7.c: New test.
> * gcc.target/riscv/pr108185-8.c: New test.
>  
> Signed-off-by: Pan Li 
> ---
> gcc/config/riscv/riscv-modes.def| 14 ++--
> gcc/config/riscv/riscv.cc   | 34 -
> gcc/config/riscv/riscv.h|  2 +
> gcc/machmode.h  |  3 +
> gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++

[PATCH] RISC-V: Add vmseq vv C api tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vmseq_vv-1.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv-2.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv-3.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_m-1.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_m-2.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_m-3.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_mu-1.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_mu-2.c: New test.
* gcc.target/riscv/rvv/base/vmseq_vv_mu-3.c: New test.

---
 .../gcc.target/riscv/rvv/base/vmseq_vv-1.c| 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv-2.c| 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv-3.c| 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_m-1.c  | 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_m-2.c  | 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_m-3.c  | 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_mu-1.c | 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_mu-2.c | 292 ++
 .../gcc.target/riscv/rvv/base/vmseq_vv_mu-3.c | 292 ++
 9 files changed, 2628 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_m-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_m-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_m-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_mu-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_mu-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv_mu-3.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-1.c
new file mode 100644
index 000..9c7ea9f2424
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vmseq_vv-1.c
@@ -0,0 +1,292 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmseq_vv_i8mf8_b64(vbool64_t mask,vint8mf8_t 
op1,vint8mf8_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8mf8_b64(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq_vv_i8mf4_b32(vbool32_t mask,vint8mf4_t 
op1,vint8mf4_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8mf4_b32(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq_vv_i8mf2_b16(vbool16_t mask,vint8mf2_t 
op1,vint8mf2_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8mf2_b16(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq_vv_i8m1_b8(vbool8_t mask,vint8m1_t op1,vint8m1_t 
op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8m1_b8(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmseq_vv_i8m2_b4(vbool4_t mask,vint8m2_t op1,vint8m2_t 
op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8m2_b4(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmseq_vv_i8m4_b2(vbool2_t mask,vint8m4_t op1,vint8m4_t 
op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8m4_b2(op1,op2,vl);
+}
+
+
+vbool1_t test___riscv_vmseq_vv_i8m8_b1(vbool1_t mask,vint8m8_t op1,vint8m8_t 
op2,size_t vl)
+{
+return __riscv_vmseq_vv_i8m8_b1(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmseq_vv_i16mf4_b64(vbool64_t mask,vint16mf4_t 
op1,vint16mf4_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16mf4_b64(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq_vv_i16mf2_b32(vbool32_t mask,vint16mf2_t 
op1,vint16mf2_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16mf2_b32(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq_vv_i16m1_b16(vbool16_t mask,vint16m1_t 
op1,vint16m1_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16m1_b16(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq_vv_i16m2_b8(vbool8_t mask,vint16m2_t 
op1,vint16m2_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16m2_b8(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmseq_vv_i16m4_b4(vbool4_t mask,vint16m4_t 
op1,vint16m4_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16m4_b4(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmseq_vv_i16m8_b2(vbool2_t mask,vint16m8_t 
op1,vint16m8_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i16m8_b2(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmseq_vv_i32mf2_b64(vbool64_t mask,vint32mf2_t 
op1,vint32mf2_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i32mf2_b64(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq_vv_i32m1_b32(vbool32_t mask,vint32m1_t 
op1,vint32m1_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i32m1_b32(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq_vv_i32m2_b16(vbool16_t mask,vint32m2_t 
op1,vint32m2_t op2,size_t vl)
+{
+return __riscv_vmseq_vv_i32m2_b16(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq_vv_i32m4_b8(vbool8_t mask,v

[PATCH] RISC-V: Add binop constraints tests for integer compare

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/binop_vv_constraint-2.c: New test.
* gcc.target/riscv/rvv/base/binop_vv_constraint-3.c: New test.
* gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: New test.
* gcc.target/riscv/rvv/base/binop_vv_constraint-5.c: New test.
* gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: New test.
* gcc.target/riscv/rvv/base/binop_vv_constraint-7.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-123.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-124.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-125.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-126.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-128.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-129.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-130.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-131.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-132.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-133.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-134.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-135.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-136.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-137.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-138.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-139.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-140.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-141.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-142.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-143.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-144.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-145.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-146.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-147.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-148.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-149.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-151.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-152.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-153.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-154.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-155.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-156.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-157.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-158.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-159.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-160.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-161.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-162.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-163.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-164.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-165.c: New test.
* gcc.target/riscv/rvv/base/binop_vx_constraint-166.c: New test.

---
 .../riscv/rvv/base/binop_vv_constraint-2.c|  15 ++
 .../riscv/rvv/base/binop_vv_constraint-3.c|  27 
 .../riscv/rvv/base/binop_vv_constraint-4.c|  27 
 .../riscv/rvv/base/binop_vv_constraint-5.c|  29 
 .../riscv/rvv/base/binop_vv_constraint-6.c|  27 
 .../riscv/rvv/base/binop_vv_constraint-7.c|  29 
 .../riscv/rvv/base/binop_vx_constraint-123.c  |  15 ++
 .../riscv/rvv/base/binop_vx_constraint-124.c  |  27 
 .../riscv/rvv/base/binop_vx_constraint-125.c  |  27 
 .../riscv/rvv/base/binop_vx_constraint-126.c  |  29 
 .../riscv/rvv/base/binop_vx_constraint-127.c  |  27 
 .../riscv/rvv/base/binop_vx_constraint-128.c  |  29 
 .../riscv/rvv/base/binop_vx_constraint-129.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-130.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-131.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-132.c  |  59 
 .../riscv/rvv/base/binop_vx_constraint-133.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-134.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-135.c  |  69 +
 .../riscv/rvv/base/binop_vx_constraint-136.c  |  59 
 .../riscv/rvv/base/binop_vx_constraint-137.c  | 123 
 .../riscv/rvv/base/binop_vx_constraint-138.c

[PATCH] RISC-V: Add vmsne vv C++ tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmsne_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsne_vv_mu-3.C: New test.

---
 .../g++.target/riscv/rvv/base/vmsne_vv-1.C| 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv-2.C| 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv-3.C| 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_m-1.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_m-2.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_m-3.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_mu-1.C | 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_mu-2.C | 292 ++
 .../g++.target/riscv/rvv/base/vmsne_vv_mu-3.C | 292 ++
 9 files changed, 2628 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-1.C
new file mode 100644
index 000..219a0392bda
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmsne_vv-1.C
@@ -0,0 +1,292 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmsne(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsne(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsne(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsne(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsne(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmsne(vbool2_t mask,vint8m4_t op1,vint8m4_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool1_t test___riscv_vmsne(vbool1_t mask,vint8m8_t op1,vint8m8_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmsne(vbool64_t mask,vint16mf4_t op1,vint16mf4_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsne(vbool32_t mask,vint16mf2_t op1,vint16mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsne(vbool16_t mask,vint16m1_t op1,vint16m1_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsne(vbool8_t mask,vint16m2_t op1,vint16m2_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsne(vbool4_t mask,vint16m4_t op1,vint16m4_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmsne(vbool2_t mask,vint16m8_t op1,vint16m8_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmsne(vbool64_t mask,vint32mf2_t op1,vint32mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsne(vbool32_t mask,vint32m1_t op1,vint32m1_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsne(vbool16_t mask,vint32m2_t op1,vint32m2_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsne(vbool8_t mask,vint32m4_t op1,vint32m4_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsne(vbool4_t mask,vint32m8_t op1,vint32m8_t op2,size_t 
vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmsne(vbool64_t mask,vint64m1_t op1,vint64m1_t 
op2,size_t vl)
+{
+return __riscv_vmsne(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsne(vbool32_t mask,vint64m

[PATCH] RISC-V: Add vmslt vv C++ api tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmslt_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmslt_vv_mu-3.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsltu_vv_mu-3.C: New test.

---
 .../g++.target/riscv/rvv/base/vmslt_vv-1.C| 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv-2.C| 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv-3.C| 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_m-1.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_m-2.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_m-3.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_mu-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_mu-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmslt_vv_mu-3.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv-1.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv-2.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv-3.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv_m-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv_m-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsltu_vv_m-3.C | 160 ++
 .../riscv/rvv/base/vmsltu_vv_mu-1.C   | 160 ++
 .../riscv/rvv/base/vmsltu_vv_mu-2.C   | 160 ++
 .../riscv/rvv/base/vmsltu_vv_mu-3.C   | 160 ++
 18 files changed, 2880 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv_mu-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsltu_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-1.C
new file mode 100644
index 000..eb0c11909f1
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmslt_vv-1.C
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmslt(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmslt(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmslt(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmslt(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmslt(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmslt(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmslt(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmslt(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmslt(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmslt(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmslt(vbool2_t mask,vint8m4

[PATCH] RISC-V: Add vmsle vv C++ api tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmsle_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsle_vv_mu-3.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsleu_vv_mu-3.C: New test.

---
 .../g++.target/riscv/rvv/base/vmsle_vv-1.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv-2.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv-3.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_m-1.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_m-2.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_m-3.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_mu-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_mu-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsle_vv_mu-3.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv-1.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv-2.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv-3.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv_m-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv_m-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsleu_vv_m-3.C | 160 ++
 .../riscv/rvv/base/vmsleu_vv_mu-1.C   | 160 ++
 .../riscv/rvv/base/vmsleu_vv_mu-2.C   | 160 ++
 .../riscv/rvv/base/vmsleu_vv_mu-3.C   | 160 ++
 18 files changed, 2880 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv_mu-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsleu_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-1.C
new file mode 100644
index 000..a2dc91ff033
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmsle_vv-1.C
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmsle(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmsle(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsle(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmsle(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsle(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsle(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsle(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmsle(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsle(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmsle(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmsle(vbool2_t mask,vint8m4

[PATCH] RISC-V: Add vmsgt vv C++ tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmsgt_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsgt_vv_mu-3.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsgtu_vv_mu-3.C: New test.

---
 .../g++.target/riscv/rvv/base/vmsgt_vv-1.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv-2.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv-3.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_m-1.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_m-2.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_m-3.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_mu-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_mu-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgt_vv_mu-3.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv-1.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv-2.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv-3.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv_m-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv_m-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgtu_vv_m-3.C | 160 ++
 .../riscv/rvv/base/vmsgtu_vv_mu-1.C   | 160 ++
 .../riscv/rvv/base/vmsgtu_vv_mu-2.C   | 160 ++
 .../riscv/rvv/base/vmsgtu_vv_mu-3.C   | 160 ++
 18 files changed, 2880 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv_mu-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgtu_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-1.C
new file mode 100644
index 000..8ad3c737aa4
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmsgt_vv-1.C
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmsgt(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmsgt(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsgt(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmsgt(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsgt(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsgt(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsgt(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmsgt(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsgt(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmsgt(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmsgt(vbool2_t mask,vint8m4

[PATCH] RISC-V: Add vmsge vv C++ tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmsge_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsge_vv_mu-3.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmsgeu_vv_mu-3.C: New test.
---
 .../g++.target/riscv/rvv/base/vmsge_vv-1.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv-2.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv-3.C| 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_m-1.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_m-2.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_m-3.C  | 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_mu-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_mu-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsge_vv_mu-3.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv-1.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv-2.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv-3.C   | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv_m-1.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv_m-2.C | 160 ++
 .../g++.target/riscv/rvv/base/vmsgeu_vv_m-3.C | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-1.C   | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-2.C   | 160 ++
 .../riscv/rvv/base/vmsgeu_vv_mu-3.C   | 160 ++
 18 files changed, 2880 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv_mu-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmsgeu_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-1.C
new file mode 100644
index 000..556a900decc
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmsge_vv-1.C
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmsge(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmsge(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmsge(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmsge(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmsge(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmsge(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmsge(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmsge(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmsge(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmsge(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmsge(vbool2_t mask,vint8m4_

[PATCH] RISC-V: Add vmseq vv C++ tests

2023-02-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/vmseq_vv-1.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv-2.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv-3.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_m-1.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_m-2.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_m-3.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_mu-1.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_mu-2.C: New test.
* g++.target/riscv/rvv/base/vmseq_vv_mu-3.C: New test.

---
 .../g++.target/riscv/rvv/base/vmseq_vv-1.C| 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv-2.C| 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv-3.C| 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_m-1.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_m-2.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_m-3.C  | 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_mu-1.C | 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_mu-2.C | 292 ++
 .../g++.target/riscv/rvv/base/vmseq_vv_mu-3.C | 292 ++
 9 files changed, 2628 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_m-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_m-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_m-3.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_mu-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_mu-2.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv_mu-3.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-1.C b/gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-1.C
new file mode 100644
index 000..15115ce5706
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/vmseq_vv-1.C
@@ -0,0 +1,292 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+vbool64_t test___riscv_vmseq(vbool64_t mask,vint8mf8_t op1,vint8mf8_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq(vbool32_t mask,vint8mf4_t op1,vint8mf4_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq(vbool16_t mask,vint8mf2_t op1,vint8mf2_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq(vbool8_t mask,vint8m1_t op1,vint8m1_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmseq(vbool4_t mask,vint8m2_t op1,vint8m2_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmseq(vbool2_t mask,vint8m4_t op1,vint8m4_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool1_t test___riscv_vmseq(vbool1_t mask,vint8m8_t op1,vint8m8_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmseq(vbool64_t mask,vint16mf4_t op1,vint16mf4_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq(vbool32_t mask,vint16mf2_t op1,vint16mf2_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq(vbool16_t mask,vint16m1_t op1,vint16m1_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq(vbool8_t mask,vint16m2_t op1,vint16m2_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmseq(vbool4_t mask,vint16m4_t op1,vint16m4_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool2_t test___riscv_vmseq(vbool2_t mask,vint16m8_t op1,vint16m8_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmseq(vbool64_t mask,vint32mf2_t op1,vint32mf2_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq(vbool32_t mask,vint32m1_t op1,vint32m1_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool16_t test___riscv_vmseq(vbool16_t mask,vint32m2_t op1,vint32m2_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool8_t test___riscv_vmseq(vbool8_t mask,vint32m4_t op1,vint32m4_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool4_t test___riscv_vmseq(vbool4_t mask,vint32m8_t op1,vint32m8_t op2,size_t 
vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool64_t test___riscv_vmseq(vbool64_t mask,vint64m1_t op1,vint64m1_t 
op2,size_t vl)
+{
+return __riscv_vmseq(op1,op2,vl);
+}
+
+
+vbool32_t test___riscv_vmseq(vbool32_t mask,vint64m

Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, juzhe.zh...@rivai.ai wrote:

> >> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
> Yes, I think so.
> 
> Let's explain RVV more clearly.
> Let's suppose we have a vector length of 64 bits in an RVV CPU.
> VNx1BI is exactly 1 bit.
> VNx2BI is exactly 2 consecutive bits.
> VNx4BI is exactly 4 consecutive bits.
> VNx8BI is exactly 8 consecutive bits.
> 
> For VNx1BI (vbool64_t), we load it with this asm:
> vsetvl e8mf8
> vlm.v
> 
> For VNx2BI (vbool32_t), we load it with this asm:
> vsetvl e8mf4
> vlm.v
> 
> For VNx4BI (vbool16_t), we load it with this asm:
> vsetvl e8mf2
> vlm.v
> 
> For VNx8BI (vbool8_t), we load it with this asm:
> vsetvl e8m1
> vlm.v
> 
> In case of this code sequence:
> vbool16_t v4 = *(vbool16_t *)in;
> vbool8_t v3 = *(vbool8_t*)in;
> 
> Since VNx4BI (vbool16_t) is smaller than VNx8BI (vbool8_t),
> we can't simply use the data loaded as VNx4BI (vbool16_t) in VNx8BI (vbool8_t).
> But we can use the data loaded as VNx8BI (vbool8_t) in VNx4BI (vbool16_t).
>
> In this example, GCC thinks the data loaded for vbool8_t v3 can be replaced by
> vbool16_t v4, which has already been loaded.
> That is incorrect for RVV.

OK, so the 'vlm.v' instruction will zero the padding bits (according to
vsetvl), but I doubt the memory subsystem will not load a whole byte.

Then GET_MODE_PRECISION of VNx4BI has to be smaller than 
GET_MODE_PRECISION of VNx8BI, even if their size is the same.

I suppose that ADJUST_NUNITS should be able to do this, but then we
have in aarch64-modes.def

VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);

ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);

so all VNxMBI modes are 2 bytes in size and their component is always
BImode, but IIRC the elements of VNx2BImode occupy 4 bits each?

For riscv we have

VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));

so here it would be natural to set the mode precision to
a poly-int computed by the component precision times nunits?  OTOH
we have to look at the component precision vs. size as well and

/* Single bit mode used for booleans.  */ 
BOOL_MODE (BI, 1, 1); 

BOOL_MODE is not documented, but its arguments are precision and size, so
BImode has a size of 1.  That makes VECTOR_BOOL_MODE very special since
the layout isn't derived from the component mode.  Deriving the
layout from the precision would make aarch64 incorrect and
would need BI2 and BI4 modes at least.

Adding a parameter to ADJUST_NUNITS might be the way to go instead,
specifying the number of bits in a component?
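
Purely as an illustration of that suggestion (the trailing argument below is
invented and this syntax does not exist today), a riscv-modes.def entry might
then read:

/* Hypothetical: a trailing argument gives the number of bits one BImode
   component really occupies (1 for RVV, 4 for the aarch64 VNx2BI case),
   so a mode's precision need not equal its byte size times 8.  */
VECTOR_BOOL_MODE (VNx8BI, 8, BI, 1);
ADJUST_NUNITS (VNx8BI, riscv_v_adjust_nunits (VNx8BImode, 8), 1);

That way GET_MODE_PRECISION of VNx8BImode could become poly (8,8) bits even
though its GET_MODE_SIZE stays byte-rounded, which is the distinction the
tieable check needs.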

Richard.


> Maybe @kito can give us more information about the RVV ISA if I haven't
> explained it clearly.
> 
> 
> juzhe.zh...@rivai.ai
>  
> From: Richard Biener
> Date: 2023-02-13 16:07
> To: juzhe.zhong
> CC: Pan Li; gcc-patches; kito.cheng; richard.sandiford; ams
> Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
> On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:
>  
> > Thanks for contributing this.
> > Hi, Richard. Can you help us with this issue?
> > In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
> > (VNx2BImode), vbool64_t (VNx1BImode)
> > Since we are using a 1-bit mask, each BOOL occupies 1 bit.
> > According to the RVV ISA, we adjust these modes as follows:
> > 
> > VNx8BImode poly (8,8) NUNITS (each unit is a 1-bit mask)
> > VNx4BImode poly (4,4) NUNITS (each unit is a 1-bit mask)
> > VNx2BImode poly (2,2) NUNITS (each unit is a 1-bit mask)
> > VNx1BImode poly (1,1) NUNITS (each unit is a 1-bit mask)
>  
> So how's VNx1BImode laid out for N == 2?  Is that still a single
> byte and two consecutive bits?  I suppose so.
>  
> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
>  
> I'm not sure what GET_MODE_PRECISION of the vector mode itself
> should be here, but then I wonder ...
>  
> > If we use GET_MODE_BITSIZE or GET_MODE_NUNITS to get a value, their values
> > are different.
> > However, if we use GET_MODE_SIZE on these modes, they are all the same
> > (poly (1,1)).
> > Such a scenario makes these modes tieable and produces wrong code generation,
> > since their bit sizes are different.
> > Consider the case as this:
> > #include "riscv_vector.h"
> > void foo5_3 (int32_t * restrict in, int32_t * restrict out, size_t n, int 
> > cond)
> > {
> >   vint8m1_t v = *(vint8m1_t*)in;
> >   *(vint8m1_t*)out = v;  vbool16_t v4 = *(vbool16_t *)in;
> >   *(vbool16_t *)(out + 300) = v4;
> >   vbool8_t v3 = *(vbool8_t*)in;
> >   *(vbool8_t*)(out + 200) = v3;
> > }
> > The second vbool8_t load (vlm.v) is missing, since GCC generates "v3 =
> > VIEW_CONVERT (vbool8_t) v4" in gimple.
> > We failed to fix it in the RISC-V backend. Can you help us with this? Thanks.
>  
> ... why for the loads the "padding" is not loaded?  The 

Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread juzhe.zh...@rivai.ai
I am not sure changing the precision of the inner BImode is correct for RVV,
since, by definition, the single 1-bit masks in the RVV mask layout are consecutive.
Maybe we can wait for Kito to answer this question?



juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-02-13 16:46
To: juzhe.zh...@rivai.ai
CC: incarnation.p.lee; gcc-patches; Kito.cheng; richard.sandiford; ams
Subject: Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
On Mon, 13 Feb 2023, juzhe.zh...@rivai.ai wrote:
 
> >> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
> Yes, I think so.
> 
> Let's explain RVV more clearly.
> Let's suppose we have vector-length = 64bits in RVV CPU.
> VNx1BI is exactly 1 consecutive bits.
> VNx2BI is exactly 2 consecutive bits.
> VNx4BI is exactly 4 consecutive bits.
> VNx8BI is exactly 8 consecutive bits.
> 
> For VNx1BI (vbool64_t ), we load it wich this asm:
> vsetvl e8mf8
> vlm.v
> 
> For VNx2BI (vbool32_t ), we load it wich this asm:
> vsetvl e8mf4
> vlm.v
> 
> For VNx4BI (vbool16_t ), we load it wich this asm:
> vsetvl e8mf2
> vlm.v
> 
> For VNx8BI (vbool8_t ), we load it wich this asm:
> vsetvl e8m1
> vlm.v
> 
> In case of this code sequence:
> vbool16_t v4 = *(vbool16_t *)in;
> vbool8_t v3 = *(vbool8_t*)in;
> 
> Since VNx4BI (vbool16_t ) is smaller than VNx8BI (vbool8_t )
> We can't just use the data loaded by VNx4BI (vbool16_t ) in  VNx8BI (vbool8_t 
> ).
> But we can use the data loaded by VNx8BI (vbool8_t  ) in  VNx4BI (vbool16_t ).
>
> In this example, GCC thinks data loaded for vbool8_t v3 can be replaced by 
> vbool16_t v4 which is already loaded
> It's incorrect for RVV.
 
OK, so the 'vlm.v' instruction will zero the padding bits (according to
vsetvl), but I doubt the memory subsystem will not load a whole byte.
 
Then GET_MODE_PRECISION of VNx4BI has to be smaller than 
GET_MODE_PRECISION of VNx8BI, even if their size is the same.
 
I suppose that ADJUST_NUNITS should be able to do this, but then we
have in aarch64-modes.def
 
VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
 
ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
 
so all VNxMBI modes are 2 bytes in size but their component is always
BImode but IIRC the elements of VNx2BImode occupy 4 bits each?
 
For riscv we have
 
VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
 
so here it would be natural to set the mode precision to
a poly-int computed by the component precision times nunits?  OTOH
we have to look at the component precision vs. size as well and
 
/* Single bit mode used for booleans.  */ 
BOOL_MODE (BI, 1, 1); 
 
BOOL_MODE is not documented, but its precision and size, so BImode
has a size of 1.  That makes VECTOR_BOOL_MODE very special since
the layout isn't derived from the component mode.  Deriving the
layout from the precision would make aarch64 incorrect and
would need BI2 and BI4 modes at least.
 
Adding a parameter to ADJUST_NUNITS might be the way to go instead,
specifying the number of bits in a component?
 
Richard.
 
 
> Maybe @kito can give us more information about RVV ISA if I don't explain it 
> clearly.
> 
> 
> juzhe.zh...@rivai.ai
>  
> From: Richard Biener
> Date: 2023-02-13 16:07
> To: juzhe.zhong
> CC: Pan Li; gcc-patches; kito.cheng; richard.sandiford; ams
> Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
> On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:
>  
> > Thanks for contributing this.
> > Hi, Richard. Can you help us with this issue?
> > In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
> > (VNx2BImode), vbool64_t (VNx1BImode)
> > Since we are using 1bit-mask which is 1-BOOL occupy 1bit.
> > According to RVV ISA, we adjust these modes as follows:
> > 
> > VNx8BImode poly (8,8) NUNTTS (each nunits is 1bit mask)
> > VNx4BImode poly(4,4) NUNTTS (each nunits is 1bit mask)
> > VNx2BImode poly(2,2) NUNTTS (each nunits is 1bit mask)
> > VNx1BImode poly (1,1) NUNTTS (each nunits is 1bit mask)
>  
> So how's VNx1BImode laid out for N == 2?  Is that still a single
> byte and two consecutive bits?  I suppose so.
>  
> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
>  
> I'm not sure what GET_MODE_PRECISION of the vector mode itself
> should be here, but then I wonder ...
>  
> > If we tried GET_MODE_BITSIZE or GET_MODE_NUNITS to get value, their value 
> > are different.
> > However, If we tried GET_MODE_SIZE of these modes, they are the same (poly 
> > (1,1)).
> > Such scenario make these tied together and gives the wrong code gen since 
> > their bitsize are different.
> > Consider the case as this:
> > #include "riscv_vector.h"
> > void foo5_3 (int32_t * restrict in, int32_t * restrict out, size_t n, int 
> > cond)
> > 

[PATCH (pushed)] docs: document new param

2023-02-13 Thread Martin Liška

gcc/ChangeLog:

* doc/invoke.texi: Document ira-simple-lra-insn-threshold.
---
 gcc/doc/invoke.texi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 51447a78584..26de582e41e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15511,6 +15511,9 @@ available alternatives for preferred register class 
even if it has
 found some choice with an appropriate register class and respect the
 found qualified matching constraint.
 
+@item ira-simple-lra-insn-threshold

+Approximate function insn number in 1K units triggering simple local RA.
+
 @item lra-inheritance-ebb-probability-cutoff
 LRA tries to reuse values reloaded in registers in subsequent insns.
 This optimization is called inheritance.  EBB is used as a region to
--
2.39.1



[committed] arc: Don't use millicode thunks unless asked for.

2023-02-13 Thread Claudiu Zissulescu via Gcc-patches
ARC has enter_s/leave_s instructions which can save/restore the entire
function context.  The millicode thunks are no longer needed when compiling
for size; thus, make their usage optional.

gcc/

* common/config/arc/arc-common.cc (arc_option_optimization_table):
Remove millicode from list.

gcc/testsuite/

* gcc.target/arc/milli-1.c: Update test.
---
 gcc/common/config/arc/arc-common.cc| 1 -
 gcc/testsuite/gcc.target/arc/milli-1.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/common/config/arc/arc-common.cc 
b/gcc/common/config/arc/arc-common.cc
index 6423f6a9647..95f5dd61201 100644
--- a/gcc/common/config/arc/arc-common.cc
+++ b/gcc/common/config/arc/arc-common.cc
@@ -54,7 +54,6 @@ static const struct default_options 
arc_option_optimization_table[] =
 { OPT_LEVELS_SIZE, OPT_fsection_anchors, NULL, 1 },
 { OPT_LEVELS_SIZE, OPT_mcase_vector_pcrel, NULL, 1 },
 { OPT_LEVELS_SIZE, OPT_msize_level_, NULL, 3 },
-{ OPT_LEVELS_SIZE, OPT_mmillicode, NULL, 1 },
 { OPT_LEVELS_SIZE, OPT_fif_conversion, NULL, 0 },
 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
 { OPT_LEVELS_3_PLUS_SPEED_ONLY, OPT_msize_level_, NULL, 0 },
diff --git a/gcc/testsuite/gcc.target/arc/milli-1.c 
b/gcc/testsuite/gcc.target/arc/milli-1.c
index b501b39eb81..5fba064ca66 100644
--- a/gcc/testsuite/gcc.target/arc/milli-1.c
+++ b/gcc/testsuite/gcc.target/arc/milli-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-Os" } */
+/* { dg-options "-Os -mmillicode" } */
 
 /* Test if we restore correctly blink when using millicode.  */
 extern void bar (void);
-- 
2.30.2



Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Mon, 13 Feb 2023, juzhe.zh...@rivai.ai wrote:
>
>> >> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
>> Yes, I think so.
>> 
>> Let's explain RVV more clearly.
>> Let's suppose we have vector-length = 64bits in RVV CPU.
>> VNx1BI is exactly 1 consecutive bits.
>> VNx2BI is exactly 2 consecutive bits.
>> VNx4BI is exactly 4 consecutive bits.
>> VNx8BI is exactly 8 consecutive bits.
>> 
>> For VNx1BI (vbool64_t ), we load it wich this asm:
>> vsetvl e8mf8
>> vlm.v
>> 
>> For VNx2BI (vbool32_t ), we load it wich this asm:
>> vsetvl e8mf4
>> vlm.v
>> 
>> For VNx4BI (vbool16_t ), we load it wich this asm:
>> vsetvl e8mf2
>> vlm.v
>> 
>> For VNx8BI (vbool8_t ), we load it wich this asm:
>> vsetvl e8m1
>> vlm.v
>> 
>> In case of this code sequence:
>> vbool16_t v4 = *(vbool16_t *)in;
>> vbool8_t v3 = *(vbool8_t*)in;
>> 
>> Since VNx4BI (vbool16_t ) is smaller than VNx8BI (vbool8_t )
>> We can't just use the data loaded by VNx4BI (vbool16_t ) in  VNx8BI 
>> (vbool8_t ).
>> But we can use the data loaded by VNx8BI (vbool8_t  ) in  VNx4BI (vbool16_t 
>> ).
>>
>> In this example, GCC thinks data loaded for vbool8_t v3 can be replaced by 
>> vbool16_t v4 which is already loaded
>> It's incorrect for RVV.
>
> OK, so the 'vlm.v' instruction will zero the padding bits (according to
> vsetvl), but I doubt the memory subsystem will not load a whole byte.
>
> Then GET_MODE_PRECISION of VNx4BI has to be smaller than 
> GET_MODE_PRECISION of VNx8BI, even if their size is the same.
>
> I suppose that ADJUST_NUNITS should be able to do this, but then we
> have in aarch64-modes.def
>
> VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
> VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
> VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
> VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
>
> ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
> ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
> ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
> ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
>
> so all VNxMBI modes are 2 bytes in size but their component is always
> BImode but IIRC the elements of VNx2BImode occupy 4 bits each?

Yeah.  Only the low bit is significant, so it's still a 1-bit element.
But the padding is distributed evenly across the elements rather than
being grouped at one end of the predicate.

> For riscv we have
>
> VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
> ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
>
> so here it would be natural to set the mode precision to
> a poly-int computed by the component precision times nunits?  OTOH
> we have to look at the component precision vs. size as well and
>
> /* Single bit mode used for booleans.  */ 
> BOOL_MODE (BI, 1, 1); 
>
> BOOL_MODE is not documented, but its precision and size, so BImode
> has a size of 1.  That makes VECTOR_BOOL_MODE very special since
> the layout isn't derived from the component mode.  Deriving the
> layout from the precision would make aarch64 incorrect and
> would need BI2 and BI4 modes at least.

I think the elements have to stay BI for AArch64.  Using BI2 (with a
precision of 2) would make both bits significant.

I'm not sure the RVV case fits into the existing mode layout scheme.
AFAIK we don't currently support vector modes with padding at one end.
If that's right, the fix is likely to involve more than just tweaking
the mode parameters.

What's the byte size of VNx1BI, expressed as a function of N?
If it's CEIL (N, 8) then we don't have a way of representing that yet.

Thanks,
Richard


Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread juzhe.zh...@rivai.ai

>> What's the byte size of VNx1BI, expressed as a function of N?
>> If it's CEIL (N, 8) then we don't have a way of representing that yet.
N is a poly value.
RVV, like SVE, supports scalable vectors.
The N is poly (1,1).

VNx1B mode nunits = poly(1,1) units.
VNx1B mode bitsize = poly(1,1) bits.
VNx1B mode bytesize = poly(1,1) units (currently).  Ideally, and more
accurately, it should be VNx1B mode bytesize = poly(1/8,1/8).
However, it can't be represented like this, so GCC considers its bytesize
to be poly(1,1).


VNx2B mode nunits = poly(2,2) units.
VNx2B mode bitsize = poly(2,2) bits.
VNx2B mode bytesize = poly(2,2) units (currently).  Ideally, and more
accurately, it should be VNx2B mode bytesize = poly(2/8,2/8).
However, it can't be represented like this, so GCC considers its bytesize
to be poly(1,1).

VNx4BI, VNx8BI, likewise.

So their bitsizes are different but their bytesizes are all the same.


juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-02-13 17:41
To: Richard Biener
CC: juzhe.zhong\@rivai.ai; incarnation.p.lee; gcc-patches; Kito.cheng; ams
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
Richard Biener  writes:
> On Mon, 13 Feb 2023, juzhe.zh...@rivai.ai wrote:
>
>> >> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
>> Yes, I think so.
>> 
>> Let's explain RVV more clearly.
>> Let's suppose we have vector-length = 64bits in RVV CPU.
>> VNx1BI is exactly 1 consecutive bits.
>> VNx2BI is exactly 2 consecutive bits.
>> VNx4BI is exactly 4 consecutive bits.
>> VNx8BI is exactly 8 consecutive bits.
>> 
>> For VNx1BI (vbool64_t ), we load it wich this asm:
>> vsetvl e8mf8
>> vlm.v
>> 
>> For VNx2BI (vbool32_t ), we load it wich this asm:
>> vsetvl e8mf4
>> vlm.v
>> 
>> For VNx4BI (vbool16_t ), we load it wich this asm:
>> vsetvl e8mf2
>> vlm.v
>> 
>> For VNx8BI (vbool8_t ), we load it wich this asm:
>> vsetvl e8m1
>> vlm.v
>> 
>> In case of this code sequence:
>> vbool16_t v4 = *(vbool16_t *)in;
>> vbool8_t v3 = *(vbool8_t*)in;
>> 
>> Since VNx4BI (vbool16_t ) is smaller than VNx8BI (vbool8_t )
>> We can't just use the data loaded by VNx4BI (vbool16_t ) in  VNx8BI 
>> (vbool8_t ).
>> But we can use the data loaded by VNx8BI (vbool8_t  ) in  VNx4BI (vbool16_t 
>> ).
>>
>> In this example, GCC thinks data loaded for vbool8_t v3 can be replaced by 
>> vbool16_t v4 which is already loaded
>> It's incorrect for RVV.
>
> OK, so the 'vlm.v' instruction will zero the padding bits (according to
> vsetvl), but I doubt the memory subsystem will not load a whole byte.
>
> Then GET_MODE_PRECISION of VNx4BI has to be smaller than 
> GET_MODE_PRECISION of VNx8BI, even if their size is the same.
>
> I suppose that ADJUST_NUNITS should be able to do this, but then we
> have in aarch64-modes.def
>
> VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
> VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
> VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
> VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
>
> ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
> ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
> ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
> ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
>
> so all VNxMBI modes are 2 bytes in size but their component is always
> BImode but IIRC the elements of VNx2BImode occupy 4 bits each?
 
Yeah.  Only the low bit is significant, so it's still a 1-bit element.
But the padding is distributed evenly across the elements rather than
being grouped at one end of the predicate.
 
> For riscv we have
>
> VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
> ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
>
> so here it would be natural to set the mode precision to
> a poly-int computed by the component precision times nunits?  OTOH
> we have to look at the component precision vs. size as well and
>
> /* Single bit mode used for booleans.  */ 
> BOOL_MODE (BI, 1, 1); 
>
> BOOL_MODE is not documented, but its precision and size, so BImode
> has a size of 1.  That makes VECTOR_BOOL_MODE very special since
> the layout isn't derived from the component mode.  Deriving the
> layout from the precision would make aarch64 incorrect and
> would need BI2 and BI4 modes at least.
 
I think the elements have to stay BI for AArch64.  Using BI2 (with a
precision of 2) would make both bits significant.
 
I'm not sure the RVV case fits into the existing mode layout scheme.
AFAIK we don't currently support vector modes with padding at one end.
If that's right, the fix is likely to involve more than just tweaking
the mode parameters.
 
What's the byte size of VNx1BI, expressed as a function of N?
If it's CEIL (N, 8) then we don't have a way of representing that yet.
 
Thanks,
Richard
 


Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Mon, 13 Feb 2023, juzhe.zh...@rivai.ai wrote:
> >
> >> >> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
> >> Yes, I think so.
> >> 
> >> Let's explain RVV more clearly.
> >> Let's suppose we have vector-length = 64bits in RVV CPU.
> >> VNx1BI is exactly 1 consecutive bits.
> >> VNx2BI is exactly 2 consecutive bits.
> >> VNx4BI is exactly 4 consecutive bits.
> >> VNx8BI is exactly 8 consecutive bits.
> >> 
> >> For VNx1BI (vbool64_t ), we load it wich this asm:
> >> vsetvl e8mf8
> >> vlm.v
> >> 
> >> For VNx2BI (vbool32_t ), we load it wich this asm:
> >> vsetvl e8mf4
> >> vlm.v
> >> 
> >> For VNx4BI (vbool16_t ), we load it wich this asm:
> >> vsetvl e8mf2
> >> vlm.v
> >> 
> >> For VNx8BI (vbool8_t ), we load it wich this asm:
> >> vsetvl e8m1
> >> vlm.v
> >> 
> >> In case of this code sequence:
> >> vbool16_t v4 = *(vbool16_t *)in;
> >> vbool8_t v3 = *(vbool8_t*)in;
> >> 
> >> Since VNx4BI (vbool16_t ) is smaller than VNx8BI (vbool8_t )
> >> We can't just use the data loaded by VNx4BI (vbool16_t ) in  VNx8BI 
> >> (vbool8_t ).
> >> But we can use the data loaded by VNx8BI (vbool8_t  ) in  VNx4BI 
> >> (vbool16_t ).
> >>
> >> In this example, GCC thinks data loaded for vbool8_t v3 can be replaced by 
> >> vbool16_t v4 which is already loaded
> >> It's incorrect for RVV.
> >
> > OK, so the 'vlm.v' instruction will zero the padding bits (according to
> > vsetvl), but I doubt the memory subsystem will not load a whole byte.
> >
> > Then GET_MODE_PRECISION of VNx4BI has to be smaller than 
> > GET_MODE_PRECISION of VNx8BI, even if their size is the same.
> >
> > I suppose that ADJUST_NUNITS should be able to do this, but then we
> > have in aarch64-modes.def
> >
> > VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
> > VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
> > VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
> > VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
> >
> > ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
> > ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
> > ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
> > ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
> >
> > so all VNxMBI modes are 2 bytes in size but their component is always
> > BImode but IIRC the elements of VNx2BImode occupy 4 bits each?
> 
> Yeah.  Only the low bit is significant, so it's still a 1-bit element.
> But the padding is distributed evenly across the elements rather than
> being grouped at one end of the predicate.

I wonder what we'd do for a target that makes the high bit significant ;)

> > For riscv we have
> >
> > VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
> > ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
> >
> > so here it would be natural to set the mode precision to
> > a poly-int computed by the component precision times nunits?  OTOH
> > we have to look at the component precision vs. size as well and
> >
> > /* Single bit mode used for booleans.  */ 
> > BOOL_MODE (BI, 1, 1); 
> >
> > BOOL_MODE is not documented, but its precision and size, so BImode
> > has a size of 1.  That makes VECTOR_BOOL_MODE very special since
> > the layout isn't derived from the component mode.  Deriving the
> > layout from the precision would make aarch64 incorrect and
> > would need BI2 and BI4 modes at least.
> 
> I think the elements have to stay BI for AArch64.  Using BI2 (with a
> precision of 2) would make both bits significant.

I think what's "wrong" with a BImode component mode is not the
precision but the size: we don't support bit-precision component
types on the GENERIC side, but for bool vector modes we pack the
components to a bit size, and aarch64 has varying bit sizes here
(and thus components with padding).  I don't think we support
modes with sizes less than a unit, but since bool modes are special
we could re-purpose their precision to mean bitsize.

> I'm not sure the RVV case fits into the existing mode layout scheme.
> AFAIK we don't currently support vector modes with padding at one end.
> If that's right, the fix is likely to involve more than just tweaking
> the mode parameters.
> 
> What's the byte size of VNx1BI, expressed as a function of N?
> If it's CEIL (N, 8) then we don't have a way of representing that yet.

PARTIAL_VECTOR_MODE?  (ick)

Richard.


RE: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask by using new optabs [PR108583]

2023-02-13 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Andrew MacLeod 
> Sent: Friday, February 10, 2023 8:59 PM
> To: Richard Biener ; Richard Sandiford
> 
> Cc: Tamar Christina ; Tamar Christina via Gcc-
> patches ; nd ;
> j...@ventanamicro.com
> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask
> by using new optabs [PR108583]
> 
> 
> On 2/10/23 13:34, Richard Biener wrote:
> >
> >>> In any case, if you disagree I don’t' really see a way forward aside
> >>> from making this its own pattern running it before the overwidening
> pattern.
> >> I think we should look to see if ranger can be persuaded to provide
> >> the range of the 16-bit addition, even though the statement that
> >> produces it isn't part of a BB.  It shouldn't matter that the
> >> addition originally came from a 32-bit one: the range follows
> >> directly from the ranges of the operands (i.e. the fact that the
> >> operands are the results of widening conversions).
> > I think you can ask ranger on operations on names defined in the IL,
> > so you can work yourself through the sequence of operations in the
> > pattern sequence to compute ranges on their defs (and possibly even
> > store them in the SSA info).  You just need to pick the correct ranger
> > API for this…. Andrew CCed
> >
> >
> Its not clear to me whats being asked...
> 
> Expressions don't need to be in the IL to do range calculations.. I believe we
> support arbitrary tree expressions via range_of_expr.
> 
> if you have 32 bit ranges that you want to do 16 bit addition on, you can also
> cast those ranges to a 16bit type,
> 
> my32bitrange.cast (my16bittype);
> 
> then invoke range-ops directly via getting the handler:
> 
> handler = range_op_handler (PLUS_EXPR, 16bittype_tree); if (handler)
>     handler->fold (result, my16bittype, mycasted32bitrange,
> myothercasted32bitrange)
> 
> There are higher level APIs if what you have on hand is closer to IL than
> random ranges
> 
> Describe exactly what it is you want to do... and I'll try to direct you to 
> the
> best way to do it.

The vectorizer has a pattern matcher that runs at startup on the scalar code.
This pattern matcher can replace one or more statements with alternative ones;
these can be either existing tree codes or new internal functions.

One of the patterns is an over-widening detection pattern, which reduces the
precision that an operation is to be done in during vectorization.

Another one is widening-operation detection (e.g. widening multiplication or
addition), which replaces PLUS_EXPR with WIDEN_PLUS_EXPR.

These can be chained, so e.g. a widening addition done on ints can be reduced
to a widening addition done on shorts.

The question is whether, given the new expression that the vectorizer has
created, ranger can tell what the precision is.  get_range_query fails,
presumably because it has no idea about the newly created operations and
also doesn't know about any new IFNs.

Thanks,
Tamar

> 
> Andrew
> 
> 



Re: [PATCH] testsuite: adjust patterns in RISC-V tests to skip unwind table directives

2023-02-13 Thread Andreas Schwab via Gcc-patches
On Feb 09 2023, Andrew Pinski via Gcc-patches wrote:

> Maybe you could use check-function-bodies for these files?
> check-function-bodies does remove cfi and other .line directives too
> when doing the scanning.

It doesn't work for negative assertions, though.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Sandiford via Gcc-patches
"juzhe.zh...@rivai.ai"  writes:
>>> What's the byte size of VNx1BI, expressed as a function of N?
>>> If it's CEIL (N, 8) then we don't have a way of representing that yet.
> N is a poly value.
> RVV like SVE support scalable vector.
> the N is poly (1,1).
>
> VNx1B mode nunits = poly(1,1) units.
> VNx1B mode bitsize =poly (1,1) bitsize.
> VNx1B mode bytesize = poly(1,1) units (currently). Ideally and more accurate, 
> it should be VNx1B mode bytesize =poly (1/8,1/8).

But this would be a fractional bytesize, and like Richard says,
the memory subsystem would always access full bytes.  So I think
the bytesize would have to be at least CEIL (N, 8).

> However, it can't represent it like this. GCC consider its bytesize as  poly 
> (1,1) bytesize.

Ah, OK.  That (making the size N bytes) does seem like a reasonable
workaround, provided that it matches the C types, etc.  So the total
amount of padding is 7N bits (I assume at the msb of the type when
viewed as an integer).

I agree that what (IIUC) was discussed upthread works, i.e.:

  bytesize = N
  bitsize = N * 8 (fixed function of bytesize)
  precision = N
  nunits = N
  unit_size = 1
  unit_precision = 1

But target-independent code won't expect this layout, so supporting
it will involve more than just adjusting the parameters.
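
For concreteness, the riscv-modes.def entries might then end up looking
something like this (a sketch only: riscv_v_adjust_bytesize and
riscv_v_adjust_precision are assumed helpers, not existing code, and it
assumes genmodes accepts per-mode byte size and precision adjustments for
vector bool modes):

VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
ADJUST_ALIGNMENT (VNx1BI, 1);
ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1));
ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));

That would give VNx1BI the N-byte size and N-bit precision from the list
above while keeping the BImode component.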

Thanks,
Richard


Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread juzhe.zh...@rivai.ai
Yeah, I agree with you.  A memory system access should always be at least
1 byte.

So, consider the following code:

vsetvl e8,mf8
vlm.v v8, a0  (v8 is a 1-bit mask; not sure what the behavior is for this case)
vsm.v v8,a1
vsetvl e8,m1
vlm.v v8, a0  (v8 is an 8-bit mask)
vsm.v v8,a2
(Note: both vlm.v are loading the same address)

Such asm will not happen in GCC.  It will become like this, since the bool
modes are tied:

vsetvl e8,mf8
vlm.v v8, a0  (v8 is an 8-bit mask)
vsm.v v8,a0
vsm.v v8,a1

I am not sure whether it's correct.  Maybe I should ask the RVV ISA community.



juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-02-13 18:18
To: juzhe.zhong\@rivai.ai
CC: rguenther; incarnation.p.lee; gcc-patches; Kito.cheng; ams
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
"juzhe.zh...@rivai.ai"  writes:
>>> What's the byte size of VNx1BI, expressed as a function of N?
>>> If it's CEIL (N, 8) then we don't have a way of representing that yet.
> N is a poly value.
> RVV like SVE support scalable vector.
> the N is poly (1,1).
>
> VNx1B mode nunits = poly(1,1) units.
> VNx1B mode bitsize =poly (1,1) bitsize.
> VNx1B mode bytesize = poly(1,1) units (currently). Ideally and more accurate, 
> it should be VNx1B mode bytesize =poly (1/8,1/8).
 
But this would be a fractional bytesize, and like Richard says,
the memory subsystem would always access full bytes.  So I think
the bytesize would have to be at least CEIL (N, 8).
 
> However, it can't represent it like this. GCC consider its bytesize as  poly 
> (1,1) bytesize.
 
Ah, OK.  That (making the size N bytes) does seem like a reasonable
workaround, provided that it matches the C types, etc.  So the total
amount of padding is 7N bits (I assume at the msb of the type when
viewed as an integer).
 
I agree that what (IIUC) was discussed upthread works, i.e.:
 
  bytesize = N
  bitsize = N * 8 (fixed function of bytesize)
  precision = N
  nunits = N
  unit_size = 1
  unit_precision = 1
 
But target-independent code won't expect this layout, so supporting
it will involve more than just adjusting the parameters.
 
Thanks,
Richard
 


[PATCH 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-13 Thread Liao Shihua
This patch implements the RISC-V Scalar Cryptography extension.
It includes machine descriptions, intrinsics, and testcases.

Liao Shihua (5):
  Add prototypes for RISC-V Crypto built-in functions
  Implement ZBKB, ZBKC and ZBKX extensions
  Implement ZKND and ZKNE extensions
  Implement ZKNH extensions
  Implement ZKSH and ZKSED extensions

 gcc/config.gcc|   2 +-
 gcc/config/riscv/bitmanip.md  |  20 +-
 gcc/config/riscv/constraints.md   |   8 +
 gcc/config/riscv/crypto.md| 437 ++
 gcc/config/riscv/riscv-builtins.cc|  26 ++
 gcc/config/riscv/riscv-crypto.def |  94 
 gcc/config/riscv/riscv-ftypes.def |  10 +
 gcc/config/riscv/riscv.md |   4 +-
 gcc/config/riscv/riscv_scalar_crypto.h| 218 +
 gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
 gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
 gcc/testsuite/gcc.target/riscv/zbkc32.c   |  18 +
 gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
 gcc/testsuite/gcc.target/riscv/zbkx32.c   |  19 +
 gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
 gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
 gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
 gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
 gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
 gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
 .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
 .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
 gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
 gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
 24 files changed, 1187 insertions(+), 12 deletions(-)
 create mode 100644 gcc/config/riscv/crypto.md
 create mode 100644 gcc/config/riscv/riscv-crypto.def
 create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c

-- 
2.38.1.windows.1



Patch ping

2023-02-13 Thread Jakub Jelinek via Gcc-patches
I'd like to ping a few pending patches:

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607534.html
  - PR107846 - P1 - c-family: Account for integral promotions of left shifts 
for -Wshift-overflow warning

https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610285.html
  - PR108464 - P1 - file-prefix-map: Fix up -f*-prefix-map= (3 variants)

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611647.html
  - PR108702 - P1 - c++: Don't defer local statics initialized with constant 
expressions

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606382.html
  - PR107703 - P3, ABI - libgcc, i386: Add __fix{,uns}bfti and __float{,un}tibf

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606973.html
  - PR107465 - P2 - c-family: Fix up -Wsign-compare BIT_NOT_EXPR handling

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607104.html
  - PR107465 - P2 - c-family: Incremental fix for -Wsign-compare BIT_NOT_EXPR 
handling

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607145.html
  - PR107558 - P2 - c++: Don't clear TREE_READONLY for -fmerge-all-constants 
for non-aggregates

https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608932.html
  - PR108079 - P2 - c, c++, cgraphunit: Prevent duplicated -Wunused-value 
warnings

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611615.html
  - PR108716 - P2 - c++, debug: Fix up locus of DW_TAG_imported_module

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611180.html
  - PR108634 - P3 - tree: Use comdat tree_code_{type,length} even for C++11/14

https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605965.html
  - ABI - aarch64: Add bfloat16_t support for aarch64 (enabling it in GCC 14
will be harder)

Thanks

Jakub



[PATCH] LoongArch: Fix multiarch tuple canonization

2023-02-13 Thread Xi Ruoyao via Gcc-patches
Multiarch tuple will be coded in file or directory names in
multiarch-aware distros, so one ABI should have only one multiarch
tuple.  For example, "--target=loongarch64-linux-gnu --with-abi=lp64s"
and "--target=loongarch64-linux-gnusf" should both set multiarch tuple
to "loongarch64-linux-gnusf".  Before this commit,
"--target=loongarch64-linux-gnu --with-abi=lp64s --disable-multilib"
would produce the wrong result (loongarch64-linux-gnu).

A recent LoongArch psABI revision mandates "loongarch64-linux-gnu" to be
used for -mabi=lp64d (instead of "loongarch64-linux-gnuf64") for some
non-technical reason [1].  Note that we cannot make
"loongarch64-linux-gnuf64" an alias for "loongarch64-linux-gnu" because
to implement such an alias, we must create thousands of symlinks in the
distro, and doing so would be completely impractical.  This commit also
aligns GCC with the revision.

Tested by building cross compilers with --enable-multiarch and multiple
combinations of --target=loongarch64-linux-gnu*, --with-abi=lp64{s,f,d},
and --{enable,disable}-multilib; then running "xgcc --print-multiarch" and
manually verifying the result by eye.

Ok for trunk and backport to releases/gcc-12?

[1]: https://github.com/loongson/LoongArch-Documentation/pull/80

gcc/ChangeLog:

* config.gcc (triplet_abi): Set its value based on $with_abi,
instead of $target.
(la_canonical_triplet): Set it after $triplet_abi is set
correctly.
* config/loongarch/t-linux (MULTILIB_OSDIRNAMES): Make the
multiarch tuple for lp64d "loongarch64-linux-gnu" (without
"f64" suffix).
---
 gcc/config.gcc   | 14 +++---
 gcc/config/loongarch/t-linux |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 067720ac795..c070e6ecd2e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4889,20 +4889,16 @@ case "${target}" in
case ${target} in
loongarch64-*-*-*f64)
abi_pattern="lp64d"
-   triplet_abi="f64"
;;
loongarch64-*-*-*f32)
abi_pattern="lp64f"
-   triplet_abi="f32"
;;
loongarch64-*-*-*sf)
abi_pattern="lp64s"
-   triplet_abi="sf"
;;
loongarch64-*-*-*)
abi_pattern="lp64[dfs]"
abi_default="lp64d"
-   triplet_abi=""
;;
*)
echo "Unsupported target ${target}." 1>&2
@@ -4923,9 +4919,6 @@ case "${target}" in
  ;;
esac
 
-   la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
-
-
# Perform initial sanity checks on --with-* options.
case ${with_arch} in
"" | loongarch64 | la464) ;; # OK, append here.
@@ -4996,6 +4989,13 @@ case "${target}" in
;;
esac
 
+   case ${with_abi} in
+ "lp64d") triplet_abi="";;
+ "lp64f") triplet_abi="f32";;
+ "lp64s") triplet_abi="sf";;
+   esac
+   la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
+
# Set default value for with_abiext (internal)
case ${with_abiext} in
"")
diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
index 131c45fdced..e40da179203 100644
--- a/gcc/config/loongarch/t-linux
+++ b/gcc/config/loongarch/t-linux
@@ -40,7 +40,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
 
 MULTILIB_OSDIRNAMES = \
   mabi.lp64d=../lib64$\
-  $(call if_multiarch,:loongarch64-linux-gnuf64)
+  $(call if_multiarch,:loongarch64-linux-gnu)
 
 MULTILIB_OSDIRNAMES += \
   mabi.lp64f=../lib64/f32$\
-- 
2.39.1



Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread juzhe.zh...@rivai.ai
>> Yeah, I am aggree with you. Memory system access should always at least 
>> 1-byte.

>> So, consider such following code:

>> vsetvl e8,mf8 
>> vlm.v v8, a0 (v8 is a 1-bit mask (Not sure what the behavior dealing with 
>> this case))
>> vsm.v v8,a1
>> vsetvl e8,m1
>> vlm.v v8, a0  (v8 is a 8-bit mask)
>> vsm.v v8,a2
>> (Note: both vlm.v are loading same address)

>> Such asm will not happen in GCC. It will become like this since bool modes 
>> are tied:

>> vsetvl e8,mf8 
>> vlm.v v8, a0  (v8 is a 8-bit mask)
>> vsm.v v8,a0
>> vsm.v v8,a1

>> I am not sure whether it's correct. Maybe I should ask RVV ISA community.

Such a case may not be appropriate to talk about, since a 1-bit mask for
VNx1BI is the minimum value.  Since the size is a poly value (1,1), it can
be 1 bit or 1 byte or 2 bytes, etc.; it's a compile-time unknown which
depends on the CPU vector length.
This case should be represented as this:

vsetvl e8,mf8
vlm.v v8, a0  (v8 is an N x 1-bit mask, N is compile-time unknown)
vsm.v v8,a1
vsetvl e8,m1
vlm.v v8, a0  (v8 is an N x 8-bit mask, N is compile-time unknown)
vsm.v v8,a2
(Note: both vlm.v are loading the same address)

Such asm will not happen in GCC.  It will become like this, since the bool
modes are tied:

vsetvl e8,mf8
vlm.v v8, a0  (v8 is an N x 1-bit mask, N is compile-time unknown)
vsm.v v8,a0
vsm.v v8,a1

Such asm codegen is incorrect; this is what we want to fix.
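
At the C level the same problem can be shown with a reproducer like this
(a hypothetical example in the style of the earlier foo5_3, not a test from
the patch):

#include "riscv_vector.h"

/* Two mask loads from the same bytes, but with different effective bit
   lengths: the vbool8_t load must not be replaced by a copy of the value
   loaded as vbool64_t.  */
void bad_tie (int32_t *restrict in, int32_t *restrict out)
{
  vbool64_t m64 = *(vbool64_t *)in;      /* vsetvli e8,mf8; vlm.v  */
  *(vbool64_t *)(out + 100) = m64;
  vbool8_t m8 = *(vbool8_t *)in;         /* must stay: vsetvli e8,m1; vlm.v  */
  *(vbool8_t *)(out + 200) = m8;
}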



juzhe.zh...@rivai.ai
 
From: juzhe.zh...@rivai.ai
Date: 2023-02-13 18:28
To: richard.sandiford
CC: rguenther; incarnation.p.lee; gcc-patches; Kito.cheng; ams
Subject: Re: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
Yeah, I am aggree with you. Memory system access should always at least 1-byte.

So, consider such following code:

vsetvl e8,mf8 
vlm.v v8, a0 (v8 is a 1-bit mask (Not sure what the behavior dealing with this 
case))
vsm.v v8,a1
vsetvl e8,m1
vlm.v v8, a0  (v8 is a 8-bit mask)
vsm.v v8,a2
(Note: both vlm.v are loading same address)

Such asm will not happen in GCC. It will become like this since bool modes are 
tied:

vsetvl e8,mf8 
vlm.v v8, a0  (v8 is a 8-bit mask)
vsm.v v8,a0
vsm.v v8,a1

I am not sure whether it's correct. Maybe I should ask RVV ISA community.



juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-02-13 18:18
To: juzhe.zhong\@rivai.ai
CC: rguenther; incarnation.p.lee; gcc-patches; Kito.cheng; ams
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
"juzhe.zh...@rivai.ai"  writes:
>>> What's the byte size of VNx1BI, expressed as a function of N?
>>> If it's CEIL (N, 8) then we don't have a way of representing that yet.
> N is a poly value.
> RVV like SVE support scalable vector.
> the N is poly (1,1).
>
> VNx1B mode nunits = poly(1,1) units.
> VNx1B mode bitsize =poly (1,1) bitsize.
> VNx1B mode bytesize = poly(1,1) units (currently). Ideally and more accurate, 
> it should be VNx1B mode bytesize =poly (1/8,1/8).
 
But this would be a fractional bytesize, and like Richard says,
the memory subsystem would always access full bytes.  So I think
the bytesize would have to be at least CEIL (N, 8).
 
> However, it can't represent it like this. GCC consider its bytesize as  poly 
> (1,1) bytesize.
 
Ah, OK.  That (making the size N bytes) does seem like a reasonable
workaround, provided that it matches the C types, etc.  So the total
amount of padding is 7N bits (I assume at the msb of the type when
viewed as an integer).
 
I agree that what (IIUC) was discussed upthread works, i.e.:
 
  bytesize = N
  bitsize = N * 8 (fixed function of bytesize)
  precision = N
  nunits = N
  unit_size = 1
  unit_precision = 1
 
But target-independent code won't expect this layout, so supporting
it will involve more than just adjusting the parameters.
 
Thanks,
Richard
 


Re: [PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/2/23 09:43, Robin Dapp wrote:
> Hi,
> 
> this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
> It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
> optabs.
> 
> It also includes various vll/vstl testcases adapted from Kewen Lin's patch
> for Power.
> 
> Bootstrapped and regtested on z13-z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (vll_bias_operand): Add -1 bias.
>   * config/s390/s390.cc (s390_option_override_internal): Make
>   partial vector usage the default from z13 on.
>   * config/s390/vector.md (len_load_v16qi): Add.
>   (len_store_v16qi): Add.

...

> +;
> +; Implement len_load/len_store optabs with vll/vstl.
> +(define_expand "len_load_v16qi"
> +  [(match_operand:V16QI 0 "register_operand")
> +   (match_operand:V16QI 1 "memory_operand")
> +   (match_operand:QI 2 "register_operand")
> +   (match_operand:QI 3 "vll_bias_operand")
> +  ]
> +  "TARGET_VX && TARGET_64BIT"
> +{
> +  rtx src1 = XEXP (operands[1], 0);
> +  rtx src = gen_reg_rtx (Pmode);
> +  emit_move_insn (src, src1);
> +  rtx mem = gen_rtx_MEM (BLKmode, src);

Do you really need a copy of the address register? Couldn't you just do a
src = adjust_address (operands[1], BLKmode, 0);

> +
> +  rtx len = gen_lowpart (SImode, operands[2]);
> +  emit_insn (gen_vllv16qi (operands[0], len, mem));

You create a paradoxical subreg of the QImode input but vll actually uses the 
whole 32 bit value.
Couldn't we end up with uninitialized bytes being used as part of the length 
then? Do we need a
zero-extend here?
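
Something like the following is what I have in mind -- an untested sketch
only, combining both points with the generic helpers:

  rtx mem = adjust_address (operands[1], BLKmode, 0);
  /* Zero-extend the QImode length so that all 32 bits vll looks at are
     well defined.  */
  rtx len = convert_to_mode (SImode, operands[2], 1);
  emit_insn (gen_vllv16qi (operands[0], len, mem));
  DONE;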

Bye,

Andreas



[Ping] ifcvt: Fix regression in aarch64/fcsel_1.c

2023-02-13 Thread Richard Sandiford via Gcc-patches
Ping for the patch below



aarch64/fcsel_1.c contains:

double
f_2 (double a, double b, double c, double d)
{
  if (a > b)
return c;
  else
return d;
}

which started failing in the GCC 12 timeframe.  When it passed,
the RTL had the form:

[A]
  (set (reg ret) (reg c))
  (set (pc) (if_then_else (gt ...) (label_ref ret) (pc)))
edge to ret, fallthru to else
else:
  (set (reg ret) (reg d))
fallthru to ret
ret:
  ...exit...

i.e. a branch around.  Now the RTL has form:

[B]
  (set (reg ret) (reg d))
  (set (pc) (if_then_else (gt ...) (label_ref then) (pc)))
edge to then, fallthru to ret
ret:
  ...exit...

then:
  (set (reg ret) (reg c))
edge to ret

i.e. a branch out.

Both are valid, of course, and there's no easy way to predict
which we'll get.  But ifcvt canonicalises its representation on:

  if (cond) goto fallthru else goto non-fallthru

That is, it canonicalises on the branch-around case for half-diamonds.
It therefore wants to invert the comparison in [B] to get:

  if (...) goto ret else goto then

But that isn't possible for strict FP gt, so the optimisation fails.
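
As an aside, a minimal runnable illustration of why that inversion is
invalid under IEEE semantics (linked against the f_2 above; this is not
one of the testcases):

#include <math.h>

extern double f_2 (double, double, double, double);

int main (void)
{
  /* NAN > 1.0 is false, so f_2 must return d (20.0).  Branching on
     NAN <= 1.0 as the "inverse" of GT is also false and would wrongly
     return c (10.0); the real complement of GT is the unordered UNLE.  */
  return f_2 (NAN, 1.0, 10.0, 20.0) == 20.0 ? 0 : 1;
}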

Canonicalising on the branch-around case seems like the wrong choice for
half diamonds.  The natural way of expressing a conditional branch is
for the label_ref to be the "then" destination and pc to be the "else"
destination.  And the natural choice of condition seems to be the one
under which extra stuff *is* done, rather than the one under which extra
stuff *isn't* done.  But that decision goes back at least 20 years and
it doesn't seem like a good idea to change it in stage 4.

This patch instead allows the internal structure to store the
condition in inverted form.  For simplicity it handles only
conditional moves, which is the one case that is needed
to fix the known regression.  (There are probably unknown
regressions too, but still.)

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


gcc/
* ifcvt.h (noce_if_info::cond_inverted): New field.
* ifcvt.cc (cond_move_convert_if_block): Swap the then and else
values when cond_inverted is true.
(noce_find_if_block): Allow the condition to be inverted when
handling conditional moves.
---
 gcc/ifcvt.cc | 31 +++
 gcc/ifcvt.h  |  8 
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 008796838f7..63ef42b3c34 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -4253,6 +4253,9 @@ cond_move_convert_if_block (struct noce_if_info *if_infop,
e = dest;
}
 
+  if (if_infop->cond_inverted)
+   std::swap (t, e);
+
   target = noce_emit_cmove (if_infop, dest, code, cond_arg0, cond_arg1,
t, e);
   if (!target)
@@ -4405,7 +4408,6 @@ noce_find_if_block (basic_block test_bb, edge then_edge, 
edge else_edge,
   basic_block then_bb, else_bb, join_bb;
   bool then_else_reversed = false;
   rtx_insn *jump;
-  rtx cond;
   rtx_insn *cond_earliest;
   struct noce_if_info if_info;
   bool speed_p = optimize_bb_for_speed_p (test_bb);
@@ -4481,25 +4483,28 @@ noce_find_if_block (basic_block test_bb, edge 
then_edge, edge else_edge,
   if (! onlyjump_p (jump))
 return FALSE;
 
-  /* If this is not a standard conditional jump, we can't parse it.  */
-  cond = noce_get_condition (jump, &cond_earliest, then_else_reversed);
-  if (!cond)
-return FALSE;
-
-  /* We must be comparing objects whose modes imply the size.  */
-  if (GET_MODE (XEXP (cond, 0)) == BLKmode)
-return FALSE;
-
   /* Initialize an IF_INFO struct to pass around.  */
   memset (&if_info, 0, sizeof if_info);
   if_info.test_bb = test_bb;
   if_info.then_bb = then_bb;
   if_info.else_bb = else_bb;
   if_info.join_bb = join_bb;
-  if_info.cond = cond;
+  if_info.cond = noce_get_condition (jump, &cond_earliest,
+then_else_reversed);;
   rtx_insn *rev_cond_earliest;
   if_info.rev_cond = noce_get_condition (jump, &rev_cond_earliest,
 !then_else_reversed);
+  if (!if_info.cond && !if_info.rev_cond)
+return FALSE;
+  if (!if_info.cond)
+{
+  std::swap (if_info.cond, if_info.rev_cond);
+  std::swap (cond_earliest, rev_cond_earliest);
+  if_info.cond_inverted = true;
+}
+  /* We must be comparing objects whose modes imply the size.  */
+  if (GET_MODE (XEXP (if_info.cond, 0)) == BLKmode)
+return FALSE;
   gcc_assert (if_info.rev_cond == NULL_RTX
  || rev_cond_earliest == cond_earliest);
   if_info.cond_earliest = cond_earliest;
@@ -4518,7 +4523,9 @@ noce_find_if_block (basic_block test_bb, edge then_edge, 
edge else_edge,
 
   /* Do the real work.  */
 
-  if (noce_process_if_block (&if_info))
+  /* ??? noce_process_if_block has not yet been updated to handle
+ inverted conditions.  */
+  if (!if_info.cond_inverted && noce_process_if_block (&if_info))
 return TRUE;
 
   if (HAVE_conditional_

Re: [PATCH] IBM zSystems: Fix predicate execute_operation

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 17:10, Stefan Schulze Frielinghaus wrote:
> Use constrain_operands in order to check whether there exists a valid
> alternative instead of extract_constrain_insn which ICEs in case no
> alternative is found.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md (execute_operation): Use
>   constrain_operands instead of extract_constrain_insn in order to
>   determine wheter there exists a valid alternative.

Ok. Thanks!

Andreas



[Ping^3] gomp: Various fixes for SVE types [PR101018]

2023-02-13 Thread Richard Sandiford via Gcc-patches
Ping^3 [https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606741.html]



Various parts of the omp code checked whether the size of a decl
was an INTEGER_CST in order to determine whether the decl was
variable-sized or not.  If it was variable-sized, it was expected
to have a DECL_VALUE_EXPR replacement, as for VLAs.

This patch uses poly_int_tree_p instead, so that variable-length
SVE vectors are treated like constant-length vectors.  This means
that some structures become poly_int-sized, with some fields at
poly_int offsets, but we already have code to handle that.

An alternative would have been to handle the data via indirection
instead.  However, that's likely to be more complicated, and it
would contradict is_variable_sized, which already uses a check
for TREE_CONSTANT rather than INTEGER_CST.

gimple_add_tmp_var should probably not add a safelen of 1
for SVE vectors, but that's really a separate thing and might
be hard to test.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
PR middle-end/101018
* poly-int.h (can_and_p): New function.
* fold-const.cc (poly_int_binop): Use it to optimize BIT_AND_EXPRs
involving POLY_INT_CSTs.
* expr.cc (get_inner_reference): Fold poly_uint64 size_trees
into the constant bitsize.
* gimplify.cc (gimplify_bind_expr): Use poly_int_tree_p instead
of INTEGER_CST when checking for constant-sized omp data.
(omp_add_variable): Likewise.
(omp_notice_variable): Likewise.
(gimplify_adjust_omp_clauses_1): Likewise.
(gimplify_adjust_omp_clauses): Likewise.
* omp-low.cc (scan_sharing_clauses): Likewise.
(lower_omp_target): Likewise.

gcc/testsuite/
PR middle-end/101018
* gcc.target/aarch64/sve/acle/pr101018-1.c: New test.
* gcc.target/aarch64/sve/acle/pr101018-2.c: Likewise
---
 gcc/expr.cc   |  4 +--
 gcc/fold-const.cc |  7 +
 gcc/gimplify.cc   | 23 
 gcc/omp-low.cc| 10 +++
 gcc/poly-int.h| 19 +
 .../aarch64/sve/acle/general/pr101018-1.c | 27 +++
 .../aarch64/sve/acle/general/pr101018-2.c | 23 
 7 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..a304c583d16 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7941,10 +7941,10 @@ get_inner_reference (tree exp, poly_int64_pod *pbitsize,
 
   if (size_tree != 0)
 {
-  if (! tree_fits_uhwi_p (size_tree))
+  if (! tree_fits_poly_uint64_p (size_tree))
mode = BLKmode, *pbitsize = -1;
   else
-   *pbitsize = tree_to_uhwi (size_tree);
+   *pbitsize = tree_to_poly_uint64 (size_tree);
 }
 
   *preversep = reverse_storage_order_for_component_p (exp);
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index b89cac91cae..000600017e2 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1183,6 +1183,13 @@ poly_int_binop (poly_wide_int &res, enum tree_code code,
return false;
   break;
 
+case BIT_AND_EXPR:
+  if (TREE_CODE (arg2) != INTEGER_CST
+ || !can_and_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
+&res))
+   return false;
+  break;
+
 case BIT_IOR_EXPR:
   if (TREE_CODE (arg2) != INTEGER_CST
  || !can_ior_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index f06ce3cc77a..096738c8ed4 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -7352,7 +7352,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree 
decl, unsigned int flags)
   /* When adding a variable-sized variable, we have to handle all sorts
  of additional bits of data: the pointer replacement variable, and
  the parameters of the type.  */
-  if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+  if (DECL_SIZE (decl) && !poly_int_tree_p (DECL_SIZE (decl)))
 {
   /* Add the pointer replacement variable as PRIVATE if the variable
 replacement is private, else FIRSTPRIVATE since we'll need the
@@ -8002,7 +8002,8 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
decl, bool in_code)
   && (flags & (GOVD_SEEN | GOVD_LOCAL)) == GOVD_SEEN
   && DECL_SIZE (decl))
 {
-  if (TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+  tree size;
+  if (!poly_int_tree_p (DECL_SIZE (decl)))
{
  splay_tree_node n2;
  tree t = DECL_VALUE_EXPR (decl);
@@ -8013,16 +8014,14 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
decl, bool in_code)
  n2->value |= GOVD_SEEN;
}
   else if (omp_privatize_by_reference (decl)
- 


[PATCH 4/5] RISC-V: Implement ZKNH extensions

2023-02-13 Thread Liao Shihua
  Implement the ZKNH extension.
  ZKNH is NIST Suite: Hash Function Instructions.

gcc/ChangeLog:

* config/riscv/crypto.md (riscv_sha256sig0_):Add ZKNH's 
instructions.
(riscv_sha256sig1_): Likewise.
(riscv_sha256sum0_): Likewise.
(riscv_sha256sum1_): Likewise.
(riscv_sha512sig0h): Likewise.
(riscv_sha512sig0l): Likewise.
(riscv_sha512sig1h): Likewise.
(riscv_sha512sig1l): Likewise.
(riscv_sha512sum0r): Likewise.
(riscv_sha512sum1r): Likewise.
(riscv_sha512sig0): Likewise.
(riscv_sha512sig1): Likewise.
(riscv_sha512sum0): Likewise.
(riscv_sha512sum1): Likewise.
* config/riscv/riscv-builtins.cc (AVAIL): Add ZKNH's AVAIL.
* config/riscv/riscv-crypto.def (RISCV_BUILTIN): Add ZKNH's built-in 
functions.
(DIRECT_BUILTIN): Likewise.
* config/riscv/riscv_scalar_crypto.h (__riscv_sha256sig0): Add ZKNH's 
intrinsics.
(__riscv_sha256sig1): Likewise.
(__riscv_sha256sum0): Likewise.
(__riscv_sha256sum1): Likewise.
(__riscv_sha512sig0h): Likewise.
(__riscv_sha512sig0l): Likewise.
(__riscv_sha512sig1h): Likewise.
(__riscv_sha512sig1l): Likewise.
(__riscv_sha512sum0r): Likewise.
(__riscv_sha512sum1r): Likewise.
(__riscv_sha512sig0): Likewise.
(__riscv_sha512sig1): Likewise.
(__riscv_sha512sum0): Likewise.
(__riscv_sha512sum1): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zknh-sha256.c: New test.
* gcc.target/riscv/zknh-sha512-32.c: New test.
* gcc.target/riscv/zknh-sha512-64.c: New test.

---
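For reference, a usage sketch (not one of the included tests; it assumes the
riscv_scalar_crypto.h prototypes take and return uint32_t and that the zknh
extension is enabled via -march, e.g. -march=rv64gc_zknh):

#include <stdint.h>
#include "riscv_scalar_crypto.h"

uint32_t
sha256_mix (uint32_t x, uint32_t y)
{
  /* Expected to compile to the sha256sig0 and sha256sum0 instructions.  */
  return __riscv_sha256sig0 (x) ^ __riscv_sha256sum0 (y);
}
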
 gcc/config/riscv/crypto.md| 138 ++
 gcc/config/riscv/riscv-builtins.cc|   2 +
 gcc/config/riscv/riscv-crypto.def |  21 +++
 gcc/config/riscv/riscv_scalar_crypto.h|  48 ++
 gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 
 .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
 .../gcc.target/riscv/zknh-sha512-64.c |  31 
 7 files changed, 312 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c

diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index a97fd398217..236eba69e46 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -48,6 +48,22 @@
 UNSPEC_AES_ESM
 UNSPEC_AES_ESI
 UNSPEC_AES_ESMI
+
+;; ZKNH unspecs
+UNSPEC_SHA_256_SIG0
+UNSPEC_SHA_256_SIG1
+UNSPEC_SHA_256_SUM0
+UNSPEC_SHA_256_SUM1
+UNSPEC_SHA_512_SIG0
+UNSPEC_SHA_512_SIG0H
+UNSPEC_SHA_512_SIG0L
+UNSPEC_SHA_512_SIG1
+UNSPEC_SHA_512_SIG1H
+UNSPEC_SHA_512_SIG1L
+UNSPEC_SHA_512_SUM0
+UNSPEC_SHA_512_SUM0R
+UNSPEC_SHA_512_SUM1
+UNSPEC_SHA_512_SUM1R
 ])
 
 ;; ZBKB extension
@@ -249,3 +265,125 @@
   [(set_attr "type" "crypto")])
 
 
+
+;; ZKNH - SHA256
+
+(define_insn "riscv_sha256sig0_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SHA_256_SIG0))]
+  "TARGET_ZKNH"
+  "sha256sig0\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sha256sig1_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SHA_256_SIG1))]
+  "TARGET_ZKNH"
+  "sha256sig1\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sha256sum0_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SHA_256_SUM0))]
+  "TARGET_ZKNH"
+  "sha256sum0\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sha256sum1_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SHA_256_SUM1))]
+  "TARGET_ZKNH"
+  "sha256sum1\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+;; ZKNH - SHA512
+
+(define_insn "riscv_sha512sig0h"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")]
+   UNSPEC_SHA_512_SIG0H))]
+  "TARGET_ZKNH && !TARGET_64BIT"
+  "sha512sig0h\t%0,%1,%2"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sha512sig0l"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")]
+   UNSPEC_SHA_512_SIG0L))]
+  "TARGET_ZKNH && !TARGET_64BIT"
+  "sha512sig0l\t%0,%1,%2"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sha512sig1h"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:

[PATCH 1/5] RISC-V: Add prototypes for RISC-V Crypto built-in functions

2023-02-13 Thread Liao Shihua
Add prototypes for RISC-V Crypto built-in functions.

gcc/ChangeLog:

* config/riscv/riscv-builtins.cc (RISCV_FTYPE_NAME2): New enumeration 
identifier.
(RISCV_FTYPE_NAME3): Likewise.
(RISCV_ATYPE_QI): New Argument types.
(RISCV_ATYPE_HI): Likewise.
(RISCV_FTYPE_ATYPES2): New RISCV_ATYPE.
(RISCV_FTYPE_ATYPES3): Likewise.
* config/riscv/riscv-ftypes.def (2): New definitions of prototypes.
(3): Likewise.
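
As an illustration (not part of the patch), the new macros compose as
follows for one of the added prototypes:

/* DEF_RISCV_FTYPE (2, (SI, SI, SI)) names the enumeration identifier
     RISCV_FTYPE_NAME2 (SI, SI, SI)   => RISCV_SI_FTYPE_SI_SI
   and supplies the argument-type list
     RISCV_FTYPE_ATYPES2 (SI, SI, SI) => intSI_type_node, intSI_type_node,
                                         intSI_type_node  */
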
---
 gcc/config/riscv/riscv-builtins.cc |  8 
 gcc/config/riscv/riscv-ftypes.def  | 10 ++
 2 files changed, 18 insertions(+)

diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index 25ca407f9a9..ded91e17554 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -42,6 +42,8 @@ along with GCC; see the file COPYING3.  If not see
 /* Macros to create an enumeration identifier for a function prototype.  */
 #define RISCV_FTYPE_NAME0(A) RISCV_##A##_FTYPE
 #define RISCV_FTYPE_NAME1(A, B) RISCV_##A##_FTYPE_##B
+#define RISCV_FTYPE_NAME2(A, B, C) RISCV_##A##_FTYPE_##B##_##C
+#define RISCV_FTYPE_NAME3(A, B, C, D) RISCV_##A##_FTYPE_##B##_##C##_##D
 
 /* Classifies the prototype of a built-in function.  */
 enum riscv_function_type {
@@ -132,6 +134,8 @@ AVAIL (always, (!0))
 /* Argument types.  */
 #define RISCV_ATYPE_VOID void_type_node
 #define RISCV_ATYPE_USI unsigned_intSI_type_node
+#define RISCV_ATYPE_QI intQI_type_node
+#define RISCV_ATYPE_HI intHI_type_node
 #define RISCV_ATYPE_SI intSI_type_node
 #define RISCV_ATYPE_DI intDI_type_node
 #define RISCV_ATYPE_VOID_PTR ptr_type_node
@@ -142,6 +146,10 @@ AVAIL (always, (!0))
   RISCV_ATYPE_##A
 #define RISCV_FTYPE_ATYPES1(A, B) \
   RISCV_ATYPE_##A, RISCV_ATYPE_##B
+#define RISCV_FTYPE_ATYPES2(A, B, C) \
+  RISCV_ATYPE_##A, RISCV_ATYPE_##B, RISCV_ATYPE_##C
+#define RISCV_FTYPE_ATYPES3(A, B, C, D) \
+  RISCV_ATYPE_##A, RISCV_ATYPE_##B, RISCV_ATYPE_##C, RISCV_ATYPE_##D
 
 static const struct riscv_builtin_description riscv_builtins[] = {
   #include "riscv-cmo.def"
diff --git a/gcc/config/riscv/riscv-ftypes.def 
b/gcc/config/riscv/riscv-ftypes.def
index 3a40c33e7c2..3b518195a29 100644
--- a/gcc/config/riscv/riscv-ftypes.def
+++ b/gcc/config/riscv/riscv-ftypes.def
@@ -32,3 +32,13 @@ DEF_RISCV_FTYPE (1, (VOID, USI))
 DEF_RISCV_FTYPE (1, (VOID, VOID_PTR))
 DEF_RISCV_FTYPE (1, (SI, SI))
 DEF_RISCV_FTYPE (1, (DI, DI))
+DEF_RISCV_FTYPE (2, (SI, QI, QI))
+DEF_RISCV_FTYPE (2, (SI, HI, HI))
+DEF_RISCV_FTYPE (2, (SI, SI, SI))
+DEF_RISCV_FTYPE (2, (DI, QI, QI))
+DEF_RISCV_FTYPE (2, (DI, HI, HI))
+DEF_RISCV_FTYPE (2, (DI, SI, SI))
+DEF_RISCV_FTYPE (2, (DI, DI, SI))
+DEF_RISCV_FTYPE (2, (DI, DI, DI))
+DEF_RISCV_FTYPE (3, (SI, SI, SI, SI))
+DEF_RISCV_FTYPE (3, (DI, DI, DI, SI))
-- 
2.38.1.windows.1



[PATCH 5/5] RISC-V: Implement ZKSH and ZKSED extensions

2023-02-13 Thread Liao Shihua
  Implement ZKSH and ZKSED extensions.
  ZKSH  is  ShangMi Suite: SM3 Hash Function Instructions.
  ZKSED is ShangMi Suite: SM4 Block Cipher Instructions.

gcc/ChangeLog:

* config/riscv/crypto.md (riscv_sm3p0_<mode>): Add ZKSH's and ZKSED's
instructions.
(riscv_sm3p1_<mode>): Likewise.
(riscv_sm4ed_<mode>): Likewise.
(riscv_sm4ks_<mode>): Likewise.
* config/riscv/riscv-builtins.cc (AVAIL): Add ZKSH's and ZKSED's AVAIL.
* config/riscv/riscv-crypto.def (RISCV_BUILTIN): Add ZKSH's and ZKSED's 
built-in functions.
* config/riscv/riscv_scalar_crypto.h (__riscv_sm4ks): Add ZKSH's and 
ZKSED's intrinsics.
(__riscv_sm4ed): Likewise.
(__riscv_sm3p0): Likewise.
(__riscv_sm3p1): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zksed.c: New test.
* gcc.target/riscv/zksh.c: New test.
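
A rough usage sketch of the new intrinsics (illustrative only; the intrinsic
names are the ones declared in riscv_scalar_crypto.h, while the int argument
types are assumptions, and bs must be a constant in 0..3 per the D03
constraint):

#include "riscv_scalar_crypto.h"

/* Sketch only: sm3p0/sm3p1 are unary; sm4ed/sm4ks additionally take a
   byte-select immediate bs restricted to the range 0..3.  */
int p0 (int rs1)          { return __riscv_sm3p0 (rs1); }
int ed (int rs1, int rs2) { return __riscv_sm4ed (rs1, rs2, 2); }
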
---
 gcc/config/riscv/crypto.md | 48 ++
 gcc/config/riscv/riscv-builtins.cc |  4 +++
 gcc/config/riscv/riscv-crypto.def  | 12 +++
 gcc/config/riscv/riscv_scalar_crypto.h | 19 ++
 gcc/testsuite/gcc.target/riscv/zksed.c | 20 +++
 gcc/testsuite/gcc.target/riscv/zksh.c  | 19 ++
 6 files changed, 122 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c

diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index 236eba69e46..564a685d690 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -64,6 +64,14 @@
 UNSPEC_SHA_512_SUM0R
 UNSPEC_SHA_512_SUM1
 UNSPEC_SHA_512_SUM1R
+
+;; ZKSH unspecs
+UNSPEC_SM3_P0
+UNSPEC_SM3_P1
+
+;; ZKSED unspecs
+UNSPEC_SM4_ED
+UNSPEC_SM4_KS
 ])
 
 ;; ZBKB extension
@@ -387,3 +395,43 @@
   "TARGET_ZKNH && TARGET_64BIT"
   "sha512sum1\t%0,%1"
   [(set_attr "type" "crypto")])
+
+ ;; ZKSH
+
+(define_insn "riscv_sm3p0_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SM3_P0))]
+  "TARGET_ZKSH"
+  "sm3p0\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sm3p1_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  UNSPEC_SM3_P1))]
+  "TARGET_ZKSH"
+  "sm3p1\t%0,%1"
+  [(set_attr "type" "crypto")])
+
+;; ZKSED 
+
+(define_insn "riscv_sm4ed_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")
+  (match_operand:X 2 "register_operand" "r")
+  (match_operand:SI 3 "register_operand" "D03")]
+  UNSPEC_SM4_ED))]
+  "TARGET_ZKSED"
+  "sm4ed\t%0,%1,%2,%3"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_sm4ks_"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(unspec:X [(match_operand:X 1 "register_operand" "r")
+  (match_operand:X 2 "register_operand" "r")
+  (match_operand:SI 3 "register_operand" "D03")]
+  UNSPEC_SM4_KS))]
+  "TARGET_ZKSED"
+  "sm4ks\t%0,%1,%2,%3"
+  [(set_attr "type" "crypto")])
diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index 2a35167e6fb..18c0cce6b8b 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -113,6 +113,10 @@ AVAIL (crypto_zkne64, TARGET_ZKNE && TARGET_64BIT)
 AVAIL (crypto_zkne_or_zknd, (TARGET_ZKNE || TARGET_ZKND) && TARGET_64BIT)
 AVAIL (crypto_zknh32, TARGET_ZKNH && !TARGET_64BIT)
 AVAIL (crypto_zknh64, TARGET_ZKNH && TARGET_64BIT)
+AVAIL (crypto_zksh32, TARGET_ZKSH && !TARGET_64BIT)
+AVAIL (crypto_zksh64, TARGET_ZKSH && TARGET_64BIT)
+AVAIL (crypto_zksed32, TARGET_ZKSED && !TARGET_64BIT)
+AVAIL (crypto_zksed64, TARGET_ZKSED && TARGET_64BIT)
 AVAIL (always, (!0))
 
 /* Construct a riscv_builtin_description from the given arguments.
diff --git a/gcc/config/riscv/riscv-crypto.def 
b/gcc/config/riscv/riscv-crypto.def
index 831ab8c0d01..7774b801aec 100644
--- a/gcc/config/riscv/riscv-crypto.def
+++ b/gcc/config/riscv/riscv-crypto.def
@@ -80,3 +80,15 @@ DIRECT_BUILTIN (sha512sig0, RISCV_DI_FTYPE_DI, 
crypto_zknh64),
 DIRECT_BUILTIN (sha512sig1, RISCV_DI_FTYPE_DI, crypto_zknh64),
 DIRECT_BUILTIN (sha512sum0, RISCV_DI_FTYPE_DI, crypto_zknh64),
 DIRECT_BUILTIN (sha512sum1, RISCV_DI_FTYPE_DI, crypto_zknh64),
+
+// ZKSH
+RISCV_BUILTIN (sm3p0_si, "sm3p0", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE_SI, 
crypto_zksh32),
+RISCV_BUILTIN (sm3p0_di, "sm3p0", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE_DI, 
crypto_zksh64),
+RISCV_BUILTIN (sm3p1_si, "sm3p1", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE_SI, 
crypto_zksh32),
+RISCV_BUILTIN (sm3p1_di, "sm3p1", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE_DI, 
crypto_zksh64),
+
+// ZKSED
+RISCV_BUILTIN (sm4ed_si, "sm4ed", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI_SI_SI, crypto_zksed32),
+RISCV_BUILTIN (sm4ed_di, "sm4ed", RISCV_BUILTIN_DIRECT

[PATCH 3/5] RISC-V: Implement ZKND and ZKNE extensions

2023-02-13 Thread Liao Shihua
  Implement ZKND and ZKNE extensions.
  ZKND is NIST Suite: AES Decryption.
  ZKNE is NIST Suite: AES Encryption.
  
gcc/ChangeLog:

* config/riscv/constraints.md (D03): New constraint for bs.
(DsA): New constraint for rnum.
* config/riscv/crypto.md (riscv_aes32dsi): Add ZKND and ZKNE instructions.
(riscv_aes32dsmi): Likewise.
(riscv_aes64ds): Likewise.
(riscv_aes64dsm): Likewise.
(riscv_aes64im): Likewise.
(riscv_aes64ks1i): Likewise.
(riscv_aes64ks2): Likewise.
(riscv_aes32esi): Likewise.
(riscv_aes32esmi): Likewise.
(riscv_aes64es): Likewise.
(riscv_aes64esm): Likewise.
* config/riscv/riscv-builtins.cc (AVAIL): Add ZKND's and ZKNE's AVAIL.
* config/riscv/riscv-crypto.def (DIRECT_BUILTIN): Add ZKND's and ZKNE's
built-in functions.
* config/riscv/riscv_scalar_crypto.h (__riscv_aes32dsi): Add ZKND's and
ZKNE's intrinsics.
(__riscv_aes32dsmi): Likewise.
(__riscv_aes64ds): Likewise.
(__riscv_aes64dsm): Likewise.
(__riscv_aes64im): Likewise.
(__riscv_aes64ks1i): Likewise.
(__riscv_aes64ks2): Likewise.
(__riscv_aes32esi): Likewise.
(__riscv_aes32esmi): Likewise.
(__riscv_aes64es): Likewise.
(__riscv_aes64esm): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zknd32.c: New test.
* gcc.target/riscv/zknd64.c: New test.
* gcc.target/riscv/zkne32.c: New test.
* gcc.target/riscv/zkne64.c: New test.
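
A rough rv64 usage sketch (illustrative only; the intrinsic names come from
riscv_scalar_crypto.h, the long argument types are assumptions, and the
immediates follow the new constraints: bs in 0..3 via D03, rnum in 0..10
via DsA):

#include "riscv_scalar_crypto.h"

/* Sketch only: aes64ds operates on the two 64-bit state halves, and
   aes64ks1i takes a key-schedule round number that must be a constant
   in 0..10.  */
long ds (long rs1, long rs2) { return __riscv_aes64ds (rs1, rs2); }
long ks1i (long rs1)         { return __riscv_aes64ks1i (rs1, 10); }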

---
 gcc/config/riscv/constraints.md |   8 ++
 gcc/config/riscv/crypto.md  | 120 +++-
 gcc/config/riscv/riscv-builtins.cc  |   5 +
 gcc/config/riscv/riscv-crypto.def   |  15 +++
 gcc/config/riscv/riscv_scalar_crypto.h  |  45 +
 gcc/testsuite/gcc.target/riscv/zknd32.c |  18 
 gcc/testsuite/gcc.target/riscv/zknd64.c |  36 +++
 gcc/testsuite/gcc.target/riscv/zkne32.c |  18 
 gcc/testsuite/gcc.target/riscv/zkne64.c |  30 ++
 9 files changed, 294 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 3637380ee47..3f46f14b10f 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -83,6 +83,14 @@
   (and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~ival)")))
 
+(define_constraint "D03"
+  "0, 1, 2 or 3 immediate"
+  (match_test "IN_RANGE (ival, 0, 3)"))
+
+(define_constraint "DsA"
+  "0 - 10 immediate"
+  (match_test "IN_RANGE (ival, 0, 10)"))
+
 ;; Floating-point constant +0.0, used for FCVT-based moves when FMV is
 ;; not available in RV32.
 (define_constraint "G"
diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index 048db920bb6..a97fd398217 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -34,7 +34,20 @@
 UNSPEC_XPERM8
 UNSPEC_XPERM4
 
-
+;; ZKND unspecs
+UNSPEC_AES_DSI
+UNSPEC_AES_DSMI
+UNSPEC_AES_DS
+UNSPEC_AES_DSM
+UNSPEC_AES_IM
+UNSPEC_AES_KS1I
+UNSPEC_AES_KS2
+
+;; ZKNE unspecs
+UNSPEC_AES_ES
+UNSPEC_AES_ESM
+UNSPEC_AES_ESI
+UNSPEC_AES_ESMI
 ])
 
 ;; ZBKB extension
@@ -129,5 +142,110 @@
   "xperm8\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; ZKND extension
+
+(define_insn "riscv_aes32dsi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 3 "register_operand" "D03")]
+   UNSPEC_AES_DSI))]
+  "TARGET_ZKND && !TARGET_64BIT"
+  "aes32dsi\t%0,%1,%2,%3"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_aes32dsmi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 3 "register_operand" "D03")]
+   UNSPEC_AES_DSMI))]
+  "TARGET_ZKND && !TARGET_64BIT"
+  "aes32dsmi\t%0,%1,%2,%3"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_aes64ds"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(unspec:DI [(match_operand:DI 1 "register_operand" "r")
+   (match_operand:DI 2 "register_operand" "r")]
+   UNSPEC_AES_DS))]
+  "TARGET_ZKND && TARGET_64BIT"
+  "aes64ds\t%0,%1,%2"
+  [(set_attr "type" "crypto")])
+
+(define_insn "riscv_aes64dsm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(unspec:DI [(match_operand:DI 1 "register_operand" "r")
+   (match_operand:DI 2 "register_operand" "r")]
+  

[PATCH 2/5] RISC-V: Implement ZBKB, ZBKC and ZBKX extensions

2023-02-13 Thread Liao Shihua
   Implement ZBKB, ZBKC and ZBKX extensions. 
   ZBKB is Bitmanip instructions for Cryptography.
   ZBKC is Carry-less multiply instructions.
   ZBKX is Crossbar permutation instructions.
   Only add the machine description and intrinsics for those instructions that
are not defined in the first Bitmanip ratification package.
   Instructions already defined in the Bitmanip extension are generated by the
machine description in bitmanip.md.

gcc/ChangeLog:

* config.gcc: Add intrinsics header in extra_headers.
* config/riscv/bitmanip.md: Add TARGET_ZBKB for the instructions that are
also included in the ZBKB extension.
* config/riscv/riscv-builtins.cc (AVAIL): Add ZBKB's, ZBKC's and ZBKX's
AVAIL.
* config/riscv/riscv.md: Include crypto.md.
* config/riscv/crypto.md: Scalar Cryptography Machine description file.
* config/riscv/riscv-crypto.def: Scalar Cryptography built-in function 
file.
* config/riscv/riscv_scalar_crypto.h: Scalar Cryptography intrinsics 
header.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbkb32.c: New test.
* gcc.target/riscv/zbkb64.c: New test.
* gcc.target/riscv/zbkc32.c: New test.
* gcc.target/riscv/zbkc64.c: New test.
* gcc.target/riscv/zbkx32.c: New test.
* gcc.target/riscv/zbkx64.c: New test.

---
 gcc/config.gcc  |   2 +-
 gcc/config/riscv/bitmanip.md|  20 ++--
 gcc/config/riscv/crypto.md  | 133 
 gcc/config/riscv/riscv-builtins.cc  |   7 ++
 gcc/config/riscv/riscv-crypto.def   |  46 
 gcc/config/riscv/riscv.md   |   4 +-
 gcc/config/riscv/riscv_scalar_crypto.h  | 106 +++
 gcc/testsuite/gcc.target/riscv/zbkb32.c |  36 +++
 gcc/testsuite/gcc.target/riscv/zbkb64.c |  28 +
 gcc/testsuite/gcc.target/riscv/zbkc32.c |  18 
 gcc/testsuite/gcc.target/riscv/zbkc64.c |  17 +++
 gcc/testsuite/gcc.target/riscv/zbkx32.c |  19 
 gcc/testsuite/gcc.target/riscv/zbkx64.c |  18 
 13 files changed, 442 insertions(+), 12 deletions(-)
 create mode 100644 gcc/config/riscv/crypto.md
 create mode 100644 gcc/config/riscv/riscv-crypto.def
 create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index f0958e1c959..951b92b2028 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -532,7 +532,7 @@ riscv*)
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h"
+   extra_headers="riscv_vector.h riscv_scalar_crypto.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 14d18edbe62..f076ba35832 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -189,7 +189,7 @@
   [(set (match_operand:X 0 "register_operand" "=r")
 (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r"))
 (match_operand:X 2 "register_operand" "r")))]
-  "TARGET_ZBB"
+  "TARGET_ZBB || TARGET_ZBKB"
   "n\t%0,%2,%1"
   [(set_attr "type" "bitmanip")
(set_attr "mode" "")])
@@ -203,7 +203,7 @@
   (const_int 0)))
(match_operand:DI 2 "register_operand")))
(clobber (match_operand:DI 3 "register_operand"))]
-  "TARGET_ZBB"
+  "TARGET_ZBB || TARGET_ZBKB"
   [(set (match_dup 3) (ashiftrt:DI (match_dup 1) (const_int 63)))
(set (match_dup 0) (and:DI (not:DI (match_dup 3)) (match_dup 2)))])
 
@@ -211,7 +211,7 @@
   [(set (match_operand:X 0 "register_operand" "=r")
 (not:X (xor:X (match_operand:X 1 "register_operand" "r")
   (match_operand:X 2 "register_operand" "r"]
-  "TARGET_ZBB"
+  "TARGET_ZBB || TARGET_ZBKB"
   "xnor\t%0,%1,%2"
   [(set_attr "type" "bitmanip")
(set_attr "mode" "")])
@@ -277,7 +277,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
(rotatert:SI (match_operand:SI 1 "register_operand" "r")
 (match_operand:QI 2 "arith_operand" "rI")))]
-  "TARGET_ZBB"
+  "TARGET_ZBB || TARGET_ZBKB"
   "ror%i2%~\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
@@ -285,7 +285,7 @@
   [(set (match_operand:DI 0 "register_operand" "=r")
(rotatert:DI (match_operand:DI 1 "registe

Re: [Ping] ifcvt: Fix regression in aarch64/fcsel_1.c

2023-02-13 Thread Richard Sandiford via Gcc-patches
Richard Sandiford via Gcc-patches  writes:
> Ping for the patch below

Ugh, somehow missed Jeff's OK over the weekend.  Sorry for the noise!

Richard


Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Andrew Stubbs
I presume I've been CC'd on this conversation because weird vector 
architecture problems have happened to me before. :)


However, I'm not sure I can help much because AMD GCN does not use 
BImode vectors at all. This is partly because loading boolean values 
into a GCN vector would have 31 padding bits for each lane, but mostly 
because the result of comparison instructions is written to a DImode 
scalar register, not into a vector.


I did experiment, long ago, with having a V64BImode that could be stored 
in scalar registers (tieable with DImode), but there wasn't any great 
advantage and it broke VECTOR_MODE_P in most other contexts.


It's possible to store truth values in vectors as integers, and there 
are some cases where we do so (SIMD clone mask arguments, for example), 
but that's mostly to smooth things over in the middle-end.


The problem with padding bits is something I do see: V64QImode has 24 
padding bits for each lane, in register. While there are instructions 
that will load and store QImode vectors correctly, without the padding, 
the backend still has to handle all the sign-extends, zero-extends, and 
truncates explicitly, because the middle-end and expand pass give no 
assistance with that for vectors (unlike scalars).


Andrew
On 13/02/2023 08:07, Richard Biener via Gcc-patches wrote:

On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:


Thanks for contributing this.
Hi, Richard. Can you help us with this issue?
In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
(VNx2BImode), vbool64_t (VNx1BImode)
Since we are using a 1-bit mask, each BOOL occupies 1 bit.
According to the RVV ISA, we adjust these modes as follows:

VNx8BImode poly (8,8) NUNITS (each unit is a 1-bit mask)
VNx4BImode poly (4,4) NUNITS (each unit is a 1-bit mask)
VNx2BImode poly (2,2) NUNITS (each unit is a 1-bit mask)
VNx1BImode poly (1,1) NUNITS (each unit is a 1-bit mask)


So how's VNx1BImode laid out for N == 2?  Is that still a single
byte and two consecutive bits?  I suppose so.

But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?

I'm not sure what GET_MODE_PRECISION of the vector mode itself
should be here, but then I wonder ...


If we try GET_MODE_BITSIZE or GET_MODE_NUNITS on these modes, the values are
different.
However, if we try GET_MODE_SIZE, they are all the same (poly (1,1)).
Such a scenario makes these modes tied together and gives wrong code
generation since their bit sizes are different.
Consider the case as this:
#include "riscv_vector.h"
void foo5_3 (int32_t * restrict in, int32_t * restrict out, size_t n, int cond)
{
   vint8m1_t v = *(vint8m1_t*)in;
   *(vint8m1_t*)out = v;
   vbool16_t v4 = *(vbool16_t *)in;
   *(vbool16_t *)(out + 300) = v4;
   vbool8_t v3 = *(vbool8_t*)in;
   *(vbool8_t*)(out + 200) = v3;
}
The second vbool8_t load (vlm.v) is missing, since GCC gives
"v3 = VIEW_CONVERT (vbool8_t) v4" in gimple.
We failed to fix it in the RISC-V backend. Can you help us with this? Thanks.


... why for the loads the "padding" is not loaded?  The above testcase
is probably more complicated than necessary as well?

Thanks,
Richard.
  


juzhe.zh...@rivai.ai
  
From: incarnation.p.lee

Date: 2023-02-11 16:46
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rguenther; Pan Li
Subject: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
From: Pan Li 
  
Fix the bug for mode tieable of the rvv bool types.  The vbool*_t types
cannot be tied as the actual load/store size is determined by
the vl.  The mode sizes of the rvv bool types are also adjusted for the
underlying optimization passes.  The rvv bool types are vbool*_t, aka
vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, vbool32_t, and
vbool64_t.
  
PR 108185

PR 108654
  
gcc/ChangeLog:
  
* config/riscv/riscv-modes.def (ADJUST_BYTESIZE):

* config/riscv/riscv.cc (riscv_v_adjust_bytesize):
(riscv_modes_tieable_p):
* config/riscv/riscv.h (riscv_v_adjust_bytesize):
* machmode.h (VECTOR_BOOL_MODE_P):
* tree-ssa-sccvn.cc (visit_reference_op_load):
  
gcc/testsuite/ChangeLog:
  
* gcc.target/riscv/pr108185-1.c: New test.

* gcc.target/riscv/pr108185-2.c: New test.
* gcc.target/riscv/pr108185-3.c: New test.
* gcc.target/riscv/pr108185-4.c: New test.
* gcc.target/riscv/pr108185-5.c: New test.
* gcc.target/riscv/pr108185-6.c: New test.
* gcc.target/riscv/pr108185-7.c: New test.
* gcc.target/riscv/pr108185-8.c: New test.
  
Signed-off-by: Pan Li 

---
gcc/config/riscv/riscv-modes.def| 14 ++--
gcc/config/riscv/riscv.cc   | 34 -
gcc/config/riscv/riscv.h|  2 +
gcc/machmode.h  |  3 +
gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108

[PATCH] tree-optimization/108691 - indirect calls to setjmp

2023-02-13 Thread Richard Biener via Gcc-patches
DCE now chokes on indirect setjmp calls becoming direct because
that exposes them too late to be subject to abnormal edge creation.
The following patch honors gimple_call_ctrl_altering for those and
_not_ treat formerly indirect calls to setjmp as calls to setjmp.

Unfortunately there's no way to have an indirect call to setjmp
properly annotated (the returns_twice attribute is ignored on types).

RTL expansion late discovers returns-twice for the purpose of
adding REG_SETJMP notes and also sets ->calls_setjmp
(instead of asserting it is set).  There's no good way to
transfer proper knowledge around here so I'm using ->calls_setjmp
as a flag to indicate whether gimple_call_ctrl_altering was set.

Comments on what's the most sensible thing to do here?  Supporting
returns_twice on indirect calls wouldn't be difficult, so we're
talking about how to handle this kind of "legacy" situation?

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR tree-optimization/108691
* cfgexpand.cc (expand_call_stmt): Clear cfun->calls_setjmp
temporarily if the call is not control-altering.
* calls.cc (emit_call_1): Do not add REG_SETJMP if
cfun->calls_setjmp is not set.  Do not alter cfun->calls_setjmp.
* tree-ssa-dce.cc (eliminate_unnecessary_stmts): For calls
that are not control-altering do not set cfun->calls_setjmp.

* gcc.dg/pr108691.c: New testcase.
---
 gcc/calls.cc| 10 +++
 gcc/cfgexpand.cc|  7 +
 gcc/testsuite/gcc.dg/pr108691.c |  9 ++
 gcc/tree-ssa-dce.cc | 49 +++--
 4 files changed, 49 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr108691.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 4d7f6c3d291..0242d52cfb3 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -506,11 +506,11 @@ emit_call_1 (rtx funexp, tree fntree ATTRIBUTE_UNUSED, 
tree fndecl ATTRIBUTE_UNU
   if (ecf_flags & ECF_NORETURN)
 add_reg_note (call_insn, REG_NORETURN, const0_rtx);
 
-  if (ecf_flags & ECF_RETURNS_TWICE)
-{
-  add_reg_note (call_insn, REG_SETJMP, const0_rtx);
-  cfun->calls_setjmp = 1;
-}
+  if (ecf_flags & ECF_RETURNS_TWICE
+  /* We rely on GIMPLE setting this flag and here use it to
+catch formerly indirect and not control-altering calls.  */
+  && cfun->calls_setjmp)
+add_reg_note (call_insn, REG_SETJMP, const0_rtx);
 
   SIBLING_CALL_P (call_insn) = ((ecf_flags & ECF_SIBCALL) != 0);
 
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 25b1558dcb9..ab143a6d2d3 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2808,6 +2808,11 @@ expand_call_stmt (gcall *stmt)
   /* Must come after copying location.  */
   copy_warning (exp, stmt);
 
+  /* For calls that do not alter control flow avoid REG_SETJMP notes.  */
+  bool saved_calls_setjmp = cfun->calls_setjmp;
+  if (!gimple_call_ctrl_altering_p (stmt))
+cfun->calls_setjmp = false;
+
   /* Ensure RTL is created for debug args.  */
   if (decl && DECL_HAS_DEBUG_ARGS_P (decl))
 {
@@ -2846,6 +2851,8 @@ expand_call_stmt (gcall *stmt)
 }
 
   mark_transaction_restart_calls (stmt);
+
+  cfun->calls_setjmp = saved_calls_setjmp;
 }
 
 
diff --git a/gcc/testsuite/gcc.dg/pr108691.c b/gcc/testsuite/gcc.dg/pr108691.c
new file mode 100644
index 000..e412df10f22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108691.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern int __attribute__((returns_twice)) setjmp(void*);
+
+void bbb(void) {
+  int (*fnptr)(void*) = setjmp;
+  fnptr(0);
+}
diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index ceeb0ad5ab3..a655b06f800 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -1313,7 +1313,6 @@ eliminate_unnecessary_stmts (bool aggressive)
   basic_block bb;
   gimple_stmt_iterator gsi, psi;
   gimple *stmt;
-  tree call;
   auto_vec to_remove_edges;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1416,50 +1415,58 @@ eliminate_unnecessary_stmts (bool aggressive)
  remove_dead_stmt (&gsi, bb, to_remove_edges);
  continue;
}
- else if (is_gimple_call (stmt))
+ else if (gcall *call = dyn_cast <gcall *> (stmt))
{
- tree name = gimple_call_lhs (stmt);
+ tree name = gimple_call_lhs (call);
 
- notice_special_calls (as_a <gcall *> (stmt));
+ bool saved_calls_setjmp = cfun->calls_setjmp;
+ notice_special_calls (call);
+ /* Ignore ECF_RETURNS_TWICE from calls not marked as
+control altering.  */
+ if (!saved_calls_setjmp
+ && cfun->calls_setjmp
+ && !gimple_call_ctrl_altering_p (call))
+   cfun->calls_setjmp = false;
 
  /* When LHS of var = call (); is dead, simplify it into
 call (); saving one operand.  */
+ tree fndecl;
  

Re: [PATCH] tree-optimization/108691 - indirect calls to setjmp

2023-02-13 Thread Jakub Jelinek via Gcc-patches
On Mon, Feb 13, 2023 at 12:00:56PM +0100, Richard Biener wrote:
> DCE now chokes on indirect setjmp calls becoming direct because
> that exposes them too late to be subject to abnormal edge creation.
> The following patch honors gimple_call_ctrl_altering for those and
> _not_ treat formerly indirect calls to setjmp as calls to setjmp.
> 
> Unfortunately there's no way to have an indirect call to setjmp
> properly annotated (the returns_twice attribute is ignored on types).
> 
> RTL expansion late discovers returns-twice for the purpose of
> adding REG_SETJMP notes and also sets ->calls_setjmp
> (instead of asserting it is set).  There's no good way to
> transfer proper knowledge around here so I'm using ->calls_setjmp
> as a flag to indicate whether gimple_call_ctrl_altering was set.
> 
> Comments on what's the most sensible thing to do here?  Supporting
> returns_twice on indirect calls wouldn't be difficult, so we're
> talking about how to handle this kind of "legacy" situation?

One thing is supporting returns_twice on function types, but another one
is that initially none of the calls will be marked that way and even later,
it is up to the user if they mark it or not.

Could we e.g. prevent turning such indirect calls into direct calls?

Anyway, notice_special_calls is called in various spots, not just DCE,
wouldn't it be better to simply not set calls_setjmp flag in there if
the current function already has cfg and the call isn't ctrl altering?

Jakub



Re: [Patch] builtin-declaration-mismatch-7: fix LLP64 targets

2023-02-13 Thread Jonathan Yong via Gcc-patches

On 2/13/23 06:28, Jeff Law wrote:



On 2/11/23 01:33, Jonathan Yong via Gcc-patches wrote:

Attached patch OK?

0001-builtin-declaration-mismatch-7-fix-LLP64-targets.patch

 From 3ba6812366e837a87c15360e83d1f72ffdd29684 Mon Sep 17 00:00:00 2001
From: Jonathan Yong<10wa...@gmail.com>
Date: Sat, 11 Feb 2023 08:30:55 +
Subject: [PATCH] builtin-declaration-mismatch-7: fix LLP64 targets

gcc/testsuite/ChangeLog:

* gcc.dg/Wbuiltin-declaration-mismatch-7.c: Use (long )*
regex pattern to allow long long instead of just long.

OK.  Go ahead and install.

Thanks,
Jeff


Thanks for reviewing, pushed to master branch.



Re: [Patch] pr65658.c: fix excess warnings on LLP64 targets

2023-02-13 Thread Jonathan Yong via Gcc-patches

On 2/13/23 07:04, Jeff Law wrote:



On 1/28/23 11:16, Jonathan Yong via Gcc-patches wrote:

Patch OK?

gcc/testsuite/ChangeLog:

 * gcc.dg/pr65658.c: fix LLP64 test.

0001-pr65658.c-fix-excess-warnings-on-LLP64-targets.patch

 From bd2634e2795723f290dcf4bbb06e70fb6ca6af51 Mon Sep 17 00:00:00 2001
From: Jonathan Yong<10wa...@gmail.com>
Date: Sat, 28 Jan 2023 18:12:50 +
Subject: [PATCH] pr65658.c: fix excess warnings on LLP64 targets

gcc/testsuite/ChangeLog:

* gcc.dg/pr65658.c: fix LLP64 test.

OK.
jeff


Thanks pushed to master branch 391f29e60a95335c925040641e99ddef57edac6d.



Re: realpath() patch to fix symlinks resolution for win32

2023-02-13 Thread Martin Liška

On 2/11/23 22:14, Gerald Pfeifer wrote:

On Sat, 11 Feb 2023, NightStrike wrote:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108350

I would have expected the PR to have been automatically updated based on
the commit email. Any idea why that didn't happen? Not to change the state
to closed, but to add the commit information as a reply.


I assume the fact that the PR reference was spelt as
   PR/108350
with a slash, not a blank, after "PR" may be responsible for the
missing Bugzilla comment.

The documented format - per gcc.gnu.org/codingconventions.html - is
   PR component/12345


It's likely what happens.



Martin? (By the way, where does one best have a look at those hooks?
It isn't .git/hooks in the main repository, it appears?)


I know that server hooks (gcc.gnu.org:/home/gccadmin/hooks-bin) do contain:
email-to-bugzilla-filtered:BUGZILLA_CMD = ("/sourceware/infra/bin/email-to-bugzilla", 
"-G", "gcc")

thus I guess it's /sourceware/infra/bin/email-to-bugzilla that does the
filtering of commits to bugzilla.

Martin



Gerald




Re: [Ping^3] gomp: Various fixes for SVE types [PR101018]

2023-02-13 Thread Jakub Jelinek via Gcc-patches
On Mon, Feb 13, 2023 at 10:45:05AM +, Richard Sandiford wrote:
> Ping^3 [https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606741.html]
> 
> 
> 
> Various parts of the omp code checked whether the size of a decl
> was an INTEGER_CST in order to determine whether the decl was
> variable-sized or not.  If it was variable-sized, it was expected
> to have a DECL_VALUE_EXPR replacement, as for VLAs.
> 
> This patch uses poly_int_tree_p instead, so that variable-length
> SVE vectors are treated like constant-length vectors.  This means
> that some structures become poly_int-sized, with some fields at
> poly_int offsets, but we already have code to handle that.
> 
> An alternative would have been to handle the data via indirection
> instead.  However, that's likely to be more complicated, and it
> would contradict is_variable_sized, which already uses a check
> for TREE_CONSTANT rather than INTEGER_CST.
> 
> gimple_add_tmp_var should probably not add a safelen of 1
> for SVE vectors, but that's really a separate thing and might
> be hard to test.

Generally, OpenMP has lots of clauses on lots of different constructs
and for SVE we need to decide what to do with them, and it would be better
to cover them all in testsuite coverage, so something orders of magnitude
larger than this patch provides and then there is OpenACC too.
Can one add these non-constant poly_int sized types as members of
aggregates?  If yes, they need to be tested in addition to the plain
vectors.

From data sharing clauses and others:
1) shared (implicit or explicit) - I'd say the non-constant poly_int sized
   stuff should be shared by passing around an address, rather than by
   copying it around by value which can be large (so say similar to
   aggregates rather than scalars), though feel free to argue otherwise
2) for the offloading stuff, I'd say we want to error or sorry at
   gimplification time, both for explicit or implicit map clause on
   target/target data/target {enter,exit} data and on explicit/implicit
   private and firstprivate clauses on target; while it could work fine
   with host fallback, generally the intention is to offload to a different
   device and neither PTX nor AMDGCN have anything similar to SVE and even
   for say potential ssh based offloading to aarch64 there is the
   possibility that the two devices don't agree on the vector sizes
3) private clause just creates another private variable of the same type,
   except for target I think it should generally work fine, but should be
   nevertheless test covered, say on parallel, task and some worksharing
   construct (e.g. for) and simd and also with allocate clause specifying
   allocators
4) firstprivate clause is similar to private, but we need to copy the
   initial value to it; e.g. in case of parallel, host teams or task
   such values are copied through compiler generated artificial struct
   that contains all the stuff needed to be propagated around (and except
   for task/taskloop then propagated back).  For the SVE stuff I think it
   might be nice to put the non-constant sized stuff late in the artificial
   structure so that one can access the constant sized stuff using constant
   offsets
5) lastprivate similar to private with copying value back from one
   particular thread/lane (e.g. should be tested also on simd)
6) reduction/in_reduction/task_reduction - reductions are partly
   privatization clauses, for SVE only user defined reductions apply
   (declare reduction), but need to be tested in lots of constructs,
   parallel, for, simd, taskloop, task reductions and inscan reductions
   (explicit/implicit)
7) copyin - can the SVE vars be threadprivate (thread_local in C++ etc.)?
   If not, at least needs testing
8) linear clause hopefully will reject SVE stuff, but needs testing
9) affinity clause - we just parse/ignore, but still needs testing
10) aligned clause on simd - again, needs testing
11) allocate clause - as I said, for most of the data sharing clauses
coverage needs to be without and with corresponding allocate clauses
12) depend clause - this one again needs testing, it just under the hood
takes address of the passed in variable, so probably should just work
out of the box
13) nontemporal clause on simd - probably works fine, but needs testing
14) uniform clause - this is on declare simd, one needs to test declare
simd with the various cases (vector without clause, uniform, linear)
15) enter/link clauses - as I said in 2), these IMHO should be rejected
16) detach clause - the clause requires specific type, so I think should be
ok
17) use_device_ptr/use_device_addr/is_device_ptr - offloading stuff, so
like 2)
18) inclusive/exclusive clauses - see 6) above for inscan reductions
19) map/has_device_addr - see 2)
20) doacross - requires specific format with scalars, so just check it is
properly rejected
Rest of clauses don't have arguments or have integral/logical expression
operands, so those should 

Re: [PATCH] tree-optimization/108691 - indirect calls to setjmp

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, Jakub Jelinek wrote:

> On Mon, Feb 13, 2023 at 12:00:56PM +0100, Richard Biener wrote:
> > DCE now chokes on indirect setjmp calls becoming direct because
> > that exposes them too late to be subject to abnormal edge creation.
> > The following patch honors gimple_call_ctrl_altering for those and
> > _not_ treat formerly indirect calls to setjmp as calls to setjmp.
> > 
> > Unfortunately there's no way to have an indirect call to setjmp
> > properly annotated (the returns_twice attribute is ignored on types).
> > 
> > RTL expansion late discovers returns-twice for the purpose of
> > adding REG_SETJMP notes and also sets ->calls_setjmp
> > (instead of asserting it is set).  There's no good way to
> > transfer proper knowledge around here so I'm using ->calls_setjmp
> > as a flag to indicate whether gimple_call_ctrl_altering was set.
> > 
> > Comments on what's the most sensible thing to do here?  Supporting
> > returns_twice on indirect calls wouldn't be difficult, so we're
> > talking about how to handle this kind of "legacy" situation?
> 
> One thing is supporting returns_twice on function types, but another one
> is that initially none of the calls will be marked that way and even later,
> it is up to the user if they mark it or not.

Yep.

> Could we e.g. prevent turning such indirect calls into direct calls?

We do exactly have gimple_call_fntype and gimple_call_ctrl_altering_p
to not require special-casing indirect to direct call promotion here.

> Anyway, notice_special_calls is called in various spots, not just DCE,
> wouldn't it be better to simply not set calls_setjmp flag in there if
> the current function already has cfg and the call isn't ctrl altering?

I thought about changing gimple_call_flags instead, filtering out
ECF_RETURNS_TWICE.  I just didn't make up my mind on what
property to key at (and to require 'cfun' to be set to query it).
But sure, changing notice_special_calls also works - the only
other relevant caller is the inliner I think, and that could be
replaced by caller |= callee of the two flags tracked instead of
re-scanning each inlined stmt.

Would you be happy with changing notice_special_calls, dropping the
DCE hunk but keeping the cfgexpand/calls.cc hunks?

Thanks,
Richard.


Re: [PATCH 2/2] vect: Make partial trapping ops use predication [PR96373]

2023-02-13 Thread Kewen.Lin via Gcc-patches
Hi Richard,

on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
> PR96373 points out that a predicated SVE loop currently converts
> trapping unconditional ops into unpredicated vector ops.  Doing
> the operation on inactive lanes can then raise an exception.
> 
> As discussed in the PR trail, we aren't 100% consistent about
> whether we preserve traps or not.  But the direction of travel
> is clearly to improve that rather than live with it.  This patch
> tries to do that for the SVE case.
> 
> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
> -fno-trapping-math for now and filed PR108571 to track it.
> A similar problem applies to fsubr_1.d.
> 
> I think this is likely to regress Power 10, since conditional
> operations are only available for masked loops.  I think we'll
> need to add -fno-trapping-math to any affected testcases,
> but I don't have a Power 10 system to test on.  Kewen, would you
> mind giving this a spin and seeing how bad the fallout is?
> 

Sorry for the late reply, I'm just back from vacation.

Thank you for fixing this and caring about Power10!

I tested your proposed patch on one Power10 machine (ppc64le),
it's bootstrapped but some test failures got exposed as below.

< FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
mlxvlM 14
< FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
mstxvlM 7
< FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
mlxvlM 20
< FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
mstxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
mlxvlM 14
< FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
mstxvlM 7
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
mlxvlM 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
mlxvx?M 120
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
mstxvlM 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
mstxvx?M 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
mlxvlM 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
mstxvlM 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
mstxvx?M 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
mlxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
mlxvx?M 42
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
mstxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
mlxvlM 16
< FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
mstxvlM 7
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
mlxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
mstxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
mlxvlM 20
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
mstxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
mlxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
mstxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
mlxvlM 20
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
mstxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
mlxvlM 14
< FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
mstxvlM 7
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
mlxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
mstxvM
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
mstxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
mlxvlM 70
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
mstxvlM 70
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
mlxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
mstxvM
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
mstxvxM
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
mlxvlM 21
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
mstxvlM 21
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times 
mlxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times 
mstxvlM 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times 
mstxvx?M 6
< FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times 
mlxvlM 30
< FAIL: 

Re: [PATCH] tree-optimization/108691 - indirect calls to setjmp

2023-02-13 Thread Jakub Jelinek via Gcc-patches
On Mon, Feb 13, 2023 at 12:41:48PM +, Richard Biener wrote:
> > Could we e.g. prevent turning such indirect calls into direct calls?
> 
> We do exactly have gimple_call_fntype and gimple_call_ctrl_altering_p
> to not require special-casing indirect to direct call promotion here.

Ah, so if we make returns_twice apply to function types, then we could
just compare if gimple_call_fntype has also returns_twice and if not, not
consider it actually returns_twice.

> > Anyway, notice_special_calls is called in various spots, not just DCE,
> > wouldn't it be better to simply not set calls_setjmp flag in there if
> > the current function already has cfg and the call isn't ctrl altering?
> 
> I thought about changing gimple_call_flags instead, filtering out
> ECF_RETURNS_TWICE.  I just didn't make up my mind on what
> property to key at (and to require 'cfun' to be set to query it).
> But sure, changing notice_special_calls also works - the only
> other relevant caller is the inliner I think, and that could be
> replaced by caller |= callee of the two flags tracked instead of
> re-scanning each inlined stmt.
> 
> Would you be happy with changing notice_special_calls, dropping the
> DCE hunk but keeping the cfgexpand/calls.cc hunks?

I think so.

Jakub



Re: [PATCH] tree-optimization/108691 - indirect calls to setjmp

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, Jakub Jelinek wrote:

> On Mon, Feb 13, 2023 at 12:41:48PM +, Richard Biener wrote:
> > > Could we e.g. prevent turning such indirect calls into direct calls?
> > 
> > We do exactly have gimple_call_fntype and gimple_call_ctrl_altering_p
> > to not require special-casing indirect to direct call promotion here.
> 
> Ah, so if we make returns_twice apply to function types, then we could
> just compare if gimple_call_fntype has also returns_twice and if not, not
> consider it actually returns_twice.
> 
> > > Anyway, notice_special_calls is called in various spots, not just DCE,
> > > wouldn't it be better to simply not set calls_setjmp flag in there if
> > > the current function already has cfg and the call isn't ctrl altering?
> > 
> > I thought about changing gimple_call_flags instead, filtering out
> > ECF_RETURNS_TWICE.  I just didn't make up my mind on what
> > property to key at (and to require 'cfun' to be set to query it).
> > But sure, changing notice_special_calls also works - the only
> > other relevant caller is the inliner I think, and that could be
> > replaced by caller |= callee of the two flags tracked instead of
> > re-scanning each inlined stmt.
> > 
> > Would you be happy with changing notice_special_calls, dropping the
> > DCE hunk but keeping the cfgexpand/calls.cc hunks?
> 
> I think so.

I'm testing the following and am queueing the second patch below
for next stage1.

Richard.


>From d5e62f27489c1e7a8696a85e7bc98cc0a26564d2 Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Mon, 13 Feb 2023 10:41:51 +0100
Subject: [PATCH 1/2] tree-optimization/108691 - indirect calls to setjmp
To: gcc-patches@gcc.gnu.org

DCE now chokes on indirect setjmp calls becoming direct because
that exposes them too late to be subject to abnormal edge creation.
The following patch honors gimple_call_ctrl_altering for those and
_not_ treat formerly indirect calls to setjmp as calls to setjmp
in notice_special_calls.

Unfortunately there's no way to have an indirect call to setjmp
properly annotated (the returns_twice attribute is ignored on types).

RTL expansion late discovers returns-twice for the purpose of
adding REG_SETJMP notes and also sets ->calls_setjmp
(instead of asserting it is set).  There's no good way to
transfer proper knowledge around here so I'm using ->calls_setjmp
as a flag to indicate whether gimple_call_ctrl_altering_p was set.

PR tree-optimization/108691
* tree-cfg.cc (notice_special_calls): When the CFG is built
honor gimple_call_ctrl_altering_p.
* cfgexpand.cc (expand_call_stmt): Clear cfun->calls_setjmp
temporarily if the call is not control-altering.
* calls.cc (emit_call_1): Do not add REG_SETJMP if
cfun->calls_setjmp is not set.  Do not alter cfun->calls_setjmp.

* gcc.dg/pr108691.c: New testcase.
---
 gcc/calls.cc| 10 +-
 gcc/cfgexpand.cc|  7 +++
 gcc/testsuite/gcc.dg/pr108691.c |  9 +
 gcc/tree-cfg.cc |  4 +++-
 4 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr108691.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 4d7f6c3d291..0242d52cfb3 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -506,11 +506,11 @@ emit_call_1 (rtx funexp, tree fntree ATTRIBUTE_UNUSED, 
tree fndecl ATTRIBUTE_UNU
   if (ecf_flags & ECF_NORETURN)
 add_reg_note (call_insn, REG_NORETURN, const0_rtx);
 
-  if (ecf_flags & ECF_RETURNS_TWICE)
-{
-  add_reg_note (call_insn, REG_SETJMP, const0_rtx);
-  cfun->calls_setjmp = 1;
-}
+  if (ecf_flags & ECF_RETURNS_TWICE
+  /* We rely on GIMPLE setting this flag and here use it to
+catch formerly indirect and not control-altering calls.  */
+  && cfun->calls_setjmp)
+add_reg_note (call_insn, REG_SETJMP, const0_rtx);
 
   SIBLING_CALL_P (call_insn) = ((ecf_flags & ECF_SIBCALL) != 0);
 
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 25b1558dcb9..ab143a6d2d3 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2808,6 +2808,11 @@ expand_call_stmt (gcall *stmt)
   /* Must come after copying location.  */
   copy_warning (exp, stmt);
 
+  /* For calls that do not alter control flow avoid REG_SETJMP notes.  */
+  bool saved_calls_setjmp = cfun->calls_setjmp;
+  if (!gimple_call_ctrl_altering_p (stmt))
+cfun->calls_setjmp = false;
+
   /* Ensure RTL is created for debug args.  */
   if (decl && DECL_HAS_DEBUG_ARGS_P (decl))
 {
@@ -2846,6 +2851,8 @@ expand_call_stmt (gcall *stmt)
 }
 
   mark_transaction_restart_calls (stmt);
+
+  cfun->calls_setjmp = saved_calls_setjmp;
 }
 
 
diff --git a/gcc/testsuite/gcc.dg/pr108691.c b/gcc/testsuite/gcc.dg/pr108691.c
new file mode 100644
index 000..e412df10f22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108691.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern int __attribute__((returns_twice)) setjmp(void*);
+
+void bbb(void) {
+  int (*

Re: [PATCH] IBM zSystems: Do not propagate scheduler state across basic blocks [PR108102]

2023-02-13 Thread Andreas Krebbel via Gcc-patches
On 2/11/23 16:59, Stefan Schulze Frielinghaus wrote:
> So far we propagate scheduler state across basic blocks within EBBs and
> reset the state otherwise.  In certain circumstances the entry block of
> an EBB might be empty, i.e., no_real_insns_p is true.  In those cases
> scheduler state is not reset and subsequently wrong state is propagated
> to following blocks of the same EBB.
> 
> Since the performance benefit of tracking state across basic blocks is
> questionable on modern hardware, simply reset the state for each basic
> block.
> 
> Fix also resetting f{p,x}d_longrunning.
> 
> Bootstrapped and regtested on IBM zSystems.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_bb_fallthru_entry_likely): Remove.
>   (struct s390_sched_state): Initialise to zero.
>   (s390_sched_variable_issue): For better debuggability also emit
>   the current side.
>   (s390_sched_init): Unconditionally reset scheduler state.

Ok. Thanks!

Andreas




Re: [PATCH 2/2] vect: Make partial trapping ops use predication [PR96373]

2023-02-13 Thread Richard Sandiford via Gcc-patches
"Kewen.Lin"  writes:
> Hi Richard,
>
> on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
>> PR96373 points out that a predicated SVE loop currently converts
>> trapping unconditional ops into unpredicated vector ops.  Doing
>> the operation on inactive lanes can then raise an exception.
>> 
>> As discussed in the PR trail, we aren't 100% consistent about
>> whether we preserve traps or not.  But the direction of travel
>> is clearly to improve that rather than live with it.  This patch
>> tries to do that for the SVE case.
>> 
>> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
>> -fno-trapping-math for now and filed PR108571 to track it.
>> A similar problem applies to fsubr_1.d.
>> 
>> I think this is likely to regress Power 10, since conditional
>> operations are only available for masked loops.  I think we'll
>> need to add -fno-trapping-math to any affected testcases,
>> but I don't have a Power 10 system to test on.  Kewen, would you
>> mind giving this a spin and seeing how bad the fallout is?
>> 
>
> Sorry for the late reply, I'm just back from vacation.
>
> Thank you for fixing this and caring about Power10!
>
> I tested your proposed patch on one Power10 machine (ppc64le),
> it's bootstrapped but some test failures got exposed as below.
>
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
> mlxvlM 14
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
> mstxvlM 7
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
> mlxvlM 20
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
> mstxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
> mlxvlM 14
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
> mstxvlM 7
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
> mlxvlM 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
> mlxvx?M 120
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
> mstxvlM 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
> mstxvx?M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
> mlxvlM 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
> mstxvlM 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
> mstxvx?M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
> mlxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
> mlxvx?M 42
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
> mstxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
> mlxvlM 16
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
> mstxvlM 7
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
> mlxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
> mstxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
> mlxvlM 20
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
> mstxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
> mlxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
> mstxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
> mlxvlM 20
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
> mstxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
> mlxvlM 14
> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
> mstxvlM 7
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
> mlxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
> mstxvM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
> mstxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
> mlxvlM 70
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
> mstxvlM 70
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
> mlxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
> mstxvM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
> mstxvxM
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
> mlxvlM 21
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
> mstxvlM 21
> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times 
> mlxvlM 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-

-foffload-memory=pinned (was: [PATCH 1/5] openmp: Add -foffload-memory)

2023-02-13 Thread Thomas Schwinge
Hi!

On 2022-03-08T11:30:55+, Hafiz Abid Qadeer  wrote:
> From: Andrew Stubbs 
>
> Add a new option.  It will be used in follow-up patches.

> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi

> +@option{-foffload-memory=pinned} forces all host memory to be pinned (this
> +mode may require the user to increase the ulimit setting for locked memory).

So, this is currently implemented via 'mlockall', which, as discussed,
(a) has issues ('ulimit -l'), and (b) doesn't actually achieve what it
meant to achieve (because it doesn't register the page-locked memory with
the GPU driver).

So one idea was to re-purpose the unified shared memory
'gcc/omp-low.cc:pass_usm_transform' (compiler pass that "changes calls to
malloc/free/calloc/realloc and operator new to memory allocation
functions in libgomp with allocator=ompx_unified_shared_mem_alloc"),
.
(I have not yet looked into that in detail.)

Here's now a different idea.  As '-foffload-memory=pinned', per the name
of the option, concerns itself with memory used in offloading but not
host execution generally, why are we actually attempting to "[force] all
host memory to be pinned" -- why not just the memory that's being used
with offloading?  That is, if '-foffload-memory=pinned' is set, register
as page-locked with the GPU driver all memory that appears in OMP
offloading data regions, such as OpenMP 'target' 'map' clauses etc.  That
way, this is directed at the offloading data transfers, as intended, but
at the same time we don't "waste" page-locked memory for generic host
memory allocations.  What do you think -- you, who've spent a lot more
time on this topic than I have, so it's likely possible that I fail to
realize some "details"?


Grüße
 Thomas


Re: [PATCH] tree-optimization/108724 - vectorized code getting piecewise expanded

2023-02-13 Thread Jeff Law via Gcc-patches




On 2/10/23 04:02, Richard Biener via Gcc-patches wrote:

This fixes an oversight to when removing the hard limits on using
generic vectors for the vectorizer to enable both SLP and BB
vectorization to use those.  The vectorizer relies on vector lowering
to expand plus, minus and negate to bit operations but vector
lowering has a hard limit on the minimum number of elements per
work item.  Vectorizer costs for the testcase at hand work out
to vectorize a loop with just two work items per vector and that
causes element wise expansion and spilling.

The fix for now is to re-instantiate the hard limit, matching what
vector lowering does.  For the future the way to go is to emit the
lowered sequence directly from the vectorizer instead.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

PR tree-optimization/108724
* tree-vect-stmts.cc (vectorizable_operation): Avoid
using word_mode vectors when vector lowering will
decompose them to elementwise operations.

* gcc.target/i386/pr108724.c: New testcase.
OK.  Though can't this be a problem with logicals too?  Or is there 
something special about +- going on here?



jeff


Re: [PATCH] tree-optimization/106722 - fix CD-DCE edge marking

2023-02-13 Thread Jeff Law via Gcc-patches




On 2/10/23 03:12, Richard Biener via Gcc-patches wrote:

The following fixes a latent issue when we mark control edges but
end up with marking a block with no stmts necessary.  In this case
we fail to mark dependent control edges of that block.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Does this look OK?

Thanks,
Richard.

PR tree-optimization/106722
* tree-ssa-dce.cc (mark_last_stmt_necessary): Return
whether we marked a stmt.
(mark_control_dependent_edges_necessary): When
mark_last_stmt_necessary didn't mark any stmt make sure
to mark its control dependent edges.
(propagate_necessity): Likewise.

* gcc.dg/torture/pr108737.c: New testcase.



diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index b2fe9f4f55e..21b3294fc86 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -327,17 +327,23 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool 
aggressive)
  
  /* Mark the last statement of BB as necessary.  */
  
-static void

+static bool

Function comment probably needs an update for the new return value.

OK with that fix.

jeff


Re: [PATCH] tree-optimization/108724 - vectorized code getting piecewise expanded

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, Jeff Law wrote:

> 
> 
> On 2/10/23 04:02, Richard Biener via Gcc-patches wrote:
> > This fixes an oversight to when removing the hard limits on using
> > generic vectors for the vectorizer to enable both SLP and BB
> > vectorization to use those.  The vectorizer relies on vector lowering
> > to expand plus, minus and negate to bit operations but vector
> > lowering has a hard limit on the minimum number of elements per
> > work item.  Vectorizer costs for the testcase at hand work out
> > to vectorize a loop with just two work items per vector and that
> > causes element wise expansion and spilling.
> > 
> > The fix for now is to re-instantiate the hard limit, matching what
> > vector lowering does.  For the future the way to go is to emit the
> > lowered sequence directly from the vectorizer instead.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
> > 
> > Thanks,
> > Richard.
> > 
> >  PR tree-optimization/108724
> >  * tree-vect-stmts.cc (vectorizable_operation): Avoid
> >  using word_mode vectors when vector lowering will
> >  decompose them to elementwise operations.
> > 
> >  * gcc.target/i386/pr108724.c: New testcase.
> OK.  Though can't this be a problem with logicals too?  Or is there something
> special about +- going on here?

Logical ops do not cross lanes even when using scalar operations on GPRs.
For +- you have to compute the MSB separately to avoid spilling over to
the next vector lane.

Richard.


[PATCH] tree-optimization/28614 - high FRE time for gcc.c-torture/compile/20001226-1.c

2023-02-13 Thread Richard Biener via Gcc-patches
I noticed that for gcc.c-torture/compile/20001226-1.c even -O1 has
around 50% of the compile-time accounted to FRE.  That's because
we have blocks with a high incoming edge count and
can_track_predicate_on_edge visits all of them even though it could
stop after the second.  The function is also called repeatedly for
the same edge.  The following fixes this and reduces the FRE time
to 1% on the testcase.
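
For illustration, a rough sketch (not the actual machine-generated
20001226-1.c testcase) of the CFG shape involved: many predecessors feeding a
single join block, so a predicate-tracking walk that visits every incoming
edge instead of stopping early adds up quickly:

/* Rough illustration only.  Every 'case' branches to the common block that
   holds the return, so that block gets one incoming edge per case, and a
   per-query walk over all of those edges in can_track_predicate_on_edge
   becomes expensive when repeated.  */
int
many_preds (int x)
{
  int r;
  switch (x)
    {
    case 0: r = 10; break;
    case 1: r = 11; break;
    case 2: r = 12; break;
    /* ... the real testcase repeats this kind of pattern many more times ... */
    default: r = -1; break;
    }
  return r;
}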

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/28614
* tree-ssa-sccvn.cc (can_track_predicate_on_edge): Avoid
walking all edges in most cases.
(vn_nary_op_insert_pieces_predicated): Avoid repeated
calls to can_track_predicate_on_edge unless checking is
enabled.
(process_bb): Instead call it once here for each edge
we register possibly multiple predicates on.
---
 gcc/tree-ssa-sccvn.cc | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 028bedbc9a0..e5bb278196a 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -4527,7 +4527,7 @@ vn_nary_op_insert_pieces (unsigned int length, enum 
tree_code code,
 static bool
 can_track_predicate_on_edge (edge pred_e)
 {
-  /* ???  As we are currently recording a basic-block index in
+  /* ???  As we are currently recording the destination basic-block index in
  vn_pval.valid_dominated_by_p and using dominance for the
  validity check we cannot track predicates on all edges.  */
   if (single_pred_p (pred_e->dest))
@@ -4537,14 +4537,14 @@ can_track_predicate_on_edge (edge pred_e)
 return false;
   /* When there's more than one predecessor we cannot track
  predicate validity based on the destination block.  The
- exception is when all other incoming edges are backedges.  */
+ exception is when all other incoming edges sources are
+ dominated by the destination block.  */
   edge_iterator ei;
   edge e;
-  int cnt = 0;
   FOR_EACH_EDGE (e, ei, pred_e->dest->preds)
-if (! dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
-  cnt++;
-  return cnt == 1;
+if (e != pred_e && ! dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+  return false;
+  return true;
 }
 
 static vn_nary_op_t
@@ -4553,8 +4553,8 @@ vn_nary_op_insert_pieces_predicated (unsigned int length, 
enum tree_code code,
 tree result, unsigned int value_id,
 edge pred_e)
 {
-  if (!can_track_predicate_on_edge (pred_e))
-return NULL;
+  gcc_assert (can_track_predicate_on_edge (pred_e));
+
   if (dump_file && (dump_flags & TDF_DETAILS)
   /* ???  Fix dumping, but currently we only get comparisons.  */
   && TREE_CODE_CLASS (code) == tcc_comparison)
@@ -7933,11 +7933,11 @@ process_bb (rpo_elim &avail, basic_block bb,
tree ops[2];
ops[0] = lhs;
ops[1] = rhs;
-   if (do_region
-   && bitmap_bit_p (exit_bbs, true_e->dest->index))
+   if ((do_region && bitmap_bit_p (exit_bbs, true_e->dest->index))
+   || !can_track_predicate_on_edge (true_e))
  true_e = NULL;
-   if (do_region
-   && bitmap_bit_p (exit_bbs, false_e->dest->index))
+   if ((do_region && bitmap_bit_p (exit_bbs, false_e->dest->index))
+   || !can_track_predicate_on_edge (false_e))
  false_e = NULL;
if (true_e)
  vn_nary_op_insert_pieces_predicated
-- 
2.35.3


Re: -foffload-memory=pinned (was: [PATCH 1/5] openmp: Add -foffload-memory)

2023-02-13 Thread Andrew Stubbs

On 13/02/2023 14:38, Thomas Schwinge wrote:

Hi!

On 2022-03-08T11:30:55+, Hafiz Abid Qadeer  wrote:

From: Andrew Stubbs 

Add a new option.  It will be used in follow-up patches.



--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi



+@option{-foffload-memory=pinned} forces all host memory to be pinned (this
+mode may require the user to increase the ulimit setting for locked memory).


So, this is currently implemented via 'mlockall', which, as discussed,
(a) has issues ('ulimit -l'), and (b) doesn't actually achieve what it
meant to achieve (because it doesn't register the page-locked memory with
the GPU driver).

So one idea was to re-purpose the unified shared memory
'gcc/omp-low.cc:pass_usm_transform' (compiler pass that "changes calls to
malloc/free/calloc/realloc and operator new to memory allocation
functions in libgomp with allocator=ompx_unified_shared_mem_alloc"),
>
 (I have not yet looked into that in detail.)

Here's now a different idea.  As '-foffload-memory=pinned', per the name
of the option, concerns itself with memory used in offloading but not
host execution generally, why are we actually attempting to "[force] all
host memory to be pinned" -- why not just the memory that's being used
with offloading?  That is, if '-foffload-memory=pinned' is set, register
as page-locked with the GPU driver all memory that appears in OMP
offloading data regions, such as OpenMP 'target' 'map' clauses etc.  That
way, this is directed at the offloading data transfers, as intended, but
at the same time we don't "waste" page-locked memory for generic host
memory allocations.  What do you think -- you, who've spent a lot more
time on this topic than I have, so it's likely possible that I fail to
realize some "details"?


The main reason it is the way it is is because in general it's not 
possible to know what memory is going to be offloaded at the time it is 
allocated (and stack/static memory is never allocated that way).


If there's a way to pin it after the fact then maybe that's not a 
terrible idea? The downside is that the memory might already have been 
paged out at that point, and we'd have to track what we'd previously 
pinned, or else re-pin it every time we launch a kernel. We'd also have 
no way to unpin previously pinned memory (not that that's relevant to 
the "lock all" case).


My original plan was to use omp_alloc for both the standard OpenMP 
support and the -foffload-memory option (to get the benefit of pinning 
without modifying any source), but then I decided that the mlockall 
option was much less invasive. This is still the best way to implement 
target-independent pinning, when there's no driver registration option.


Andrew


Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread 盼 李 via Gcc-patches
Thanks all for your help and comments.

Let me share more information about this patch. Especially for the 
tree-ssa-sccvn.cc part.

Assume we have the below test code for this issue.

void
test_1(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v2 = *(vbool8_t*)in;
vbool16_t v5 = *(vbool16_t*)in;

*(vbool8_t*)(out + 100) = v2;
*(vbool16_t*)(out + 200) = v5;
}

Without the tree-ssa-sccvn.cc file code change.

void test_1 (int8_t * restrict in, int8_t * restrict out)
{
  vbool8_t v2;
  __rvv_bool16_t _1;

   [local count: 1073741824]:
  v2_4 = MEM[(vbool8_t *)in_3(D)];
  _1 = VIEW_CONVERT_EXPR<__rvv_bool16_t>(v2_4);  // insert during 039.fre1
  MEM[(vbool8_t *)out_5(D) + 100B] = v2_4;
  MEM[(vbool16_t *)out_5(D) + 200B] = _1;
  return;
}

With the tree-ssa-sccvn.cc file code change.

void test_1 (int8_t * restrict in, int8_t * restrict out)
{
  vbool16_t v5;
  vbool8_t v2;

   [local count: 1073741824]:
  v2_3 = MEM[(vbool8_t *)in_2(D)];
  v5_4 = MEM[(vbool16_t *)in_2(D)];
  MEM[(vbool8_t *)out_5(D) + 100B] = v2_3;
  MEM[(vbool16_t *)out_5(D) + 200B] = v5_4;
  return;
}

Thus, I figured out that the a-main.c.039t.fre1 pass results in this CONVERT being
inserted.
With some debugging, I located the difference: it comes from
expressions_equal_p.  If GET_MODE_SIZE (mode) is the same for VNx8BImode
and VNx4BImode, expressions_equal_p will compare the same address of a
tree, aka POLY_INT_CST [8, 8].

visit_reference_op_load
|- vn_reference_lookup
|- vn_reference_lookup_2
 |- find_slot_with_hash
 |- vn_reference_hasher::equal
 |- expressions_equal_p

Meanwhile, we also double-checked that setting different MODE_SIZEs for
VNx8BImode and VNx4BImode (for example, [8, 1] and [4, 1], for testing only) is
able to resolve this issue.  But they should be [1, 1] according to the ISA semantics.

Thus, we tried to set other MODE_XXX values, but that does not seem to work at all.  For example:

VNx4BIMode NUNITS [0x4, 0x4]
VNx8BIMode NUNITS [0x8, 0x8]

Finally, I found TARGET_MODES_TIEABLE_P and injected it into the function
visit_reference_op_load to resolve this issue.

I will continue to try other approaches besides tree-ssa-sccvn.cc if this is not
the right place for this issue.

Thanks again, and I will keep you posted.

Pan




From: Andrew Stubbs 
Sent: Monday, February 13, 2023 19:00
To: Richard Biener ; juzhe.zh...@rivai.ai 

Cc: Pan Li ; gcc-patches 
; kito.cheng ; 
richard.sandif...@arm.com 
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

I presume I've been CC'd on this conversation because weird vector
architecture problems have happened to me before. :)

However, I'm not sure I can help much because AMD GCN does not use
BImode vectors at all. This is partly because loading boolean values
into a GCN vector would have 31 padding bits for each lane, but mostly
because the result of comparison instructions is written to a DImode
scalar register, not into a vector.

I did experiment, long ago, with having a V64BImode that could be stored
in scalar registers (tieable with DImode), but there wasn't any great
advantage and it broke VECTOR_MODE_P in most other contexts.

It's possible to store truth values in vectors as integers, and there
are some cases where we do so (SIMD clone mask arguments, for example),
but that's mostly to smooth things over in the middle-end.

The problem with padding bits is something I do see: V64QImode has 24
padding bits for each lane, in register. While there are instructions
that will load and store QImode vectors correctly, without the padding,
the backend still has to handle all the sign-extends, zero-extends, and
truncates explicitly, because the middle-end and expand pass give no
assistance with that for vectors (unlike scalars).

Andrew
On 13/02/2023 08:07, Richard Biener via Gcc-patches wrote:
> On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:
>
>> Thanks for contributing this.
>> Hi, Richard. Can you help us with this issue?
>> In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
>> (VNx2BImode), vbool64_t (VNx1BImode)
>> Since we are using 1bit-mask which is 1-BOOL occupy 1bit.
>> According to RVV ISA, we adjust these modes as follows:
>>
>> VNx8BImode poly (8,8) NUNITS (each unit is a 1-bit mask)
>> VNx4BImode poly (4,4) NUNITS (each unit is a 1-bit mask)
>> VNx2BImode poly (2,2) NUNITS (each unit is a 1-bit mask)
>> VNx1BImode poly (1,1) NUNITS (each unit is a 1-bit mask)
>
> So how's VNx1BImode laid out for N == 2?  Is that still a single
> byte and two consecutive bits?  I suppose so.
>
> But then GET_MODE_PRECISION (GET_MODE_INNER (..)) should always be 1?
>
> I'm not sure what GET_MODE_PRECISION of the vector mode itself
> should be here, but then I wonder 

Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-13 Thread Richard Biener via Gcc-patches
On Mon, 13 Feb 2023, 盼 李 wrote:

> Thanks all for your help and comments.
> 
> Let me share more information about this patch. Especially for the 
> tree-ssa-sccvn.cc part.
> 
> Assume we have the below test code for this issue.
> 
> void
> test_1(int8_t * restrict in, int8_t * restrict out) {
> vbool8_t v2 = *(vbool8_t*)in;
> vbool16_t v5 = *(vbool16_t*)in;
> 
> *(vbool8_t*)(out + 100) = v2;
> *(vbool16_t*)(out + 200) = v5;
> }
> 
> Without the tree-ssa-sccvn.cc file code change.
> 
> void test_1 (int8_t * restrict in, int8_t * restrict out)
> {
>   vbool8_t v2;
>   __rvv_bool16_t _1;
> 
>[local count: 1073741824]:
>   v2_4 = MEM[(vbool8_t *)in_3(D)];
>   _1 = VIEW_CONVERT_EXPR<__rvv_bool16_t>(v2_4);  // insert during 039.fre1
>   MEM[(vbool8_t *)out_5(D) + 100B] = v2_4;
>   MEM[(vbool16_t *)out_5(D) + 200B] = _1;
>   return;
> }
> 
> With the tree-ssa-sccvn.cc file code change.
> 
> void test_1 (int8_t * restrict in, int8_t * restrict out)
> {
>   vbool16_t v5;
>   vbool8_t v2;
> 
>[local count: 1073741824]:
>   v2_3 = MEM[(vbool8_t *)in_2(D)];
>   v5_4 = MEM[(vbool16_t *)in_2(D)];
>   MEM[(vbool8_t *)out_5(D) + 100B] = v2_3;
>   MEM[(vbool16_t *)out_5(D) + 200B] = v5_4;
>   return;
> }
> 
> Thus, I figured out the a-main.c.039t.fre1 pass results in this CONVERT being 
> inserted.
> With some debugging, I located the difference that comes from the
> expressions_equal_p. If GET_MODE_SIZE(mode) is the same between the VxN8Bimode
> and VxN4Bimode, the expressions_equal_p will compare the same address of a 
> tree, aka
> POLY_INT_CST [8, 8].
> 
> visit_reference_op_load
> |- vn_reference_lookup
> |- vn_reference_lookup_2
>  |- find_slot_with_hash
>  |- vn_reference_hasher::equal
>  |- expressions_equal_p
> 
> Meanwhile, we also double-checked that set the different MODE_SIZE of both the
> VxN8Bimode and VxN4Bimode (for example, [8, 1] and [4,1] for test only) are 
> able
> to resolve this issue. But they should be [1, 1] according to the ISA 
> semantics.
> 
> Thus, we try to set other MODE_XXX but it seems not working at all. For 
> example:
> 
> VNx4BIMode NUNITS [0x4, 0x4]
> VNx8BIMode NUNITS [0x8, 0x8]
> 
> Finally, I found the TARGET_MODES_TIEABLE_P and inject it into the function
> visit_reference_op_load to resolve this issue.
> 
> I will continue to try other ways besides the tree-ssa-sccvn.cc if this may 
> not be
> the right place for this issue.

There are other places like alias analysis which will be not happy
if the mode size/precision do not match reality.  So no, I don't think
modes_tieable is the correct thing to check here.  Instead the existing
check seems to be to the point but the modes are not set up correctly
to carry the info of one having padding at the end and the other not.

Richard.

> Thank again and will keep you posted.
> 
> Pan
> 
> 
> 
> 
> From: Andrew Stubbs 
> Sent: Monday, February 13, 2023 19:00
> To: Richard Biener ; juzhe.zh...@rivai.ai 
> 
> Cc: Pan Li ; gcc-patches 
> ; kito.cheng ; 
> richard.sandif...@arm.com 
> Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
> 
> I presume I've been CC'd on this conversation because weird vector
> architecture problems have happened to me before. :)
> 
> However, I'm not sure I can help much because AMD GCN does not use
> BImode vectors at all. This is partly because loading boolean values
> into a GCN vector would have 31 padding bits for each lane, but mostly
> because the result of comparison instructions is written to a DImode
> scalar register, not into a vector.
> 
> I did experiment, long ago, with having a V64BImode that could be stored
> in scalar registers (tieable with DImode), but there wasn't any great
> advantage and it broke VECTOR_MODE_P in most other contexts.
> 
> It's possible to store truth values in vectors as integers, and there
> are some cases where we do so (SIMD clone mask arguments, for example),
> but that's mostly to smooth things over in the middle-end.
> 
> The problem with padding bits is something I do see: V64QImode has 24
> padding bits for each lane, in register. While there are instructions
> that will load and store QImode vectors correctly, without the padding,
> the backend still has to handle all the sign-extends, zero-extends, and
> truncates explicitly, because the middle-end and expand pass give no
> assistance with that for vectors (unlike scalars).
> 
> Andrew
> On 13/02/2023 08:07, Richard Biener via Gcc-patches wrote:
> > On Sat, 11 Feb 2023, juzhe.zh...@rivai.ai wrote:
> >
> >> Thanks for contributing this.
> >> Hi, Richard. Can you help us with this issue?
> >> In RVV, we have vbool8_t (VNx8BImode), vbool16_t (VNx4BImode), vbool32_t 
> >> (VNx2BImode), vbool64_t (VNx1BImode)

Re: [PATCH v5 1/5] libcpp: reject codepoints above 0x10FFFF

2023-02-13 Thread Jason Merrill via Gcc-patches

On 1/25/23 13:06, Ben Boeckel wrote:

Unicode does not support such values because they are unrepresentable in
UTF-16.

libcpp/

* charset.cc: Reject encodings of codepoints above 0x10FFFF.
UTF-16 does not support such codepoints and therefore
Unicode rejects such values.


It seems that this causes a bunch of testsuite failures from tests that 
expect this limit to be checked elsewhere with a different diagnostic, 
so I think the easiest thing is to fold this into _cpp_valid_utf8_str 
instead, i.e.:


Make sense?
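
For reference, a minimal sketch (not part of the patch) of what the folded
check catches, assuming UCS_LIMIT is the usual 0x10FFFF Unicode ceiling:

/* Sketch only: exercises the helper from the attached patch, which is
   internal to libcpp.  "\xF4\x90\x80\x80" is structurally well-formed
   4-byte UTF-8, but it encodes U+110000, above the Unicode limit, so the
   helper is expected to return false; the ASCII/2-byte string stays valid.  */
extern bool _cpp_valid_utf8_str (const char *);

int
main ()
{
  const char *ok = "caf\xC3\xA9";            /* 'café', U+00E9 as C3 A9 */
  const char *too_big = "\xF4\x90\x80\x80";  /* would-be U+110000 */
  return (_cpp_valid_utf8_str (ok) && !_cpp_valid_utf8_str (too_big)) ? 0 : 1;
}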

Jason

From 296e9d1e16533979d12bd98db2937e396a0796f3 Mon Sep 17 00:00:00 2001
From: Ben Boeckel 
Date: Sat, 10 Dec 2022 17:20:49 -0500
Subject: [PATCH] libcpp: add a function to determine UTF-8 validity of a C
 string
To: gcc-patches@gcc.gnu.org

This simplifies the interface for other UTF-8 validity detections when a
simple "yes" or "no" answer is sufficient.

libcpp/

	* charset.cc: Add `_cpp_valid_utf8_str` which determines whether
	a C string is valid UTF-8 or not.
	* internal.h: Add prototype for `_cpp_valid_utf8_str`.

Signed-off-by: Ben Boeckel 
---
 libcpp/internal.h |  2 ++
 libcpp/charset.cc | 24 
 2 files changed, 26 insertions(+)

diff --git a/libcpp/internal.h b/libcpp/internal.h
index 9724676a8cd..48520901b2d 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -834,6 +834,8 @@ extern bool _cpp_valid_utf8 (cpp_reader *pfile,
 			 struct normalize_state *nst,
 			 cppchar_t *cp);
 
+extern bool _cpp_valid_utf8_str (const char *str);
+
 extern void _cpp_destroy_iconv (cpp_reader *);
 extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
 	  unsigned char *, size_t, size_t,
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index 3c47d4f868b..42a1b596c06 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1864,6 +1864,30 @@ _cpp_valid_utf8 (cpp_reader *pfile,
   return true;
 }
 
+/*  Detect whether a C-string is a valid UTF-8-encoded set of bytes. Returns
+`false` if any contained byte sequence encodes an invalid Unicode codepoint
+or is not a valid UTF-8 sequence. Returns `true` otherwise. */
+
+extern bool
+_cpp_valid_utf8_str (const char *name)
+{
+  const uchar* in = (const uchar*)name;
+  size_t len = strlen (name);
+  cppchar_t cp;
+
+  while (*in)
+{
+  if (one_utf8_to_cppchar (&in, &len, &cp))
+	return false;
+
+  /* one_utf8_to_cppchar doesn't check this limit.  */
+  if (cp > UCS_LIMIT)
+	return false;
+}
+
+  return true;
+}
+
 /* Subroutine of convert_hex and convert_oct.  N is the representation
in the execution character set of a numeric escape; write it into the
string buffer TBUF and update the end-of-string pointer therein.  WIDE
-- 
2.31.1



Re: [PATCH] tree-optimization/108724 - vectorized code getting piecewise expanded

2023-02-13 Thread Jeff Law via Gcc-patches




On 2/13/23 07:51, Richard Biener wrote:

On Mon, 13 Feb 2023, Jeff Law wrote:




On 2/10/23 04:02, Richard Biener via Gcc-patches wrote:

This fixes an oversight to when removing the hard limits on using
generic vectors for the vectorizer to enable both SLP and BB
vectorization to use those.  The vectorizer relies on vector lowering
to expand plus, minus and negate to bit operations but vector
lowering has a hard limit on the minimum number of elements per
work item.  Vectorizer costs for the testcase at hand work out
to vectorize a loop with just two work items per vector and that
causes element wise expansion and spilling.

The fix for now is to re-instantiate the hard limit, matching what
vector lowering does.  For the future the way to go is to emit the
lowered sequence directly from the vectorizer instead.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

  PR tree-optimization/108724
  * tree-vect-stmts.cc (vectorizable_operation): Avoid
  using word_mode vectors when vector lowering will
  decompose them to elementwise operations.

  * gcc.target/i386/pr108724.c: New testcase.

OK.  Though can't this be a problem with logicals too?  Or is there something
special about +- going on here?


Logical ops do not cross lanes even when using scalar operations on GPRs.
For +- you have to compute the MSB separately to avoid spilling over to
the next vector lane.

Oh, yes, makes perfect sense.

jeff


[PATCH] c++: fix ICE in joust_maybe_elide_copy [PR106675]

2023-02-13 Thread Marek Polacek via Gcc-patches
joust_maybe_elide_copy checks that the last conversion in the ICS for
the first argument is ck_ref_bind, which is reasonable, because we've
checked that we're dealing with a copy/move constructor.  But it can
also happen that we couldn't figure out which conversion function is
better to convert the argument, as in this testcase: joust couldn't
decide if we should go with

  operator foo &()

or

  operator foo const &()

so we get a ck_ambig, which then upsets joust_maybe_elide_copy.  Since
a ck_ambig can validly occur, I think we should just return early, as
in the patch below.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/12?

PR c++/106675

gcc/cp/ChangeLog:

* call.cc (joust_maybe_elide_copy): Return false for ck_ambig.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/overload-conv-5.C: New test.
---
 gcc/cp/call.cc   |  2 ++
 gcc/testsuite/g++.dg/cpp0x/overload-conv-5.C | 21 
 2 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/overload-conv-5.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index a349d8e79db..048b2b052f8 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -12542,6 +12542,8 @@ joust_maybe_elide_copy (z_candidate *&cand)
   if (!DECL_COPY_CONSTRUCTOR_P (fn) && !DECL_MOVE_CONSTRUCTOR_P (fn))
 return false;
   conversion *conv = cand->convs[0];
+  if (conv->kind == ck_ambig)
+return false;
   gcc_checking_assert (conv->kind == ck_ref_bind);
   conv = next_conversion (conv);
   if (conv->kind == ck_user && !TYPE_REF_P (conv->type))
diff --git a/gcc/testsuite/g++.dg/cpp0x/overload-conv-5.C 
b/gcc/testsuite/g++.dg/cpp0x/overload-conv-5.C
new file mode 100644
index 000..b1e7766e42b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/overload-conv-5.C
@@ -0,0 +1,21 @@
+// PR c++/106675
+// { dg-do compile { target c++11 } }
+
+struct foo {
+int n_;
+foo(int n) : n_(n) {}
+};
+
+struct bar {
+int n_;
+
+operator foo() const {
+return foo(n_);
+}
+operator foo &() { return *reinterpret_cast<foo *>(n_); }
+operator foo const &() = delete;
+
+void crashgcc() {
+foo tmp(*this); // { dg-error "ambiguous" }
+}
+};

base-commit: 72ae1e5635648bd3f6a5760ca46d531ad1f2c6b1
-- 
2.39.1



[PATCH 1/2] c++: factor out TYPENAME_TYPE substitution

2023-02-13 Thread Patrick Palka via Gcc-patches
[N.B. this is a corrected version of
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607443.html ]

This patch factors out the TYPENAME_TYPE case of tsubst into a separate
function tsubst_typename_type.  It also factors out the two tsubst flags
controlling TYPENAME_TYPE substitution, tf_keep_type_decl and tf_tst_ok,
into distinct boolean parameters of this new function (and of
make_typename_type).  Consequently, callers which used to pass tf_tst_ok
to tsubst now instead must directly call tsubst_typename_type when
appropriate.  In a subsequent patch we'll add another flag to
tsubst_typename_type controlling whether we want to ignore non-types
during the qualified lookup.

gcc/cp/ChangeLog:

* cp-tree.h (enum tsubst_flags): Remove tf_keep_type_decl
and tf_tst_ok.
(make_typename_type): Add two trailing boolean parameters
defaulted to false.
* decl.cc (make_typename_type): Replace uses of
tf_keep_type_decl and tf_tst_ok with the corresponding new
boolean parameters.
* pt.cc (tsubst_typename_type): New, factored out from tsubst
and adjusted after removing tf_keep_type_decl and tf_tst_ok.
(tsubst_decl) : Conditionally call
tsubst_typename_type directly instead of using tf_tst_ok.
(tsubst) : Call tsubst_typename_type.
(tsubst_copy) : Conditionally call
tsubst_typename_type directly instead of using tf_tst_ok.
(tsubst_copy_and_build) : Likewise.
: Likewise.
---
 gcc/cp/cp-tree.h |   9 +-
 gcc/cp/decl.cc   |  17 ++--
 gcc/cp/pt.cc | 223 +--
 3 files changed, 134 insertions(+), 115 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 06bc64a6b8d..a7c5765fc33 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -5573,8 +5573,7 @@ enum tsubst_flags {
   tf_error = 1 << 0,/* give error messages  */
   tf_warning = 1 << 1,  /* give warnings too  */
   tf_ignore_bad_quals = 1 << 2, /* ignore bad cvr qualifiers */
-  tf_keep_type_decl = 1 << 3,   /* retain typedef type decls
-   (make_typename_type use) */
+  /* 1 << 3 available */
   tf_ptrmem_ok = 1 << 4,/* pointers to member ok (internal
instantiate_type use) */
   tf_user = 1 << 5, /* found template must be a user template
@@ -5594,8 +5593,7 @@ enum tsubst_flags {
(build_target_expr and friends) */
   tf_norm = 1 << 11,/* Build diagnostic information during
constraint normalization.  */
-  tf_tst_ok = 1 << 12,  /* Allow a typename-specifier to name
-   a template (C++17 or later).  */
+  /* 1 << 12 available */
   tf_dguide = 1 << 13, /* Building a deduction guide from a ctor.  */
   /* Convenient substitution flags combinations.  */
   tf_warning_or_error = tf_warning | tf_error
@@ -6846,7 +6844,8 @@ extern tree declare_local_label   (tree);
 extern tree define_label   (location_t, tree);
 extern void check_goto (tree);
 extern bool check_omp_return   (void);
-extern tree make_typename_type (tree, tree, enum tag_types, 
tsubst_flags_t);
+extern tree make_typename_type (tree, tree, enum tag_types, 
tsubst_flags_t,
+bool = false, bool = false);
 extern tree build_typename_type(tree, tree, tree, 
tag_types);
 extern tree make_unbound_class_template(tree, tree, tree, 
tsubst_flags_t);
 extern tree make_unbound_class_template_raw(tree, tree, tree);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index d606b31d7a7..430533606b0 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -4228,14 +4228,17 @@ build_typename_type (tree context, tree name, tree 
fullname,
 /* Resolve `typename CONTEXT::NAME'.  TAG_TYPE indicates the tag
provided to name the type.  Returns an appropriate type, unless an
error occurs, in which case error_mark_node is returned.  If we
-   locate a non-artificial TYPE_DECL and TF_KEEP_TYPE_DECL is set, we
+   locate a non-artificial TYPE_DECL and KEEP_TYPE_DECL is true, we
return that, rather than the _TYPE it corresponds to, in other
-   cases we look through the type decl.  If TF_ERROR is set, complain
-   about errors, otherwise be quiet.  */
+   cases we look through the type decl.  If TEMPLATE_OK is true and
+   we found a TEMPLATE_DECL then we return a CTAD placeholder for the
+   TEMPLATE_DECL.  If TF_ERROR is set, complain about errors, otherwise
+   be quiet.  */
 
 tree
 make_typename_type (tree context, tree name, enum tag_types tag_type,
-   tsubst_flags_t complain)
+   tsubst_flags_t complain, bool keep_type_decl /* = false */,
+   bool template_ok /* 

[PATCH 2/2] c++: TYPENAME_TYPE lookup ignoring non-types [PR107773]

2023-02-13 Thread Patrick Palka via Gcc-patches
[N.B. this is a corrected version of
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607443.html ]

Currently when resolving a TYPENAME_TYPE for 'typename T::m' via
make_typename_type, we consider only type bindings of 'm' and ignore
non-type ones.  But [temp.res.general]/3 says, in a note, "the usual
qualified name lookup ([basic.lookup.qual]) applies even in the presence
of 'typename'", and qualified name lookup doesn't discriminate between
type and non-type bindings.  So when resolving such a TYPENAME_TYPE
we want the lookup to consider all bindings.

An exception is when we have a TYPENAME_TYPE corresponding to the
qualifying scope appearing before the :: scope resolution operator, such
as 'T::type' in 'typename T::type::m'.  In that case, [basic.lookup.qual]/1
applies, and lookup for such a TYPENAME_TYPE must ignore non-type
bindings.  So in order to correctly handle all cases, make_typename_type
needs an additional flag controlling whether lookup should ignore
non-types or not.

To that end this patch adds a type_only flag to make_typename_type and
defaults it to false (do not ignore non-types).  In contexts where we do
want to ignore non-types (when substituting into the scope of a
TYPENAME_TYPE, SCOPE_REF, USING_DECL) we call tsubst_typename_type
directly with type_only=true.
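
As a rough illustration of the distinction (my sketch, not one of the new
typename24-26.C testcases):

// Sketch only: shows why 'typename T::m' must see non-type bindings.
struct B { struct m { }; };     // 'm' names a type in the base
struct D : B { int m; };        // ...but a data member in the derived class

template<class T>
void f ()
{
  // Qualified lookup of 'm' in D finds the data member, which hides B::m.
  // With the patch, that non-type binding is no longer skipped, so
  // instantiating f<D>() is diagnosed instead of quietly using B::m.
  typename T::m x;
  (void) x;
}

// For the qualifying scope before '::' ([basic.lookup.qual]/1), e.g.
// 'typename T::type::m', lookup of 'type' still ignores non-types, which is
// what the separate type_only=true path is for.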

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/107773

gcc/cp/ChangeLog:

* cp-tree.h (make_typename_type): Add another boolean parameter
that defaults to false.
* decl.cc (make_typename_type): Use lookup_member instead of
lookup_field.  Pass want_type=type_only instead of =false to
lookup_member.  Generalize format specifier in diagnostic to
handle both type and non-type bindings.
* pt.cc (tsubst_typename_type): Add another boolean parameter
that defaults to false and pass it to make_typename_type.  If
TYPE_CONTEXT is a TYPENAME_TYPE recurse with type_only=true
instead of substituting it via tsubst.
(tsubst_decl) : If the scope is a TYPENAME_TYPE
call tsubst_typename_type directly with type_only=true instead
of substituting it via tsubst.
(tsubst_qualified_id): Likewise.
* search.cc (lookup_member): Document default argument.

gcc/testsuite/ChangeLog:

* g++.dg/template/typename24.C: New test.
* g++.dg/template/typename25.C: New test.
* g++.dg/template/typename26.C: New test.
---
 gcc/cp/cp-tree.h   |  2 +-
 gcc/cp/decl.cc | 14 -
 gcc/cp/pt.cc   | 24 +++
 gcc/cp/search.cc   |  2 +-
 gcc/testsuite/g++.dg/template/typename24.C | 18 
 gcc/testsuite/g++.dg/template/typename25.C | 34 ++
 gcc/testsuite/g++.dg/template/typename26.C | 20 +
 7 files changed, 100 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/typename24.C
 create mode 100644 gcc/testsuite/g++.dg/template/typename25.C
 create mode 100644 gcc/testsuite/g++.dg/template/typename26.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index a7c5765fc33..1241dbf8037 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6845,7 +6845,7 @@ extern tree define_label  (location_t, 
tree);
 extern void check_goto (tree);
 extern bool check_omp_return   (void);
 extern tree make_typename_type (tree, tree, enum tag_types, 
tsubst_flags_t,
-bool = false, bool = false);
+bool = false, bool = false, 
bool = false);
 extern tree build_typename_type(tree, tree, tree, 
tag_types);
 extern tree make_unbound_class_template(tree, tree, tree, 
tsubst_flags_t);
 extern tree make_unbound_class_template_raw(tree, tree, tree);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 430533606b0..c741dc23d99 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -4232,13 +4232,14 @@ build_typename_type (tree context, tree name, tree 
fullname,
return that, rather than the _TYPE it corresponds to, in other
cases we look through the type decl.  If TEMPLATE_OK is true and
we found a TEMPLATE_DECL then we return a CTAD placeholder for the
-   TEMPLATE_DECL.  If TF_ERROR is set, complain about errors, otherwise
-   be quiet.  */
+   TEMPLATE_DECL.  If TYPE_ONLY is true, lookup of NAME in CONTEXT
+   ignores non-type bindings.  If TF_ERROR is set, complain about errors,
+   otherwise be quiet.  */
 
 tree
 make_typename_type (tree context, tree name, enum tag_types tag_type,
tsubst_flags_t complain, bool keep_type_decl /* = false */,
-   bool template_ok /* = false */)
+   bool template_ok /* = false */, bool type_only /* = false 
*/)
 {
   tree

Re: [RFC PATCH v1 08/10] ifcvt: add if-conversion to conditional-zero instructions

2023-02-13 Thread Richard Sandiford via Gcc-patches
Andrew Pinski via Gcc-patches  writes:
> On Fri, Feb 10, 2023 at 2:47 PM Philipp Tomsich
>  wrote:
>>
>> Some architectures, as is the case on RISC-V with the proposed
>> ZiCondOps and the vendor-defined XVentanaCondOps, define a
>> conditional-zero instruction that is equivalent to:
>>  - the positive form:  rd = (rc != 0) ? rs : 0
>>  - the negated form:   rd = (rc == 0) ? rs : 0
>>
>> While noce_try_store_flag_mask will somewhat work for this case, it
>> will generate a number of atomic RTX that will misdirect the cost
>> calculation and may be too long (i.e., 4 RTX and more) to successfully
>> merge at combine-time.
>
> Can you expand on this? Especially when there are patterns that use
> (if_then_else) already.
>
>>
>> Instead, we add two new transforms that attempt to build up what we
>> define as the canonical form of a conditional-zero expression:
>>
>>   (set (match_operand 0 "register_operand" "=r")
>>(and (neg (eq_or_ne (match_operand 1 "register_operand" "r")
>>(const_int 0)))
>> (match_operand 2 "register_operand" "r")))
>
> Again why are you not using:
> (set (reg) (if_then_else (eq_ne (reg) (const_int 0)) (reg) (const_int 0)))
> Form instead of the really bad "canonical" form of the above?

I don't think one form is inherently better than the other if we think
about just this one case.  But I agree that the if_then_else form is
currently the canonical form for the operation, and extends more
naturally to the general case.  AArch64 already matches specifically
for it (with xzr providing the zero value).

Thanks,
Richard


Re: Support for NOINLINE attribute

2023-02-13 Thread Harald Anlauf via Gcc-patches

Pushed as:

commit 086a1df4374962787db37c1f0d1bd9beb828f9e3

Thanks,
Harald

On 2/12/23 22:28, Harald Anlauf via Gcc-patches wrote:

Hi Rimvydas,


Gesendet: Sonntag, 12. Februar 2023 um 07:59 Uhr
Von: "Rimvydas Jasinskas" 
An: "Harald Anlauf" 
Cc: "fortran" 
Betreff: Re: Support for NOINLINE attribute

On Sat, Feb 11, 2023 at 11:26 PM Harald Anlauf  wrote:

I am also not a native speaker, like many others contributing, but let
me quote the relevant original paragraph:

"The @code{noreturn} keyword tells the compiler to assume that
@code{fatal} cannot return.  It can then optimize without regard to what
would happen if @code{fatal} ever did return.  This makes slightly
better code.  More importantly, it helps avoid spurious warnings of
uninitialized variables."

My reading of this original paragraph differs very much from the
intention I get from the shortened version.  Would you please reread?


Same, from extend.texi, see gcc/testsuite/gfortran.dg/noreturn-3.f90
It is about marking dead conditional branches, so that the compiler
can prove proper initialization (no -Wmaybe-uninitialized given).  It
should behave the same as in C frontend.


True.  And that's the whole point (IMHO), not silencing the compiler.

Hmm, both look the same to me; the silencing of false-positive
diagnostics is already implied by "spurious".  To simplify, I have
changed it in v2 to just:
"add a hint that a given function cannot return"; the documentation could
be expanded later.


But shouldn't we rather follow what the C family of compilers in the
first place does for a particular target?  Most relevant libraries
for Fortran code are either C/C++ or Fortran anyway, including any
of the common MPI implementations, so should we care about Ada?

I agree with you.  I have removed SUPPORTS_WEAK check and fixed
indentation in v2.

Regtested cleany on x86_64-pc-linux-gnu.

Regards,
Rimvydas


this version of the patch looks good to me, so it is basically OK
to commit.

There is one thing I cannot test, which is the handling of weak symbols
on other platforms.  A quick glance at the C testcases suggests that
someone with access to either an NVPTX or MingGW target might tell
whether that particular target should be excluded.  So I'd like to wait
for 24 hours for others to comment on this.

I see that you've signed off your patch.  Do you have commit rights?
Otherwise I'll commit for you.  (I've CC'ed gcc-patches@ for this
purpose.)

Thanks for the patch!

Harald







Re: [PATCH v5 4/5] c++modules: report imported CMI files as dependencies

2023-02-13 Thread Jason Merrill via Gcc-patches

On 1/25/23 13:06, Ben Boeckel wrote:

They affect the build, so report them via `-MF` mechanisms.

gcc/cp/

* module.cc (do_import): Report imported CMI files as
dependencies.


Both this and the mapper dependency patch seem to cause most of the 
modules testcases to crash; please remember to run the regression tests 
(https://gcc.gnu.org/contribute.html#testing)



Signed-off-by: Ben Boeckel 
---
  gcc/cp/module.cc | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index ebd30f63d81..dbd1b721616 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -18966,6 +18966,8 @@ module_state::do_import (cpp_reader *reader, bool 
outermost)
dump () && dump ("CMI is %s", file);
if (note_module_cmi_yes || inform_cmi_p)
inform (loc, "reading CMI %qs", file);
+  /* Add the CMI file to the dependency tracking. */
+  deps_add_dep (cpp_get_deps (reader), file);
fd = open (file, O_RDONLY | O_CLOEXEC | O_BINARY);
e = errno;
  }




Re: [RFC PATCH v1 08/10] ifcvt: add if-conversion to conditional-zero instructions

2023-02-13 Thread Jeff Law via Gcc-patches




On 2/13/23 10:32, Richard Sandiford via Gcc-patches wrote:

Andrew Pinski via Gcc-patches  writes:

On Fri, Feb 10, 2023 at 2:47 PM Philipp Tomsich
 wrote:


Some architectures, as is the case on RISC-V with the proposed
ZiCondOps and the vendor-defined XVentanaCondOps, define a
conditional-zero instruction that is equivalent to:
  - the positive form:  rd = (rc != 0) ? rs : 0
  - the negated form:   rd = (rc == 0) ? rs : 0

While noce_try_store_flag_mask will somewhat work for this case, it
will generate a number of atomic RTX that will misdirect the cost
calculation and may be too long (i.e., 4 RTX and more) to successfully
merge at combine-time.


Can you expand on this? Especially when there are patterns that use
(if_then_else) already.



Instead, we add two new transforms that attempt to build up what we
define as the canonical form of a conditional-zero expression:

   (set (match_operand 0 "register_operand" "=r")
(and (neg (eq_or_ne (match_operand 1 "register_operand" "r")
(const_int 0)))
 (match_operand 2 "register_operand" "r")))


Again why are you not using:
(set (reg) (if_then_else (eq_ne (reg) (const_int 0)) (reg) (const_int 0)))
Form instead of the really bad "canonical" form of the above?


I don't think one form is inherently better than the other if we think
about just this one case.  But I agree that the if_then_else form is
currently the canonical form for the operation, and extends more
naturally to the general case.  AArch64 already matches specifically
for it (with xzr providing the zero value).
The more I think about it, the more I prefer the if-then-else form.   My 
biggest hesitation with getting behind one form or the other is a lack 
of knowledge about which is likely better interpreted by simplify-rtx 
and friends -- though it may not matter much in practice.


Jeff


Re: [RFC PATCH v1 08/10] ifcvt: add if-conversion to conditional-zero instructions

2023-02-13 Thread Andrew Pinski via Gcc-patches
On Mon, Feb 13, 2023 at 10:43 AM Jeff Law  wrote:
>
>
>
> On 2/13/23 10:32, Richard Sandiford via Gcc-patches wrote:
> > Andrew Pinski via Gcc-patches  writes:
> >> On Fri, Feb 10, 2023 at 2:47 PM Philipp Tomsich
> >>  wrote:
> >>>
> >>> Some architectures, as is the case on RISC-V with the proposed
> >>> ZiCondOps and the vendor-defined XVentanaCondOps, define a
> >>> conditional-zero instruction that is equivalent to:
> >>>   - the positive form:  rd = (rc != 0) ? rs : 0
> >>>   - the negated form:   rd = (rc == 0) ? rs : 0
> >>>
> >>> While noce_try_store_flag_mask will somewhat work for this case, it
> >>> will generate a number of atomic RTX that will misdirect the cost
> >>> calculation and may be too long (i.e., 4 RTX and more) to successfully
> >>> merge at combine-time.
> >>
> >> Can you expand on this? Especially when there are patterns that use
> >> (if_then_else) already.
> >>
> >>>
> >>> Instead, we add two new transforms that attempt to build up what we
> >>> define as the canonical form of a conditional-zero expression:
> >>>
> >>>(set (match_operand 0 "register_operand" "=r")
> >>> (and (neg (eq_or_ne (match_operand 1 "register_operand" "r")
> >>> (const_int 0)))
> >>>  (match_operand 2 "register_operand" "r")))
> >>
> >> Again why are you not using:
> >> (set (reg) (if_then_else (eq_ne (reg) (const_int 0)) (reg) (const_int 0)))
> >> Form instead of the really bad "canonical" form of the above?
> >
> > I don't think one form is inherently better than the other if we think
> > about just this one case.  But I agree that the if_then_else form is
> > currently the canonical form for the operation, and extends more
> > naturally to the general case.  AArch64 already matches specifically
> > for it (with xzr providing the zero value).
> The more I think about it, the more I prefer the if-then-else form.   My
> biggest hesitation with getting behind one form or the other is a lack
> of knowledge about which is likely better interpreted by simplify-rtx
> and friends -- though it may not matter much in practice.

In the case of IF_THEN_ELSE, combine tries a few things:
  /* If this is a simple operation applied to an IF_THEN_ELSE, try
 applying it to the arms of the IF_THEN_ELSE.  This often simplifies
 things.  Check for cases where both arms are testing the same
 condition.

Also see simplify_if_then_else inside combine.cc.
I don't think the first one could be done without using IF_THEN_ELSE.
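
For readers following along, the C-level idiom under discussion is just the
conditional-zero select below; the debate is over which RTL shape the port
should canonicalize it to (the and/neg form quoted above or the if_then_else
form):

/* Sketch of the source-level operation: rd = (rc != 0) ? rs : 0.
   The negated form simply tests rc == 0 instead.  */
long
cond_zero (long rc, long rs)
{
  return rc != 0 ? rs : 0;
}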

Thanks,
Andrew Pinski

>
> Jeff


[PATCH] apply debug-remap to file names in .su files

2023-02-13 Thread Rasmus Villemoes
The .su files generated with -fstack-usage are arguably debug info. In
order to make builds more reproducible, apply the same remapping logic
to the recorded file names as is used when producing the debug info
embedded in the object files.

To this end, teach print_decl_identifier() a new
PRINT_DECL_REMAP_DEBUG flag and use that from output_stack_usage_1().
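
A minimal sketch of the intended effect (my example; it assumes the usual
-ffile-prefix-map/-fdebug-prefix-map options feed the remapping):

/* Sketch: compile as
     g++ -c -fstack-usage -fdebug-prefix-map=/home/user/src=. /home/user/src/foo.cc
   With the patch, the foo.su entry for bar() starts with the remapped
   "./foo.cc:<line>:<col>:..." prefix instead of the absolute source path,
   matching what the debug info embedded in the object file already does.  */
int
bar (int n)
{
  int buf[10];
  for (int i = 0; i < 10; i++)
    buf[i] = n + i;
  return buf[n % 10];
}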

gcc/ChangeLog:

* print-tree.h (PRINT_DECL_REMAP_DEBUG): New flag.
* print-tree.cc (print_decl_identifier): Implement it.
* toplev.cc (output_stack_usage_1): Use it.
---
 gcc/print-tree.cc | 6 +-
 gcc/print-tree.h  | 1 +
 gcc/toplev.cc | 3 ++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/print-tree.cc b/gcc/print-tree.cc
index 1f3afcbbc86..ccecd3dc6a7 100644
--- a/gcc/print-tree.cc
+++ b/gcc/print-tree.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "dumpfile.h"
 #include "print-tree.h"
+#include "file-prefix-map.h"
 
 /* Define the hash table of nodes already seen.
Such nodes are not repeated; brief cross-references are used.  */
@@ -1065,7 +1066,10 @@ print_decl_identifier (FILE *file, tree decl, int flags)
{
  expanded_location loc
= expand_location (DECL_SOURCE_LOCATION (decl));
- fprintf (file, "%s:%d:%d", loc.file, loc.line, loc.column);
+ const char *f = flags & PRINT_DECL_REMAP_DEBUG
+   ? remap_debug_filename (loc.file)
+   : loc.file;
+ fprintf (file, "%s:%d:%d", f, loc.line, loc.column);
}
   needs_colon = true;
 }
diff --git a/gcc/print-tree.h b/gcc/print-tree.h
index 7683730484f..dc5a69b7a30 100644
--- a/gcc/print-tree.h
+++ b/gcc/print-tree.h
@@ -45,6 +45,7 @@ extern void indent_to (FILE *, int);
 #define PRINT_DECL_ORIGIN   0x1
 #define PRINT_DECL_NAME 0x2
 #define PRINT_DECL_UNIQUE_NAME  0x4
+#define PRINT_DECL_REMAP_DEBUG  0x8
 extern void print_decl_identifier (FILE *, tree, int flags);
 
 #endif  // GCC_PRINT_TREE_H
diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index 4c15d4f542e..d76571f60e8 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -829,7 +829,8 @@ output_stack_usage_1 (FILE *cf)
   if (stack_usage_file)
 {
   print_decl_identifier (stack_usage_file, current_function_decl,
-PRINT_DECL_ORIGIN | PRINT_DECL_NAME);
+PRINT_DECL_ORIGIN | PRINT_DECL_NAME
+| PRINT_DECL_REMAP_DEBUG);
   fprintf (stack_usage_file, "\t" HOST_WIDE_INT_PRINT_DEC"\t%s\n",
   stack_usage, stack_usage_kind_str[stack_usage_kind]);
 }
-- 
2.37.2



[PATCH] i386: Relax extract location operand mode requirements [PR108516]

2023-02-13 Thread Uros Bizjak via Gcc-patches
Combine pass simplifies zero-extend of a zero-extract to:

Trying 16 -> 6:
   16: r86:QI#0=zero_extract(r87:HI,0x8,0x8)
  REG_DEAD r87:HI
6: r84:SI=zero_extend(r86:QI)
  REG_DEAD r86:QI
Failed to match this instruction:
(set (reg:SI 84 [ s.e2 ])
(zero_extract:SI (reg:HI 87)
(const_int 8 [0x8])
(const_int 8 [0x8])))

which fails instruction recognition.  The pattern is valid, since there
is no requirement on the mode of the location operand.

The patch relaxes location operand mode requirements of *extzv and *extv
insn patterns to allow all supported integer modes.  The patch also
adds support for a related sign-extend from zero-extracted operand.

2023-02-13  Uroš Bizjak  

gcc/ChangeLog:

PR target/108516
* config/i386/predicates.md (extr_register_operand):
New special predicate.
* config/i386/i386.md (*extv): Use extr_register_operand
as operand 1 predicate.
(*extzv): Ditto.
(*extendqi_ext_1): New insn pattern.

gcc/testsuite/ChangeLog:

PR target/108516
* gcc.target/i386/pr108516-1.c: New test.
* gcc.target/i386/pr108516-2.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e62dd07ad8b..5a946beb1c6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3159,7 +3159,7 @@
 
 (define_insn "*extv"
   [(set (match_operand:SWI24 0 "register_operand" "=R")
-   (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand" "Q")
+   (sign_extract:SWI24 (match_operand 1 "extr_register_operand" "Q")
(const_int 8)
(const_int 8)))]
   ""
@@ -3202,7 +3202,7 @@
 
 (define_insn "*extzv"
   [(set (match_operand:SWI248 0 "register_operand" "=R")
-   (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand" "Q")
+   (zero_extract:SWI248 (match_operand 1 "extr_register_operand" "Q")
 (const_int 8)
 (const_int 8)))]
   ""
@@ -4777,6 +4777,19 @@
  (if_then_else (eq_attr "prefix_0f" "0")
(const_string "0")
(const_string "1")))])
+
+(define_insn "*extendqi_ext_1"
+  [(set (match_operand:SWI24 0 "register_operand" "=R")
+   (sign_extend:SWI24
+ (subreg:QI
+   (zero_extract:SWI248
+ (match_operand:SWI248 1 "register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+  ""
+  "movs{b|x}\t{%h1, %0|%0, %h1}"
+   [(set_attr "type" "imovx")
+(set_attr "mode" "")])
 
 ;; Conversions between float and double.
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index ec1785cde49..cca64f00a6a 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -92,6 +92,14 @@
   (and (match_code "reg")
(match_test "MASK_REGNO_P (REGNO (op))")))
 
+;; Match a DI, SI or HImode register operand for extract op.
+(define_special_predicate "extr_register_operand"
+  (and (match_operand 0 "register_operand")
+   (ior (and (match_test "TARGET_64BIT")
+(match_test "GET_MODE (op) == DImode"))
+   (match_test "GET_MODE (op) == SImode")
+   (match_test "GET_MODE (op) == HImode"
+
 ;; Match a DI, SI, HI or QImode nonimmediate_operand.
 (define_special_predicate "int_nonimmediate_operand"
   (and (match_operand 0 "nonimmediate_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr108516-1.c 
b/gcc/testsuite/gcc.target/i386/pr108516-1.c
new file mode 100644
index 000..d5344ef23e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr108516-1.c
@@ -0,0 +1,19 @@
+/* PR target/108516 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -dp" } */
+/* { dg-additional-options "-mregparm=1" { target ia32 } } */
+
+struct S
+{
+  unsigned char e1;
+  unsigned char e2;
+  unsigned char e3;
+};
+
+unsigned int
+f2 (struct S s)
+{
+  return s.e2;
+}
+
+/* { dg-final { scan-assembler-not "\\*zero_extend" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr108516-2.c 
b/gcc/testsuite/gcc.target/i386/pr108516-2.c
new file mode 100644
index 000..3e709e8c738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr108516-2.c
@@ -0,0 +1,19 @@
+/* PR target/108516 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -dp" } */
+/* { dg-additional-options "-mregparm=1" { target ia32 } } */
+
+struct S
+{
+  signed char e1;
+  signed char e2;
+  signed char e3;
+};
+
+int
+f2 (struct S s)
+{
+  return s.e2;
+}
+
+/* { dg-final { scan-assembler-not "\\*extzv" } } */


[Patch] libgomp: Fix 'target enter data' with always pointer

2023-02-13 Thread Tobias Burnus

The problem is that for GOMP_MAP_ALWAYS_POINTER there is a lookup for "i - 1",
but with 'target enter data', GOMP_MAP_ALWAYS_POINTER and its data were passed
as separate entities.

I am not sure whether there is a legitimate reason to have two
GOMP_MAP_ALWAYS_POINTER in a row; the check in gomp_map_vars_internal
seems to indicate that there is.  Hence, I assumed there is and added
an 'i > 0' check to that function, and also a check in the caller,
i.e. GOMP_target_enter_exit_data, that kinds[i] isn't an always pointer
(if kinds[i + 1] is).

Note that there is a front-end/middle-end issue with regards to 'target exit 
data',
which is the reason that the exit data has been commented out. I plan to fix 
this
separately.* (It is a bug of its own - and this fix is to libgomp and the other 
is
to the FE/ME.)

OK for mainline?

Tobias

(*) Part of the 'alloc' issue has been discussed in the patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/604887.html
however, during discussion on IRC it turned out that this patch is incomplete.
This issue is next on my to-do list.


PS: Also *pending* *review* is a simple reverse-offload-only patch and
one '!$omp loop' "13 Regression" fix (with the review comments fixed):

"[v2] OpenMP/Fortran: Fix loop-iter var privatization with !$OMP LOOP 
[PR108512]"
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611730.html

"[Patch] libgomp: Fix reverse-offload for GOMP_MAP_TO_PSET"
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611617.html

(Other pending patches: "OpenMP Patch Ping – including "[13 Regression]",
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611524.html )
libgomp: Fix 'target enter data' with always pointer

As GOMP_MAP_ALWAYS_POINTER operates on the previous map item, ensure that
with 'target enter data' both are passed together to gomp_map_vars_internal.

libgomp/ChangeLog:

	* target.c (gomp_map_vars_internal): Add 'i > 0' before doing a
	kind check.
	(GOMP_target_enter_exit_data): If the next map item is
	GOMP_MAP_ALWAYS_POINTER map it together with the current item.
* testsuite/libgomp.fortran/target-enter-data-3.f90: New test.

 target.c  |   17 +
 testsuite/libgomp.fortran/target-enter-data-3.f90 |   22 ++
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/libgomp/target.c b/libgomp/target.c
index c1682caea13..cc8db85957c 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1480,8 +1480,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		gomp_mutex_unlock (&devicep->lock);
 		gomp_fatal ("always pointer not mapped");
 		  }
-		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
-		!= GOMP_MAP_ALWAYS_POINTER)
+		if (i > 0
+		&& ((get_kind (short_mapkind, kinds, i - 1) & typemask)
+			!= GOMP_MAP_ALWAYS_POINTER))
 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
 		if (cur_node.tgt_offset)
 		  cur_node.tgt_offset -= sizes[i];
@@ -4085,7 +4086,10 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			 GOMP_MAP_VARS_ENTER_DATA);
 	  i += j - i - 1;
 	}
-  else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
+  else if (i + 1 < mapnum
+	   && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
+		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
+		   && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
 	{
 	  /* An attach operation must be processed together with the mapped
 	 base-pointer list item.  */
diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90
new file mode 100644
index 000..5d97566c66c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90
@@ -0,0 +1,22 @@
+implicit none
+type t
+  integer :: dummy
+  integer, pointer :: p1(:), p2(:)
+  integer :: dummy2
+end type t
+type(t) :: var
+integer :: i
+allocate(var%p1(5),var%p2(2:4))
+var%p1 = [22,53,28,6,4]
+var%p2 = [46,679,54]
+
+!$omp target enter data map(to:var%p1, var%p2)
+!$omp target
+  if (.not.associated(var%p1).or.lbound(var%p1,1)/=1.or.ubound(var%p1,1)/=5) stop 1
+  if (.not.associated(var%p2).or.lbound(var%p2,1)/=2.or.ubound(var%p2,1)/=4) stop 2
+  if (any (var%p1 /= [22,53,28,6,4])) stop 3
+  if (any (var%p2 /= [46,679,54])) stop 4
+!$omp end target
+!!$omp target exit data map(from:var%p1, var%p2)
+end
+


[pushed] [PR108774] RA: Clear reg equiv caller_save_p flag when clearing defined_p flag

2023-02-13 Thread Vladimir Makarov via Gcc-patches

The following patch solves

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108774

The patch was successfully bootstrapped and tested on i686, x86_64, and 
aarch64.
commit a33e3dcbd15e73603796e30b5eeec11a0c8bacec
Author: Vladimir N. Makarov 
Date:   Mon Feb 13 16:05:04 2023 -0500

RA: Clear reg equiv caller_save_p flag when clearing defined_p flag

IRA can invalidate an initially set up equivalence in setup_reg_equiv.
The caller_save_p flag was not cleared during invalidation although the
init_insns were cleared.  This resulted in a segmentation fault in
get_equiv.  Clearing the flag solves the problem.  As an extra
precaution I also clear the flag in other places, although it might not
be necessary.
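
To illustrate the invariant (a hedged sketch only - the type, field and
function names below are made up and are not the GCC data structures):

  struct equiv_entry
  {
    bool defined_p = false;
    bool caller_save_p = false;
    const void *init_insns = nullptr;  /* Stands in for the insn list.  */
  };

  /* Invalidate an equivalence: every flag describing it must be reset
     together, otherwise a later consumer (like get_equiv) could trust
     caller_save_p although the backing init_insns are already gone.  */
  static void
  invalidate_equiv (equiv_entry &e)
  {
    e.defined_p = false;
    e.caller_save_p = false;  /* The reset that was missing.  */
    e.init_insns = nullptr;
  }

  int
  main ()
  {
    equiv_entry e;
    e.defined_p = e.caller_save_p = true;
    invalidate_equiv (e);
    return e.caller_save_p ? 1 : 0;
  }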

PR rtl-optimization/108774

gcc/ChangeLog:

* ira.cc (ira_update_equiv_info_by_shuffle_insn): Clear equiv
caller_save_p flag when clearing defined_p flag.
(setup_reg_equiv): Ditto.
* lra-constraints.cc (lra_constraints): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr108774.c: New.

diff --git a/gcc/ira.cc b/gcc/ira.cc
index 9f9af808f63..6c7f4901e4c 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -2725,6 +2725,7 @@ ira_update_equiv_info_by_shuffle_insn (int to_regno, int from_regno, rtx_insn *i
 	  return;
 	}
   ira_reg_equiv[to_regno].defined_p = false;
+  ira_reg_equiv[to_regno].caller_save_p = false;
   ira_reg_equiv[to_regno].memory
 	= ira_reg_equiv[to_regno].constant
 	= ira_reg_equiv[to_regno].invariant
@@ -4193,6 +4194,7 @@ setup_reg_equiv (void)
 			if (ira_reg_equiv[i].memory == NULL_RTX)
 			  {
 			ira_reg_equiv[i].defined_p = false;
+			ira_reg_equiv[i].caller_save_p = false;
 			ira_reg_equiv[i].init_insns = NULL;
 			break;
 			  }
@@ -4203,6 +4205,7 @@ setup_reg_equiv (void)
 	  }
 	  }
 	ira_reg_equiv[i].defined_p = false;
+	ira_reg_equiv[i].caller_save_p = false;
 	ira_reg_equiv[i].init_insns = NULL;
 	break;
   }
diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index dd4f68bbfc0..dbfaf0485a5 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5100,7 +5100,8 @@ lra_constraints (bool first_p)
 			 && (targetm.preferred_reload_class
 			 (x, lra_get_allocno_class (i)) == NO_REGS))
 			|| contains_symbol_ref_p (x
-	  ira_reg_equiv[i].defined_p = false;
+	  ira_reg_equiv[i].defined_p
+		= ira_reg_equiv[i].caller_save_p = false;
 	if (contains_reg_p (x, false, true))
 	  ira_reg_equiv[i].profitable_p = false;
 	if (get_equiv (reg) != reg)
diff --git a/gcc/testsuite/gcc.target/i386/pr108774.c b/gcc/testsuite/gcc.target/i386/pr108774.c
new file mode 100644
index 000..482bc490cde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr108774.c
@@ -0,0 +1,11 @@
+/* PR target/108774 */
+/* { dg-do compile  { target x86_64-*-* } } */
+/* { dg-options "-Os -ftrapv -mcmodel=large" } */
+
+int i, j;
+
+void
+foo (void)
+{
+  i = ((1 << j) - 1) >> j;
+}


Re: [PATCH] lra: Replace subregs in bare uses & clobbers [PR108681]

2023-02-13 Thread Richard Sandiford via Gcc-patches
Jeff Law  writes:
> On 2/7/23 03:29, Richard Sandiford via Gcc-patches wrote:
>> In this PR we had a write to one vector of a 4-vector tuple.
>> The vector had mode V1DI, and the target doesn't provide V1DI
>> moves, so this was converted into:
>> 
>>  (clobber (subreg:V1DI (reg/v:V4x1DI 92 [ b ]) 24))
>> 
>> followed by a DImode move.  (The clobber isn't really necessary
>> or helpful for a single word, but would be for wider moves.)
>> 
>> The subreg in the clobber survived until after RA:
>> 
>>  (clobber (subreg:V1DI (reg/v:V4x1DI 34 v2 [orig:92 b ] [92]) 24))
> Post-reload all (subreg (reg)) expressions are supposed to be
> simplified.  At least that's my recollection.  Though it looks like we
> don't force the simplification until final assembly output.
>
> One might question under what circumstances simplifying (subreg (reg)) 
> can legitimately fail.

My memory's hazy, but I think e500 had instances of this.  e500's long
gone though, so maybe it's a non-issue now.

>> IMO this isn't well-formed.  If a subreg of a hard register simplifies
>> to a hard register, it should be replaced by the hard register.  If the
>> subreg doesn't simplify, then target-independent code can't be sure
>> which parts of the register are affected and which aren't.  A clobber
>> of such a subreg isn't useful and (again IMO) should just be removed.
>> Conversely, a use of such a subreg is effectively a use of the whole
>> inner register.
> Agreed.
>
> I'm not even sure that naked USE/CLOBBERS have any value post-reload 
> except for the use of the return register(s) and those inserted by 
> reorg.  But changing that at this stage seems inadvisable.

Yeah, not sure either about USEs.  I think the CLOBBERs can still be
useful as a way of avoiding partially-uninitialised registers becoming
too upwards-exposed.  E.g. when a 4-register hardreg is used and only
one register is set, the CLOBBER prevents the other 3 registers being
live on entry, or at least being kept live after some earlier unrelated
use.  That should give things like regrename more freedom.

Thanks for the review, now pushed.

Richard

>> LRA has code to simplify subregs of hard registers, but it didn't
>> handle bare uses and clobbers.  The patch extends it to do that.
>> 
>> One question was whether the final_p argument to alter_subregs
>> should be true or false.  True is IMO dangerous, since it forces
>> replacements that might not be valid from a dataflow perspective,
>> and uses and clobbers only exist for dataflow.  As said above,
>> I think the correct way of handling a failed simplification would
>> be to delete clobbers and replace uses of subregs with uses of
>> the inner register.  But I didn't want to write untested code
>> to do that.
> I'd go with "false" here after reviewing the code.
>
>
>
>> 
>> In the PR, the clobber caused an infinite loop in DCE, because
>> of a disagreement about what effect the clobber had.  But for
>> the reasons above, I think that was GIGO rather than a bug in
>> DF or DCE.
>> 
>> Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?
>> 
>> Richard
>> 
>> 
>> gcc/
>>  PR rtl-optimization/108681
>>  * lra-spills.cc (lra_final_code_change): Extend subreg replacement
>>  code to handle bare uses and clobbers.
>> 
>> gcc/testsuite/
>>  PR rtl-optimization/108681
>>  * gcc.target/aarch64/pr108681.c: New test.
> OK
> jeff


[PATCH, committed] Fortran: error recovery after invalid use of CLASS variable [PR103475]

2023-02-13 Thread Harald Anlauf via Gcc-patches
Dear all,

the attached simple and obvious patch fixes a NULL pointer dereference
on an invalid use of a CLASS variable.

Committed to mainline after regtesting on x86_64-pc-linux-gnu as

https://gcc.gnu.org/g:2ce7e2a83e18a27fe9c659f8667fc24f0df4ea9a

Thanks,
Harald

From 2ce7e2a83e18a27fe9c659f8667fc24f0df4ea9a Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Mon, 13 Feb 2023 22:02:44 +0100
Subject: [PATCH] Fortran: error recovery after invalid use of CLASS variable
 [PR103475]

gcc/fortran/ChangeLog:

	PR fortran/103475
	* primary.cc (gfc_expr_attr): Avoid NULL pointer dereference for
	invalid use of CLASS variable.

gcc/testsuite/ChangeLog:

	PR fortran/103475
	* gfortran.dg/pr103475.f90: New test.
---
 gcc/fortran/primary.cc |  2 +-
 gcc/testsuite/gfortran.dg/pr103475.f90 | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr103475.f90

diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index 28ce5fea865..1bea17d44fe 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
@@ -2770,7 +2770,7 @@ gfc_expr_attr (gfc_expr *e)
 	{
 	  gfc_symbol *sym = e->value.function.esym->result;
 	  attr = sym->attr;
-	  if (sym->ts.type == BT_CLASS)
+	  if (sym->ts.type == BT_CLASS && sym->attr.class_ok)
 	{
 	  attr.dimension = CLASS_DATA (sym)->attr.dimension;
 	  attr.pointer = CLASS_DATA (sym)->attr.class_pointer;
diff --git a/gcc/testsuite/gfortran.dg/pr103475.f90 b/gcc/testsuite/gfortran.dg/pr103475.f90
new file mode 100644
index 000..6cce5e8ebf7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr103475.f90
@@ -0,0 +1,11 @@
+! { dg-do compile }
+! { dg-options "-O2 -Wall" }
+! PR fortran/103475 - ICE in gfc_expr_attr
+! Contributed by G.Steinmetz
+
+program p
+  type t
+  end type
+  class(t) :: x ! { dg-error "must be dummy, allocatable or pointer" }
+  y = x()   ! { dg-error "Cannot convert invalid class" }
+end
--
2.35.3



Ping: [PATCH+wwwdocs 0/8] A small Texinfo refinement

2023-02-13 Thread Arsen Arsenović via Gcc-patches
Ping on this patch.  I took the liberty to rebase it.  The changes are
minimal, so I didn't want to resend the entire patchset.  I included a
range diff and a pull request for your convenience.

The render is also updated, and ``make all && make html'' passes (which
is something I forgot to check last time, so tm.texi had some
complaints, apologies).

-:  --- > 1:  6eba1548dfe docs: Create Indices appendix
1:  3ac13e06ad7 ! 2:  3f54e2c451f docs: Reorder @opindex to be before 
corresponding options
@@ gcc/doc/invoke.texi: union U @{
  
  @end itemize
  
++@opindex -Wno-changes-meaning
+ @item -Wno-changes-meaning @r{(C++ and Objective-C++ only)}
+ C++ requires that unqualified uses of a name within a class have the
+ same meaning in the complete scope of the class, so declaring the name
+@@ gcc/doc/invoke.texi: error case can be reduced to a warning with
+ 
+ Both diagnostics are also suppressed by @option{-fms-extensions}.
+ 
 -@item -Wchar-subscripts
  @opindex Wchar-subscripts
  @opindex Wno-char-subscripts
@@ gcc/doc/invoke.texi: program may yield backtraces with different 
addresses due t
  @opindex fsanitize=kernel-address
 +@item -fsanitize=kernel-address
  Enable AddressSanitizer for Linux kernel.
- See @uref{https://github.com/google/kasan} for more details.
+ See @uref{https://github.com/google/kernel-sanitizers} for more details.
  
 -@item -fsanitize=hwaddress
  @opindex fsanitize=hwaddress
@@ gcc/doc/invoke.texi: For predictable results, you must also specify the 
same set
  Produce a shared object which can then be linked with other objects to
  form an executable.  Not all systems support this option.  For predictable
  results, you must also specify the same set of options used for 
compilation
-@@ gcc/doc/invoke.texi: to subtle defects.  Supplying them in cases where 
they are not necessary
- is innocuous. For x86, crtfastmath.o will not be added when
- @option{-shared} is specified. }
+@@ gcc/doc/invoke.texi: is innocuous.  @option{-shared} suppresses the 
addition of startup code
+ to alter the floating-point environment as done with @option{-ffast-math},
+ @option{-Ofast} or @option{-funsafe-math-optimizations} on some targets.}
  
 -@item -shared-libgcc
 -@itemx -static-libgcc
2:  7ff7376a83c ! 3:  7821fcc2717 **/*.texi: Reorder index entries
@@ Commit message
 * doc/invoke.texi: Ditto.
 * doc/md.texi: Ditto.
 * doc/rtl.texi: Ditto.
-* doc/tm.texi: Ditto.
+* doc/tm.texi.in: Ditto.
 * doc/trouble.texi: Ditto.
+* doc/tm.texi: Regenerate.
 
 gcc/fortran/ChangeLog:
 
@@ gcc/doc/tm.texi: boundary, to contain the local variables of the 
function.  On s
  this region and the save area may occur in the opposite order, with the
  save area closer to the top of the stack.
  
+-@item
+ @cindex @code{ACCUMULATE_OUTGOING_ARGS} and stack frames
++@item
+ Optionally, when @code{ACCUMULATE_OUTGOING_ARGS} is defined, a region of
+ @code{crtl->outgoing_args_size} bytes to be used for outgoing
+ argument lists of the function.  @xref{Stack Arguments}.
+
+ ## gcc/doc/tm.texi.in ##
+@@ gcc/doc/tm.texi.in: This section describes the macros that output 
function entry
+ @hook TARGET_ASM_FUNCTION_EPILOGUE
+ 
+ @itemize @bullet
+-@item
+ @findex pretend_args_size
+ @findex crtl->args.pretend_args_size
++@item
+ A region of @code{crtl->args.pretend_args_size} bytes of
+ uninitialized space just underneath the first argument arriving on the
+ stack.  (This may not be at the very start of the allocated stack region
+@@ gcc/doc/tm.texi.in: boundary, to contain the local variables of the 
function.  On some machines,
+ this region and the save area may occur in the opposite order, with the
+ save area closer to the top of the stack.
+ 
 -@item
  @cindex @code{ACCUMULATE_OUTGOING_ARGS} and stack frames
 +@item
3:  00cb8c6ad52 = 4:  af9be5e8ae7 docs: Mechanically reorder item/index combos 
in extend.texi
4:  af15b1b84cb ! 5:  19e506d79a4 doc: Add @defbuiltin family of helpers, set 
documentlanguage
@@ gcc/doc/extend.texi: myprintf (FILE *f, const char *format, ...)
 -@end deftypefn
 +@enddefbuiltin
  
--@deftypefn {Built-in Function} {size_t} __builtin_va_arg_pack_len ()
-+@defbuiltin{{size_t} __builtin_va_arg_pack_len ()}
+-@deftypefn {Built-in Function} {int} __builtin_va_arg_pack_len ()
++@defbuiltin{int __builtin_va_arg_pack_len ()}
  This built-in function returns the number of anonymous arguments of
  an inline function.  It can be used only in inline functions that
  are always inlined, never compiled as a separate function, such
@@ gcc/doc/extend.texi: forced to 

Ping: [wwwdocs] lists: Add documentation about the Sourceware public-inbox

2023-02-13 Thread Arsen Arsenović via Gcc-patches
Ping:
https://inbox.sourceware.org/gcc-patches/20230123012100.4021860-1-ar...@aarsen.me/

No further changes happened to the file between the post and today.

Have a lovely night :-)
-- 
Arsen Arsenović




[pushed] libstdc++: Adjust "The Component Object Model" reference

2023-02-13 Thread Gerald Pfeifer
Pushed.

Gerald


libstdc++-v3/ChangeLog:

* doc/xml/manual/policy_data_structures_biblio.xml: Adjust
"The Component Object Model" reference.
* doc/html/manual/policy_data_structures.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/policy_data_structures.html  | 4 ++--
 libstdc++-v3/doc/xml/manual/policy_data_structures_biblio.xml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/policy_data_structures.html 
b/libstdc++-v3/doc/html/manual/policy_data_structures.html
index cbe9ea21c7c..927b4125587 100644
--- a/libstdc++-v3/doc/html/manual/policy_data_structures.html
+++ b/libstdc++-v3/doc/html/manual/policy_data_structures.html
@@ -1157,7 +1157,7 @@
. 
  Cambridge University Press
. [biblio.mscom] 
-   https://docs.microsoft.com/en-us/windows/win32/com/the-component-object-model";
 target="_top">
+   https://learn.microsoft.com/en-us/windows/win32/com/the-component-object-model";
 target="_top">
  The Component Object Model

   . 
@@ -1297,4 +1297,4 @@
Wickland
  . 
  National Psychological Institute
-   . Prev??Up??NextImplementation??Home??Using
\ No newline at end of file
+   . Prev??Up??NextImplementation??Home??Using
diff --git a/libstdc++-v3/doc/xml/manual/policy_data_structures_biblio.xml 
b/libstdc++-v3/doc/xml/manual/policy_data_structures_biblio.xml
index 1adba7df91d..1450b8fc000 100644
--- a/libstdc++-v3/doc/xml/manual/policy_data_structures_biblio.xml
+++ b/libstdc++-v3/doc/xml/manual/policy_data_structures_biblio.xml
@@ -1061,7 +1061,7 @@
 
   
http://www.w3.org/1999/xlink";
- 
xlink:href="https://docs.microsoft.com/en-us/windows/win32/com/the-component-object-model";>
+ 
xlink:href="https://learn.microsoft.com/en-us/windows/win32/com/the-component-object-model";>
  The Component Object Model

   
-- 
2.39.1


Re: [PATCH] libstdc++: Add missing free functions for atomic_flag [PR103934]

2023-02-13 Thread Thomas Rodgers via Gcc-patches
Tested x86_64-pc-linux-gnu. Pushed to trunk.

The first patch has also been backported and pushed to releases/gcc-12 and
releases/gcc-11

The second patch fails to cleanly cherry-pick. Will resolve and push
shortly.
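
For reference, here is a minimal usage sketch of the free functions being
discussed below, assuming a C++20 toolchain with atomic wait/notify support
(this is not part of the patch):

  #include <atomic>
  #include <thread>

  int
  main ()
  {
    std::atomic_flag f;  // cleared on construction since C++20

    std::thread t ([&f]
    {
      // Blocks while the flag still holds the old value 'false'.
      std::atomic_flag_wait (&f, false);
    });

    std::atomic_flag_test_and_set_explicit (&f, std::memory_order_release);
    std::atomic_flag_notify_one (&f);
    t.join ();

    // atomic_flag_test (C++20) observes the flag without modifying it.
    return std::atomic_flag_test (&f) ? 0 : 1;
  }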

On Fri, Feb 10, 2023 at 4:41 PM Jonathan Wakely  wrote:

> On Fri, 10 Feb 2023 at 18:25, Thomas Rodgers  wrote:
> >
> > This patch did not get committed in a timely manner after it was OK'd.
> > In revisiting the patch some issues were found that have led me to
> > resubmit for review -
> >
> > Specifically -
> >
> > The original commit to add C++20 atomic_flag::test did not include the
> > free functions for atomic_flag_test[_explicit]
> > The original commit to add C++20 atomic_flag::wait/notify did not
> > include the free functions for atomic_flag_wait/notify[_explicit]
> >
> > These two commits landed in GCC10 and GCC11 respectively.  My original
> > patch included both sets of free functions, but that complicates the
> > backporting of these changes to GCC10, GCC11, and GCC12.
>
> I don't think we need them in GCC 10.
>
> > Additionally commit 7c2155 removed const qualification from
> > atomic_flag::notify_one/notify_all, but the original version of this
> > patch accepts the atomic flag as const.
> >
> > The original version of this patch did not include test cases for the
> > atomic_flag_test[_explicit] free functions.
> >
> > I have split the original patch into two patches, one for the
> > atomic_flag_test free functions, and one for the atomic_flag_wait/notify
> > free functions.
>
> Thanks.
>
> For [PATCH 1/2] please name the added functions in the changelog entry:
>
> * include/std/atomic (atomic_flag_test): Add.
> (atomic_flag_test_explicit): Add.
>
> Similarly for the changelog in [PATCH 2/2], naming the four new
> functions added to include/std/atomic.
>
> The indentation is off in [PATCH 2/2] for atomic_flag:
>
> +#if __cpp_lib_atomic_wait
> +  inline void
> +  atomic_flag_wait(atomic_flag* __a, bool __old) noexcept
> +  { __a->wait(__old); }
> +
>
> And similarly for the other three added functions.
> The function names should start in the same column as the 'inline' and
> opening brace of the function body.
>
>
> Both patches are OK for trunk, gcc-12 and gcc-11 with those changes.
>
>
>
>
> >
> >
> > On Wed, Feb 2, 2022 at 1:35 PM Jonathan Wakely 
> wrote:
> >>
> >> >+  inline void
> >> >+  atomic_flag_wait_explicit(const atomic_flag* __a, bool __old,
> >> >+   std::memory_order __m) noexcept
> >>
> >> No need for the std:: qualification, and check the indentation.
> >>
> >>
> >> > libstdc++-v3/ChangeLog:
> >> >
> >> >PR103934
> >>
> >> This needs to include the component: PR libstdc++/103934
> >>
> >> >* include/std/atomic: Add missing free functions.
> >>
> >> Please name the new functions in the changelog, in the usual format.
> >> Just the names is fine, no need for the full signatures with
> >> parameters.
> >>
> >> OK for trunk with those changes.
> >>
>
>


Re: [PATCH 2/2] vect: Make partial trapping ops use predication [PR96373]

2023-02-13 Thread Kewen.Lin via Gcc-patches
on 2023/2/13 21:57, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> Hi Richard,
>>
>> on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
>>> PR96373 points out that a predicated SVE loop currently converts
>>> trapping unconditional ops into unpredicated vector ops.  Doing
>>> the operation on inactive lanes can then raise an exception.
>>>
>>> As discussed in the PR trail, we aren't 100% consistent about
>>> whether we preserve traps or not.  But the direction of travel
>>> is clearly to improve that rather than live with it.  This patch
>>> tries to do that for the SVE case.
>>>
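
As an illustration only (not the PR's test case), the kind of loop affected
is a plain trapping floating-point operation that gets a fully-masked vector
body or epilogue, e.g.:

  #include <cstdio>

  // With -ftrapping-math, a masked vectorisation of this loop must not
  // evaluate the division on inactive lanes, or a spurious FP exception
  // may be raised.
  void
  vdiv (double *__restrict x, const double *__restrict a,
        const double *__restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      x[i] = a[i] / b[i];
  }

  int
  main ()
  {
    double a[4] = { 1, 2, 3, 4 }, b[4] = { 2, 2, 2, 2 }, x[4];
    vdiv (x, a, b, 4);
    std::printf ("%g\n", x[3]);
    return 0;
  }
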
>>> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
>>> -fno-trapping-math for now and filed PR108571 to track it.
>>> A similar problem applies to fsubr_1.d.
>>>
>>> I think this is likely to regress Power 10, since conditional
>>> operations are only available for masked loops.  I think we'll
>>> need to add -fno-trapping-math to any affected testcases,
>>> but I don't have a Power 10 system to test on.  Kewen, would you
>>> mind giving this a spin and seeing how bad the fallout is?
>>>
>>
>> Sorry for the late reply, I'm just back from vacation.
>>
>> Thank you for fixing this and caring about Power10!
>>
>> I tested your proposed patch on one Power10 machine (ppc64le),
>> it's bootstrapped but some test failures got exposed as below.
>>
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
>> mlxvlM 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times 
>> mstxvlM 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
>> mlxvlM 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times 
>> mstxvlM 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
>> mlxvlM 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times 
>> mstxvlM 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
>> mlxvlM 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
>> mlxvx?M 120
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
>> mstxvlM 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times 
>> mstxvx?M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
>> mlxvlM 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
>> mstxvlM 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times 
>> mstxvx?M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
>> mlxvlM 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
>> mlxvx?M 42
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times 
>> mstxvlM 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
>> mlxvlM 16
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times 
>> mstxvlM 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
>> mlxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not 
>> mstxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
>> mlxvlM 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times 
>> mstxvlM 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
>> mlxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not 
>> mstxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
>> mlxvlM 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times 
>> mstxvlM 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
>> mlxvlM 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times 
>> mstxvlM 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
>> mlxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
>> mstxvM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not 
>> mstxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
>> mlxvlM 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times 
>> mstxvlM 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
>> mlxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
>> mstxvM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not 
>> mstxvxM
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
>> mlxvlM 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times 
>> mstxvlM 21
>> < FAIL: g

[PATCH] debug: Support "phrs" for dumping a HARD_REG_SET

2023-02-13 Thread Hans-Peter Nilsson via Gcc-patches
Ok to commit?  It survived both a cris-elf regtest and a
x86_64-linux-gnu native regtest. :)

 8< 
The debug function in sel-sched-dump.cc that would be
suitable for hooking up to a gdb command is guarded by
#ifdef INSN_SCHEDULING and thus can't be used for all targets.
Better to move the function marked DEBUG_FUNCTION elsewhere -
here, to a file that already has a suitable static function to call.

There are multiple sets of similar functions dumping
HARD_REG_SETs, but cleaning that up is better left to a
separate commit.

gcc:
* gdbinit.in (phrs): New command.
* sel-sched-dump.cc (debug_hard_reg_set): Remove debug-function.
* ira-color.cc (debug_hard_reg_set): New, calling print_hard_reg_set.
---
 gcc/gdbinit.in| 12 
 gcc/ira-color.cc  |  7 +++
 gcc/sel-sched-dump.cc | 10 --
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in
index 1f7592b0e26a..a76079a46af7 100644
--- a/gcc/gdbinit.in
+++ b/gcc/gdbinit.in
@@ -31,6 +31,7 @@ GCC gdbinit file introduces several debugging shorthands:
 pdd [dw_die_ref],
 pbm [bitmap],
 pel [location_t],
+phrs [HARD_REG_SET]
 pp, pbs, pcfun
 
 They are generally implemented by calling a function that prints to stderr,
@@ -145,6 +146,17 @@ Print given GENERIC expression in C syntax.
 See also 'help-gcc-hooks'.
 end
 
+define phrs
+eval "set $debug_arg = $%s", $argc ? "arg0" : ""
+call debug_hard_reg_set ($debug_arg)
+end
+
+document phrs
+GCC hook: debug_hard_reg_set (HARD_REG_SET)
+Print given HARD_REG_SET.
+See also 'help-gcc-hooks'.
+end
+
 define pmz
 eval "set $debug_arg = $%s", $argc ? "arg0" : ""
 call mpz_out_str(stderr, 10, $debug_arg)
diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index fe6dfc6e7692..1fb2958bddd0 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -512,6 +512,13 @@ print_hard_reg_set (FILE *f, HARD_REG_SET set, bool 
new_line_p)
 fprintf (f, "\n");
 }
 
+/* Dump a hard reg set SET to stderr.  */
+DEBUG_FUNCTION void
+debug_hard_reg_set (HARD_REG_SET set)
+{
+  print_hard_reg_set (stderr, set, true);
+}
+
 /* Print allocno hard register subforest given by ROOTS and its LEVEL
to F.  */
 static void
diff --git a/gcc/sel-sched-dump.cc b/gcc/sel-sched-dump.cc
index b4eef8803df9..05de98409375 100644
--- a/gcc/sel-sched-dump.cc
+++ b/gcc/sel-sched-dump.cc
@@ -986,16 +986,6 @@ debug_blist (blist_t bnds)
   restore_dump ();
 }
 
-/* Dump a hard reg set SET to stderr.  */
-DEBUG_FUNCTION void
-debug_hard_reg_set (HARD_REG_SET set)
-{
-  switch_dump (stderr);
-  dump_hard_reg_set ("", set);
-  sel_print ("\n");
-  restore_dump ();
-}
-
 /* Debug a cfg region with default flags.  */
 void
 sel_debug_cfg (void)
-- 
2.30.2



Re: [PATCH] LoongArch: Fix multiarch tuple canonization

2023-02-13 Thread Lulu Cheng

add yangyujie.

On 2023/2/13, at 6:38 PM, Xi Ruoyao wrote:

Multiarch tuple will be coded in file or directory names in
multiarch-aware distros, so one ABI should have only one multiarch
tuple.  For example, "--target=loongarch64-linux-gnu --with-abi=lp64s"
and "--target=loongarch64-linux-gnusf" should both set multiarch tuple
to "loongarch64-linux-gnusf".  Before this commit,
"--target=loongarch64-linux-gnu --with-abi=lp64s --disable-multilib"
would produce the wrong result (loongarch64-linux-gnu).

A recent LoongArch psABI revision mandates "loongarch64-linux-gnu" to be
used for -mabi=lp64d (instead of "loongarch64-linux-gnuf64") for some
non-technical reason [1].  Note that we cannot make
"loongarch64-linux-gnuf64" an alias for "loongarch64-linux-gnu" because
to implement such an alias, we must create thousands of symlinks in the
distro and doing so would be completely impractical.  This commit also
aligns GCC with the revision.

Tested by building cross compilers with --enable-multiarch and multiple
combinations of --target=loongarch64-linux-gnu*, --with-abi=lp64{s,f,d},
and --{enable,disable}-multilib; then running "xgcc --print-multiarch"
and manually verifying the result.

Ok for trunk and backport to releases/gcc-12?

[1]: https://github.com/loongson/LoongArch-Documentation/pull/80

gcc/ChangeLog:

* config.gcc (triplet_abi): Set its value based on $with_abi,
instead of $target.
(la_canonical_triplet): Set it after $triplet_abi is set
correctly.
* config/loongarch/t-linux (MULTILIB_OSDIRNAMES): Make the
multiarch tuple for lp64d "loongarch64-linux-gnu" (without
"f64" suffix).
---
  gcc/config.gcc   | 14 +++---
  gcc/config/loongarch/t-linux |  2 +-
  2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 067720ac795..c070e6ecd2e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4889,20 +4889,16 @@ case "${target}" in
case ${target} in
loongarch64-*-*-*f64)
abi_pattern="lp64d"
-   triplet_abi="f64"
;;
loongarch64-*-*-*f32)
abi_pattern="lp64f"
-   triplet_abi="f32"
;;
loongarch64-*-*-*sf)
abi_pattern="lp64s"
-   triplet_abi="sf"
;;
loongarch64-*-*-*)
abi_pattern="lp64[dfs]"
abi_default="lp64d"
-   triplet_abi=""
;;
*)
echo "Unsupported target ${target}." 1>&2
@@ -4923,9 +4919,6 @@ case "${target}" in
  ;;
esac
  
-		la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"

-
-
# Perform initial sanity checks on --with-* options.
case ${with_arch} in
"" | loongarch64 | la464) ;; # OK, append here.
@@ -4996,6 +4989,13 @@ case "${target}" in
;;
esac
  
+		case ${with_abi} in

+ "lp64d") triplet_abi="";;
+ "lp64f") triplet_abi="f32";;
+ "lp64s") triplet_abi="sf";;
+   esac
+   la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
+
# Set default value for with_abiext (internal)
case ${with_abiext} in
"")
diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
index 131c45fdced..e40da179203 100644
--- a/gcc/config/loongarch/t-linux
+++ b/gcc/config/loongarch/t-linux
@@ -40,7 +40,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
  
  MULTILIB_OSDIRNAMES = \

mabi.lp64d=../lib64$\
-  $(call if_multiarch,:loongarch64-linux-gnuf64)
+  $(call if_multiarch,:loongarch64-linux-gnu)
  
  MULTILIB_OSDIRNAMES += \

mabi.lp64f=../lib64/f32$\