[PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API

2023-06-04 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch supports the 2 intrinsic APIs of the FP16 ZVFHMIN extension, aka
SEW=16 for the instructions below:

vfwcvt.f.f.v
vfncvt.f.f.w

Then users can leverage the intrinsic APIs to perform the conversion
between RVV vector single-precision and half-precision floating point.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-types.def
(vfloat32mf2_t): Add vfloat32mf2_t type to vfncvt.f.f.w operations.
(vfloat32m1_t): Likewise.
(vfloat32m2_t): Likewise.
(vfloat32m4_t): Likewise.
(vfloat32m8_t): Likewise.
* config/riscv/riscv-vector-builtins.def: Fix typo in comments.
* config/riscv/vector-iterators.md: Add single to half machine
mode conversion.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: New test.
---
 .../riscv/riscv-vector-builtins-types.def |  6 +++
 gcc/config/riscv/riscv-vector-builtins.def|  2 +-
 gcc/config/riscv/vector-iterators.md  | 10 
 .../riscv/rvv/base/zvfhmin-intrinsic.c| 53 +++
 4 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
b/gcc/config/riscv/riscv-vector-builtins-types.def
index 65716b8c637..9cb3aca992e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -553,6 +553,12 @@ DEF_RVV_WCONVERT_U_OPS (vuint64m2_t, 
RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_6
 DEF_RVV_WCONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
 DEF_RVV_WCONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
 
+DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32)
+
 DEF_RVV_WCONVERT_F_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WCONVERT_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WCONVERT_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
diff --git a/gcc/config/riscv/riscv-vector-builtins.def 
b/gcc/config/riscv/riscv-vector-builtins.def
index 149835f36ac..310edeaf5a9 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -490,7 +490,7 @@ DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, int64, 
VNx16DI, VNx8DI, VOID, _i6
 DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, 
VOID, _u64m8,
  _u64, _e64m8)
 
-/* Enabled if TARGET_VECTOR_ELEN_FP_16 && 9TARGET_ZVFH or TARGET_ZVFHMIN).  */
+/* Enabled if TARGET_VECTOR_ELEN_FP_16 && (TARGET_ZVFH or TARGET_ZVFHMIN).  */
 /* LMUL = 1/4.  */
 DEF_RVV_TYPE (vfloat16mf4_t, 18, __rvv_float16mf4_t, float16, VNx2HF, VNx1HF, 
VOID,
  _f16mf4, _f16, _e16mf4)
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 5fbaef89566..90743ed76c5 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -481,6 +481,13 @@ (define_mode_iterator VWEXTI [
 ])
 
 (define_mode_iterator VWEXTF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+
   (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
   (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
   (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
@@ -1145,6 +1152,8 @@ (define_mode_attr V_DOUBLE_TRUNC [
   (VNx16SI "VNx16HI") (VNx32SI "VNx32HI")
   (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI")
   (VNx16DI "VNx16SI")
+
+  (VNx1SF "VNx1HF") (VNx2SF "VNx2HF") (VNx4SF "VNx4HF") (VNx8SF "VNx8HF") 
(VNx16SF "VNx16HF") (VNx32SF "VNx32HF")
   (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF")
   (VNx16DF "VNx16SF")
 ])
@@ -1169,6 +1178,7 @@ (define_mode_attr v_double_trunc [
   (VNx16SI "vnx16hi") (VNx32SI "vnx32hi")
   (VNx1DI "vnx1si") (VNx2DI "vnx2si") (VNx4DI "vnx4si") (VNx8DI "vnx8si")
   (VNx16DI "vnx16si")
+  (VNx1SF "vnx1hf") (VNx2SF "vnx2hf") (VNx4SF "vnx4hf") (VNx8SF "vnx8hf") 
(VNx16SF "vnx16hf") (VNx32SF "vnx32hf")
   (VNx1DF "vnx1sf") (VNx2DF "vnx2sf") (VNx4DF "vnx4sf") (VNx8DF "vnx8sf")
   (VNx16DF "vnx16sf")
 ])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
new file mode 100644
index 000..0923b6bc4d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfhmin -m

Re: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API

2023-06-04 Thread 钟居哲
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-06-04 15:19
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API
From: Pan Li 
 
This patch supports the 2 intrinsic APIs of the FP16 ZVFHMIN extension, aka
SEW=16 for the instructions below:
 
vfwcvt.f.f.v
vfncvt.f.f.w
 
Then users can leverage the intrinsic APIs to perform the conversion
between RVV vector single-precision and half-precision floating point.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-types.def
(vfloat32mf2_t): Add vfloat32mf2_t type to vfncvt.f.f.w operations.
(vfloat32m1_t): Likewise.
(vfloat32m2_t): Likewise.
(vfloat32m4_t): Likewise.
(vfloat32m8_t): Likewise.
* config/riscv/riscv-vector-builtins.def: Fix typo in comments.
* config/riscv/vector-iterators.md: Add single to half machine
mode conversion.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: New test.
---
.../riscv/riscv-vector-builtins-types.def |  6 +++
gcc/config/riscv/riscv-vector-builtins.def|  2 +-
gcc/config/riscv/vector-iterators.md  | 10 
.../riscv/rvv/base/zvfhmin-intrinsic.c| 53 +++
4 files changed, 70 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
b/gcc/config/riscv/riscv-vector-builtins-types.def
index 65716b8c637..9cb3aca992e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -553,6 +553,12 @@ DEF_RVV_WCONVERT_U_OPS (vuint64m2_t, 
RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_6
DEF_RVV_WCONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
DEF_RVV_WCONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
+DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WCONVERT_F_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32)
+
DEF_RVV_WCONVERT_F_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
DEF_RVV_WCONVERT_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
DEF_RVV_WCONVERT_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
diff --git a/gcc/config/riscv/riscv-vector-builtins.def 
b/gcc/config/riscv/riscv-vector-builtins.def
index 149835f36ac..310edeaf5a9 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -490,7 +490,7 @@ DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, int64, 
VNx16DI, VNx8DI, VOID, _i6
DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, VOID, 
_u64m8,
  _u64, _e64m8)
-/* Enabled if TARGET_VECTOR_ELEN_FP_16 && 9TARGET_ZVFH or TARGET_ZVFHMIN).  */
+/* Enabled if TARGET_VECTOR_ELEN_FP_16 && (TARGET_ZVFH or TARGET_ZVFHMIN).  */
/* LMUL = 1/4.  */
DEF_RVV_TYPE (vfloat16mf4_t, 18, __rvv_float16mf4_t, float16, VNx2HF, VNx1HF, 
VOID,
  _f16mf4, _f16, _e16mf4)
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 5fbaef89566..90743ed76c5 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -481,6 +481,13 @@ (define_mode_iterator VWEXTI [
])
(define_mode_iterator VWEXTF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+
   (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
   (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
   (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
@@ -1145,6 +1152,8 @@ (define_mode_attr V_DOUBLE_TRUNC [
   (VNx16SI "VNx16HI") (VNx32SI "VNx32HI")
   (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI")
   (VNx16DI "VNx16SI")
+
+  (VNx1SF "VNx1HF") (VNx2SF "VNx2HF") (VNx4SF "VNx4HF") (VNx8SF "VNx8HF") 
(VNx16SF "VNx16HF") (VNx32SF "VNx32HF")
   (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF")
   (VNx16DF "VNx16SF")
])
@@ -1169,6 +1178,7 @@ (define_mode_attr v_double_trunc [
   (VNx16SI "vnx16hi") (VNx32SI "vnx32hi")
   (VNx1DI "vnx1si") (VNx2DI "vnx2si") (VNx4DI "vnx4si") (VNx8DI "vnx8si")
   (VNx16DI "vnx16si")
+  (VNx1SF "vnx1hf") (VNx2SF "vnx2hf") (VNx4SF "vnx4hf") (VNx8SF "vnx8hf") 
(VNx16SF "vnx16hf") (VNx32SF "vnx32hf")
   (VNx1DF "vnx1sf") (VNx2DF "vnx2sf") (VNx4DF "vnx4sf") (VNx8DF "vnx8sf")
   (VNx16DF "vnx16sf")
])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
new file mode 100644
index 000..0923b6bc4d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrins

Re: [x86_64 PATCH] PR target/110083: Fix-up REG_EQUAL notes on COMPARE in STV.

2023-06-04 Thread Uros Bizjak via Gcc-patches
On Sat, Jun 3, 2023 at 7:31 PM Roger Sayle  wrote:
>
>
> This patch fixes PR target/110083, an ICE-on-valid regression exposed by
> my recent PTEST improvements (to address PR target/109973).  The latent
> bug (admittedly mine) is that the scalar-to-vector (STV) pass doesn't update
> or delete REG_EQUAL notes attached to COMPARE instructions.  As a result
> the operands of COMPARE would be mismatched, with the register transformed
> to V1TImode, but the immediate operand left as const_wide_int, which is
> valid for TImode but not V1TImode.  This remained latent when the STV
> conversion converted the mode of the COMPARE to CCmode, with later passes
> recognizing the REG_EQUAL note is obviously invalid as the modes didn't
> match, but now that we (correctly) preserve the CCZmode on COMPARE, the
> mismatched operand modes trigger a sanity checking ICE downstream.
>
> Fixed by updating (or deleting) any REG_EQUAL notes in convert_compare.
>
> Before:
> (expr_list:REG_EQUAL (compare:CCZ (reg:V1TI 119 [ ivin.29_38 ])
> (const_wide_int 0x8000))
>
> After:
> (expr_list:REG_EQUAL (compare:CCZ (reg:V1TI 119 [ ivin.29_38 ])
> (const_vector:V1TI [
> (const_wide_int 0x8000)
>  ]))
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2023-06-03  Roger Sayle  
>
> gcc/ChangeLog
> PR target/110083
> * config/i386/i386-features.cc (scalar_chain::convert_compare):
> Update or delete REG_EQUAL notes, converting CONST_INT and
> CONST_WIDE_INT immediate operands to a suitable CONST_VECTOR.
>
> gcc/testsuite/ChangeLog
> PR target/110083
> * gcc.target/i386/pr110083.c: New test case.

OK.

Thanks,
Uros.

>
>
> Roger
> --
>


[PATCH] RISC-V: Remove redundant vlmul_ext_* patterns to fix PR110109

2023-06-04 Thread juzhe . zhong
From: Juzhe-Zhong 

PR target/110109

This patch fixes the PR110109 issue. The issue happens because of this pattern:

(define_insn_and_split "*vlmul_extx2"
  [(set (match_operand: 0 "register_operand"  "=vr, ?&vr")
   (subreg:
 (match_operand:VLMULEXT2 1 "register_operand" " 0,   vr") 0))]
  "TARGET_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  emit_insn (gen_rtx_SET (gen_lowpart (mode, operands[0]), operands[1]));
  DONE;
})

Such a pattern generates the following code in insn-recog.cc:
static int
pattern57 (rtx x1)
{
  rtx * const operands ATTRIBUTE_UNUSED = &recog_data.operand[0];
  rtx x2;
  int res ATTRIBUTE_UNUSED;
  if (maybe_ne (SUBREG_BYTE (x1).to_constant (), 0))
return -1;
...

PR110109 ICEs at maybe_ne (SUBREG_BYTE (x1).to_constant (), 0), since SUBREG_BYTE
of scalable RVV modes cannot be accessed as SUBREG_BYTE (x1).to_constant ().

I created those patterns to optimize the following test:
vfloat32m2_t test_vlmul_ext_v_f32mf2_f32m2(vfloat32mf2_t op1) {
  return __riscv_vlmul_ext_v_f32mf2_f32m2(op1);
}

codegen:
test_vlmul_ext_v_f32mf2_f32m2:
vsetvli a5,zero,e32,m2,ta,ma
vmv.v.i v2,0
vsetvli a5,zero,e32,mf2,ta,ma
vle32.v v2,0(a1)
vs2r.v  v2,0(a0)
ret

There is a redundant 'vmv.v.i' here, since GCC has no undefined-value IR (unlike
LLVM, which has undef/poison).
For the vlmul_ext_* RVV intrinsics, GCC will initialize the register to all zeros.
However, I think it will not be
a big issue once we support subreg liveness tracking.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc: Change expand approach.
* config/riscv/vector.md (@vlmul_extx2): Remove it.
(@vlmul_extx4): Ditto.
(@vlmul_extx8): Ditto.
(@vlmul_extx16): Ditto.
(@vlmul_extx32): Ditto.
(@vlmul_extx64): Ditto.
(*vlmul_extx2): Ditto.
(*vlmul_extx4): Ditto.
(*vlmul_extx8): Ditto.
(*vlmul_extx16): Ditto.
(*vlmul_extx32): Ditto.
(*vlmul_extx64): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr110109-1.c: New test.
* gcc.target/riscv/rvv/base/pr110109-2.c: New test.

---
 .../riscv/riscv-vector-builtins-bases.cc  |  28 +-
 gcc/config/riscv/vector.md| 120 -
 .../gcc.target/riscv/rvv/base/pr110109-1.c|  40 ++
 .../gcc.target/riscv/rvv/base/pr110109-2.c| 485 ++
 4 files changed, 529 insertions(+), 144 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110109-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110109-2.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 09870c327fa..87a684dd127 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1565,30 +1565,10 @@ public:
 
   rtx expand (function_expander &e) const override
   {
-e.add_input_operand (0);
-switch (e.op_info->ret.base_type)
-  {
-  case RVV_BASE_vlmul_ext_x2:
-   return e.generate_insn (
- code_for_vlmul_extx2 (e.vector_mode ()));
-  case RVV_BASE_vlmul_ext_x4:
-   return e.generate_insn (
- code_for_vlmul_extx4 (e.vector_mode ()));
-  case RVV_BASE_vlmul_ext_x8:
-   return e.generate_insn (
- code_for_vlmul_extx8 (e.vector_mode ()));
-  case RVV_BASE_vlmul_ext_x16:
-   return e.generate_insn (
- code_for_vlmul_extx16 (e.vector_mode ()));
-  case RVV_BASE_vlmul_ext_x32:
-   return e.generate_insn (
- code_for_vlmul_extx32 (e.vector_mode ()));
-  case RVV_BASE_vlmul_ext_x64:
-   return e.generate_insn (
- code_for_vlmul_extx64 (e.vector_mode ()));
-  default:
-   gcc_unreachable ();
-  }
+tree arg = CALL_EXPR_ARG (e.exp, 0);
+rtx src = expand_normal (arg);
+emit_insn (gen_rtx_SET (gen_lowpart (e.vector_mode (), e.target), src));
+return e.target;
   }
 };
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 79f1644732a..2496eff7874 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -498,126 +498,6 @@
   }
 )
 
-(define_expand "@vlmul_extx2"
-  [(set (match_operand: 0 "register_operand")
-   (subreg:
- (match_operand:VLMULEXT2 1 "register_operand") 0))]
-  "TARGET_VECTOR"
-{})
-
-(define_expand "@vlmul_extx4"
-  [(set (match_operand: 0 "register_operand")
-   (subreg:
- (match_operand:VLMULEXT4 1 "register_operand") 0))]
-  "TARGET_VECTOR"
-{})
-
-(define_expand "@vlmul_extx8"
-  [(set (match_operand: 0 "register_operand")
-   (subreg:
- (match_operand:VLMULEXT8 1 "register_operand") 0))]
-  "TARGET_VECTOR"
-{})
-
-(define_expand "@vlmul_extx16"
-  [(set (match_operand: 0 "register_operand")
-   (subreg:
- (match_operand:VLMULEXT16 1 "register_operand") 0))]
-  "TARGET_VECTOR"
-{})
-
-(define_expand "@vlmul_extx32"
-  [(set (match_operand: 0 "register_ope

[NFC] RISC-V: Reorganize riscv-v.cc

2023-06-04 Thread juzhe . zhong
From: Juzhe-Zhong 

This patch is just reorganizing the functions for the following patch.

I moved rvv_builder and the emit_* functions before the expand_const_vector
function, since I will use them in expand_const_vector in the following patch.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (class rvv_builder): Reorganize functions.
(rvv_builder::can_duplicate_repeating_sequence_p): Ditto.
(rvv_builder::repeating_sequence_use_merge_profitable_p): Ditto.
(rvv_builder::get_merged_repeating_sequence): Ditto.
(rvv_builder::get_merge_scalar_mask): Ditto.
(emit_scalar_move_insn): Ditto.
(emit_vlmax_integer_move_insn): Ditto.
(emit_nonvlmax_integer_move_insn): Ditto.
(emit_vlmax_gather_insn): Ditto.
(emit_vlmax_masked_gather_mu_insn): Ditto.
(get_repeating_sequence_dup_machine_mode): Ditto.

---
 gcc/config/riscv/riscv-v.cc | 497 ++--
 1 file changed, 249 insertions(+), 248 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 75cf00b7eba..fa13bd94f9d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -239,6 +239,165 @@ private:
   expand_operand m_ops[MAX_OPERANDS];
 };
 
+
+class rvv_builder : public rtx_vector_builder
+{
+public:
+  rvv_builder () : rtx_vector_builder () {}
+  rvv_builder (machine_mode mode, unsigned int npatterns,
+  unsigned int nelts_per_pattern)
+: rtx_vector_builder (mode, npatterns, nelts_per_pattern)
+  {
+m_inner_mode = GET_MODE_INNER (mode);
+m_inner_bits_size = GET_MODE_BITSIZE (m_inner_mode);
+m_inner_bytes_size = GET_MODE_SIZE (m_inner_mode);
+
+gcc_assert (
+  int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mode));
+  }
+
+  bool can_duplicate_repeating_sequence_p ();
+  rtx get_merged_repeating_sequence ();
+
+  bool repeating_sequence_use_merge_profitable_p ();
+  rtx get_merge_scalar_mask (unsigned int) const;
+
+  machine_mode new_mode () const { return m_new_mode; }
+  scalar_mode inner_mode () const { return m_inner_mode; }
+  scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
+  unsigned int inner_bits_size () const { return m_inner_bits_size; }
+  unsigned int inner_bytes_size () const { return m_inner_bytes_size; }
+
+private:
+  scalar_mode m_inner_mode;
+  scalar_int_mode m_inner_int_mode;
+  machine_mode m_new_mode;
+  scalar_int_mode m_new_inner_mode;
+  unsigned int m_inner_bits_size;
+  unsigned int m_inner_bytes_size;
+};
+
+/* Return true if the vector duplicated by a super element which is the fusion
+   of consecutive elements.
+
+ v = { a, b, a, b } super element = ab, v = { ab, ab }  */
+bool
+rvv_builder::can_duplicate_repeating_sequence_p ()
+{
+  poly_uint64 new_size = exact_div (full_nelts (), npatterns ());
+  unsigned int new_inner_size = m_inner_bits_size * npatterns ();
+  if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
+  || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
+  || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode))
+return false;
+  return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns ());
+}
+
+/* Return true if it is a repeating sequence that using
+   merge approach has better codegen than using default
+   approach (slide1down).
+
+   Sequence A:
+ {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}
+
+   nelts = 16
+   npatterns = 2
+
+   for merging a we need mask 101010
+   for merging b we need mask 010101
+
+   Foreach element in the npattern, we need to build a mask in scalar register.
+   Mostely we need 3 instructions (aka COST = 3), which is consist of 2 scalar
+   instruction and 1 scalar move to v0 register.  Finally we need vector merge
+   to merge them.
+
+   lui a5, #imm
+   add a5, #imm
+   vmov.s.xv0, a5
+   vmerge.vxm  v9, v9, a1, v0
+
+   So the overall (roughly) COST of Sequence A = (3 + 1) * npatterns = 8.
+   If we use slide1down, the COST = nelts = 16 > 8 (COST of merge).
+   So return true in this case as it is profitable.
+
+   Sequence B:
+ {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h}
+
+   nelts = 16
+   npatterns = 8
+
+   COST of merge approach = (3 + 1) * npatterns = 24
+   COST of slide1down approach = nelts = 16
+   Return false in this case as it is NOT profitable in merge approach.
+*/
+bool
+rvv_builder::repeating_sequence_use_merge_profitable_p ()
+{
+  if (inner_bytes_size () > UNITS_PER_WORD)
+return false;
+
+  unsigned int nelts = full_nelts ().to_constant ();
+
+  if (!repeating_sequence_p (0, nelts, npatterns ()))
+return false;
+
+  unsigned int merge_cost = 1;
+  unsigned int build_merge_mask_cost = 3;
+  unsigned int slide1down_cost = nelts;
+
+  return (build_merge_mask_cost + merge_cost) * npatterns () < slide1down_cost;
+}
+
+/* Merge the repeating sequence into a single element and return the RTX.  */
+rtx
+rvv_builder::get_

[PATCH] RISC-V: Split arguments of expand_vec_perm

2023-06-04 Thread juzhe . zhong
From: Juzhe-Zhong 

Since the following patch will call expand_vec_perm with
split arguments, change the expand_vec_perm interface in
this patch.

gcc/ChangeLog:

* config/riscv/autovec.md: Split arguments.
* config/riscv/riscv-protos.h (expand_vec_perm): Ditto.
* config/riscv/riscv-v.cc (expand_vec_perm): Ditto.

---
 gcc/config/riscv/autovec.md | 3 ++-
 gcc/config/riscv/riscv-protos.h | 2 +-
 gcc/config/riscv/riscv-v.cc | 6 +-
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 5c3aad7ee44..ec038fe87cd 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -96,7 +96,8 @@
(match_operand: 3 "vector_perm_operand")]
   "TARGET_VECTOR && GET_MODE_NUNITS (mode).is_constant ()"
   {
-riscv_vector::expand_vec_perm (operands);
+riscv_vector::expand_vec_perm (operands[0], operands[1],
+  operands[2], operands[3]);
 DONE;
   }
 )
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index d032f569a36..00e1b20c6c6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -241,7 +241,7 @@ opt_machine_mode get_mask_mode (machine_mode);
 void expand_vec_series (rtx, rtx, rtx);
 void expand_vec_init (rtx, rtx);
 void expand_vcond (rtx *);
-void expand_vec_perm (rtx *);
+void expand_vec_perm (rtx, rtx, rtx, rtx);
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum vxrm_field_enum
 {
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fa13bd94f9d..49752cd8899 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2025,12 +2025,8 @@ expand_vcond (rtx *ops)
 /* Implement vec_perm.  */
 
 void
-expand_vec_perm (rtx *operands)
+expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
 {
-  rtx target = operands[0];
-  rtx op0 = operands[1];
-  rtx op1 = operands[2];
-  rtx sel = operands[3];
   machine_mode data_mode = GET_MODE (target);
   machine_mode sel_mode = GET_MODE (sel);
 
-- 
2.36.3



[NFC] RISC-V: Move optimization patterns into autovec-opt.md

2023-06-04 Thread juzhe . zhong
From: Juzhe-Zhong 

Move all optimization patterns into autovec-opt.md to make the organization
easier to maintain.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*not): Move to 
autovec-opt.md.
(*n): Ditto.
* config/riscv/autovec.md (*not): Ditto.
(*n): Ditto.
* config/riscv/vector.md: Ditto.

---
 gcc/config/riscv/autovec-opt.md | 92 +
 gcc/config/riscv/autovec.md | 52 ---
 gcc/config/riscv/vector.md  | 39 --
 3 files changed, 92 insertions(+), 91 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 92cdc4e9a16..f6052b50572 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -78,3 +78,95 @@
   "vwmulsu.vv\t%0,%3,%4%p1"
   [(set_attr "type" "viwmul")
(set_attr "mode" "")])
+
+;; 
-
+;;  Integer Compare Instructions Simplification
+;; 
-
+;; Simplify OP(V, V) Instructions to VMCLR.m Includes:
+;; - 1.  VMSNE
+;; - 2.  VMSLT
+;; - 3.  VMSLTU
+;; - 4.  VMSGT
+;; - 5.  VMSGTU
+;; 
-
+;; Simplify OP(V, V) Instructions to VMSET.m Includes:
+;; - 1.  VMSEQ
+;; - 2.  VMSLE
+;; - 3.  VMSLEU
+;; - 4.  VMSGE
+;; - 5.  VMSGEU
+;; 
-
+
+(define_split
+  [(set (match_operand:VB  0 "register_operand")
+   (if_then_else:VB
+ (unspec:VB
+   [(match_operand:VB 1 "vector_all_trues_mask_operand")
+(match_operand4 "vector_length_operand")
+(match_operand5 "const_int_operand")
+(match_operand6 "const_int_operand")
+(reg:SI VL_REGNUM)
+(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:VB3 "vector_move_operand")
+ (match_operand:VB2 "vector_undef_operand")))]
+  "TARGET_VECTOR"
+  [(const_int 0)]
+  {
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mode),
+RVV_VUNDEF (mode), operands[3],
+operands[4], operands[5]));
+DONE;
+  }
+)
+
+;; -
+;;  [BOOL] Binary logical operations (inverted second input)
+;; -
+;; Includes:
+;; - vmandnot.mm
+;; - vmornot.mm
+;; -
+
+(define_insn_and_split "*not"
+  [(set (match_operand:VB 0 "register_operand"   "=vr")
+   (bitmanip_bitwise:VB
+ (not:VB (match_operand:VB 2 "register_operand" " vr"))
+ (match_operand:VB 1 "register_operand" " vr")))]
+  "TARGET_VECTOR"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_not (, mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+DONE;
+  }
+  [(set_attr "type" "vmalu")
+   (set_attr "mode" "")])
+
+;; -
+;;  [BOOL] Binary logical operations (inverted result)
+;; -
+;; Includes:
+;; - vmnand.mm
+;; - vmnor.mm
+;; - vmxnor.mm
+;; -
+
+(define_insn_and_split "*n"
+  [(set (match_operand:VB 0 "register_operand" "=vr")
+   (not:VB
+ (any_bitwise:VB
+   (match_operand:VB 1 "register_operand" " vr")
+   (match_operand:VB 2 "register_operand" " vr"]
+  "TARGET_VECTOR"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_n (, mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+DONE;
+  }
+  [(set_attr "type" "vmalu")
+   (set_attr "mode" "")])
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index ec038fe87cd..9f4492db23c 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -229,58 +229,6 @@
   [(set_attr "type" "vmalu")
(set_attr "mode" "")])
 
-;; -
-;;  [BOOL] Binary logical operations (inverted second input)
-;; -
-;; Includes:
-;; - vmandnot.mm
-;; - vmornot.mm
-;; -
-
-(define_insn_and_split "*not"
-  [(set (match_operand:VB 0 "register_operand"   "=vr")
-   (bitmanip_bitwise:VB
- (not:VB (match_operand:VB 2 "register_operand" " vr"))
- (match_operand:VB 1 "register_operand" " vr")))]
-  "TARGET_VECTOR

Re: [PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-04 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Sat, Jun 3, 2023 at 2:55 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the boolean evaluation of EQ/NE against zero
> by adding two insn_and_split patterns similar to SImode conditional
> store:
>
> "eq_zero":
> op0 = (op1 == 0) ? 1 : 0;
> op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */
>
> "movsicc_ne0_reg_0":
> op0 = (op1 != 0) ? op2 : 0;
> op0 = op2; if (op1 == 0) ? op0 = op1;  /* optimized */
>
> These also work in SFmode by ignoring their sign bits, and further-
> more, the branch if EQ/NE against zero in SFmode is also done in the
> same manner.
>
> The reasons for this optimization in SFmode are:
>
>   - Only zero values (negative or non-negative) contain no 1 bits
> in both the exponent and the mantissa.
>   - EQ/NE comparisons involving NaNs produce no signal even if they
> are signaling.
>   - Even if the use of IEEE 754 single-precision floating-point co-
> processor is configured (TARGET_HARD_FLOAT is true):
> 1. Load zero value to FP register
> 2. Possibly, additional FP move if the comparison target is
>an address register
> 3. FP equality check instruction
> 4. Read the boolean register containing the result, or condi-
>tional branch
> As noted above, a considerable number of instructions are still
> generated.
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (const_float_0_operand):
> Rename from obsolete "const_float_1_operand" and change the
> constant to compare.
> (cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
> New.
> * config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
> Add code for EQ/NE comparison with constant zero in SFmode.
> (xtensa_expand_scc): Added code to derive boolean evaluation
> of EQ/NE with constant zero for comparison in SFmode.
> (xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
> zero inside "cbranchsf4" to 0.
> * config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
> Change "match_operator" and the third "match_operand" to the
> ones mentioned above.
> (movsicc_ne0_reg_zero, eq_zero): New.
> ---
>  gcc/config/xtensa/predicates.md | 19 ++--
>  gcc/config/xtensa/xtensa.cc | 43 ++
>  gcc/config/xtensa/xtensa.md | 53 +
>  3 files changed, 106 insertions(+), 9 deletions(-)

This change results in a bunch of new testsuite failures
on configurations without FPU that are all ICEs:

+FAIL: gcc.c-torture/execute/bitfld-3.c   -O1  execution test
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O1  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O1  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -g  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -Os  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -Os  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (internal compiler error:
in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error: in
extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O1  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O1  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O2  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O2  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functi

Re: [NFC] RISC-V: Move optimization patterns into autovec-opt.md

2023-06-04 Thread Kito Cheng via Gcc-patches
Lgtm

於 2023年6月4日 週日,17:37寫道:

> From: Juzhe-Zhong 
>
> Move all optimization patterns into autovec-opt.md to make the organization
> easier to maintain.
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*not): Move to
> autovec-opt.md.
> (*n): Ditto.
> * config/riscv/autovec.md (*not): Ditto.
> (*n): Ditto.
> * config/riscv/vector.md: Ditto.
>
> ---
>  gcc/config/riscv/autovec-opt.md | 92 +
>  gcc/config/riscv/autovec.md | 52 ---
>  gcc/config/riscv/vector.md  | 39 --
>  3 files changed, 92 insertions(+), 91 deletions(-)
>
> diff --git a/gcc/config/riscv/autovec-opt.md
> b/gcc/config/riscv/autovec-opt.md
> index 92cdc4e9a16..f6052b50572 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -78,3 +78,95 @@
>"vwmulsu.vv\t%0,%3,%4%p1"
>[(set_attr "type" "viwmul")
> (set_attr "mode" "")])
> +
> +;;
> -
> +;;  Integer Compare Instructions Simplification
> +;;
> -
> +;; Simplify OP(V, V) Instructions to VMCLR.m Includes:
> +;; - 1.  VMSNE
> +;; - 2.  VMSLT
> +;; - 3.  VMSLTU
> +;; - 4.  VMSGT
> +;; - 5.  VMSGTU
> +;;
> -
> +;; Simplify OP(V, V) Instructions to VMSET.m Includes:
> +;; - 1.  VMSEQ
> +;; - 2.  VMSLE
> +;; - 3.  VMSLEU
> +;; - 4.  VMSGE
> +;; - 5.  VMSGEU
> +;;
> -
> +
> +(define_split
> +  [(set (match_operand:VB  0 "register_operand")
> +   (if_then_else:VB
> + (unspec:VB
> +   [(match_operand:VB 1 "vector_all_trues_mask_operand")
> +(match_operand4 "vector_length_operand")
> +(match_operand5 "const_int_operand")
> +(match_operand6 "const_int_operand")
> +(reg:SI VL_REGNUM)
> +(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> + (match_operand:VB3 "vector_move_operand")
> + (match_operand:VB2 "vector_undef_operand")))]
> +  "TARGET_VECTOR"
> +  [(const_int 0)]
> +  {
> +emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX
> (mode),
> +RVV_VUNDEF (mode), operands[3],
> +operands[4], operands[5]));
> +DONE;
> +  }
> +)
> +
> +;;
> -
> +;;  [BOOL] Binary logical operations (inverted second input)
> +;;
> -
> +;; Includes:
> +;; - vmandnot.mm
> +;; - vmornot.mm
> +;;
> -
> +
> +(define_insn_and_split "*not"
> +  [(set (match_operand:VB 0 "register_operand"   "=vr")
> +   (bitmanip_bitwise:VB
> + (not:VB (match_operand:VB 2 "register_operand" " vr"))
> + (match_operand:VB 1 "register_operand" " vr")))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "&& can_create_pseudo_p ()"
> +  [(const_int 0)]
> +  {
> +insn_code icode = code_for_pred_not (, mode);
> +riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> operands);
> +DONE;
> +  }
> +  [(set_attr "type" "vmalu")
> +   (set_attr "mode" "")])
> +
> +;;
> -
> +;;  [BOOL] Binary logical operations (inverted result)
> +;;
> -
> +;; Includes:
> +;; - vmnand.mm
> +;; - vmnor.mm
> +;; - vmxnor.mm
> +;;
> -
> +
> +(define_insn_and_split "*n"
> +  [(set (match_operand:VB 0 "register_operand" "=vr")
> +   (not:VB
> + (any_bitwise:VB
> +   (match_operand:VB 1 "register_operand" " vr")
> +   (match_operand:VB 2 "register_operand" " vr"))))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "&& can_create_pseudo_p ()"
> +  [(const_int 0)]
> +  {
> +insn_code icode = code_for_pred_n (, mode);
> +riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> operands);
> +DONE;
> +  }
> +  [(set_attr "type" "vmalu")
> +   (set_attr "mode" "")])
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index ec038fe87cd..9f4492db23c 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -229,58 +229,6 @@
>[(set_attr "type" "vmalu")
> (set_attr "mode" "")])
>
> -;;
> -
> -;;  [BOOL] Binary logical operations (inverted second input)
> -;;
> -
> -;; Includes:
> -;; - vmandnot.mm
> -;; - vmornot.mm
> -;;
> ---

RE: [NFC] RISC-V: Move optimization patterns into autovec-opt.md

2023-06-04 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Kito Cheng via Gcc-patches
Sent: Sunday, June 4, 2023 9:14 PM
To: juzhe.zh...@rivai.ai
Cc: gcc-patches@gcc.gnu.org; jeffreya...@gmail.com; pal...@rivosinc.com; 
rdapp@gmail.com
Subject: Re: [NFC] RISC-V: Move optimization patterns into autovec-opt.md

Lgtm

於 2023年6月4日 週日,17:37寫道:

> From: Juzhe-Zhong 
>
> Move all optimization patterns into autovec-opt.md to make the
> organization easier to maintain.
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*not): Move to 
> autovec-opt.md.
> (*n): Ditto.
> * config/riscv/autovec.md (*not): Ditto.
> (*n): Ditto.
> * config/riscv/vector.md: Ditto.
>
> ---
>  gcc/config/riscv/autovec-opt.md | 92 +
>  gcc/config/riscv/autovec.md | 52 ---
>  gcc/config/riscv/vector.md  | 39 --
>  3 files changed, 92 insertions(+), 91 deletions(-)
>
> diff --git a/gcc/config/riscv/autovec-opt.md 
> b/gcc/config/riscv/autovec-opt.md index 92cdc4e9a16..f6052b50572 
> 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -78,3 +78,95 @@
>"vwmulsu.vv\t%0,%3,%4%p1"
>[(set_attr "type" "viwmul")
> (set_attr "mode" "")])
> +
> +;;
> --
> ---
> +;;  Integer Compare Instructions Simplification ;;
> --
> ---
> +;; Simplify OP(V, V) Instructions to VMCLR.m Includes:
> +;; - 1.  VMSNE
> +;; - 2.  VMSLT
> +;; - 3.  VMSLTU
> +;; - 4.  VMSGT
> +;; - 5.  VMSGTU
> +;;
> --
> ---
> +;; Simplify OP(V, V) Instructions to VMSET.m Includes:
> +;; - 1.  VMSEQ
> +;; - 2.  VMSLE
> +;; - 3.  VMSLEU
> +;; - 4.  VMSGE
> +;; - 5.  VMSGEU
> +;;
> --
> ---
> +
> +(define_split
> +  [(set (match_operand:VB  0 "register_operand")
> +   (if_then_else:VB
> + (unspec:VB
> +   [(match_operand:VB 1 "vector_all_trues_mask_operand")
> +(match_operand4 "vector_length_operand")
> +(match_operand5 "const_int_operand")
> +(match_operand6 "const_int_operand")
> +(reg:SI VL_REGNUM)
> +(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> + (match_operand:VB3 "vector_move_operand")
> + (match_operand:VB2 "vector_undef_operand")))]
> +  "TARGET_VECTOR"
> +  [(const_int 0)]
> +  {
> +emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX
> (mode),
> +RVV_VUNDEF (mode), operands[3],
> +operands[4], operands[5]));
> +DONE;
> +  }
> +)
> +
> +;;
> --
> ---
> +;;  [BOOL] Binary logical operations (inverted second input) ;;
> --
> ---
> +;; Includes:
> +;; - vmandnot.mm
> +;; - vmornot.mm
> +;;
> --
> ---
> +
> +(define_insn_and_split "*not"
> +  [(set (match_operand:VB 0 "register_operand"   "=vr")
> +   (bitmanip_bitwise:VB
> + (not:VB (match_operand:VB 2 "register_operand" " vr"))
> + (match_operand:VB 1 "register_operand" " vr")))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "&& can_create_pseudo_p ()"
> +  [(const_int 0)]
> +  {
> +insn_code icode = code_for_pred_not (, mode);
> +riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> operands);
> +DONE;
> +  }
> +  [(set_attr "type" "vmalu")
> +   (set_attr "mode" "")])
> +
> +;;
> --
> ---
> +;;  [BOOL] Binary logical operations (inverted result) ;;
> --
> ---
> +;; Includes:
> +;; - vmnand.mm
> +;; - vmnor.mm
> +;; - vmxnor.mm
> +;;
> --
> ---
> +
> +(define_insn_and_split "*n"
> +  [(set (match_operand:VB 0 "register_operand" "=vr")
> +   (not:VB
> + (any_bitwise:VB
> +   (match_operand:VB 1 "register_operand" " vr")
> +   (match_operand:VB 2 "register_operand" " vr"))))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "&& can_create_pseudo_p ()"
> +  [(const_int 0)]
> +  {
> +insn_code icode = code_for_pred_n (, mode);
> +riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> operands);
> +DONE;
> +  }
> +  [(set_attr "type" "vmalu")
> +   (set_attr "mode" "")])
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md 
> index ec038fe87cd..9f4492db23c 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -229,58 +229,6 @@
>[(set_at

Re: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API

2023-06-04 Thread Kito Cheng via Gcc-patches
LGTM too, thanks

On Sun, Jun 4, 2023 at 3:36 PM 钟居哲  wrote:
>
> LGTM.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: pan2.li
> Date: 2023-06-04 15:19
> To: gcc-patches
> CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
> Subject: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API
> From: Pan Li 
>
> This patch supports the 2 intrinsic APIs of the FP16 ZVFHMIN extension,
> i.e. SEW=16 for the instructions below:
>
> vfwcvt.f.f.v
> vfncvt.f.f.w
>
> Then users can leverage the intrinsic APIs to perform the conversion
> between RVV vector single-precision and half-precision floating point.
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-types.def
> (vfloat32mf2_t): Add vfloat32mf2_t type to vfncvt.f.f.w operations.
> (vfloat32m1_t): Likewise.
> (vfloat32m2_t): Likewise.
> (vfloat32m4_t): Likewise.
> (vfloat32m8_t): Likewise.
> * config/riscv/riscv-vector-builtins.def: Fix typo in comments.
> * config/riscv/vector-iterators.md: Add single to half machine
> mode conversion.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: New test.
> ---
> .../riscv/riscv-vector-builtins-types.def |  6 +++
> gcc/config/riscv/riscv-vector-builtins.def|  2 +-
> gcc/config/riscv/vector-iterators.md  | 10 
> .../riscv/rvv/base/zvfhmin-intrinsic.c| 53 +++
> 4 files changed, 70 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
> b/gcc/config/riscv/riscv-vector-builtins-types.def
> index 65716b8c637..9cb3aca992e 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-types.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-types.def
> @@ -553,6 +553,12 @@ DEF_RVV_WCONVERT_U_OPS (vuint64m2_t, 
> RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_6
> DEF_RVV_WCONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_ELEN_64)
> DEF_RVV_WCONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_ELEN_64)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32)
> +
> DEF_RVV_WCONVERT_F_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
> DEF_RVV_WCONVERT_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
> DEF_RVV_WCONVERT_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
> diff --git a/gcc/config/riscv/riscv-vector-builtins.def 
> b/gcc/config/riscv/riscv-vector-builtins.def
> index 149835f36ac..310edeaf5a9 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.def
> +++ b/gcc/config/riscv/riscv-vector-builtins.def
> @@ -490,7 +490,7 @@ DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, int64, 
> VNx16DI, VNx8DI, VOID, _i6
> DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, 
> VOID, _u64m8,
>   _u64, _e64m8)
> -/* Enabled if TARGET_VECTOR_ELEN_FP_16 && 9TARGET_ZVFH or TARGET_ZVFHMIN).  
> */
> +/* Enabled if TARGET_VECTOR_ELEN_FP_16 && (TARGET_ZVFH or TARGET_ZVFHMIN).  
> */
> /* LMUL = 1/4.  */
> DEF_RVV_TYPE (vfloat16mf4_t, 18, __rvv_float16mf4_t, float16, VNx2HF, VNx1HF, 
> VOID,
>   _f16mf4, _f16, _e16mf4)
> diff --git a/gcc/config/riscv/vector-iterators.md 
> b/gcc/config/riscv/vector-iterators.md
> index 5fbaef89566..90743ed76c5 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -481,6 +481,13 @@ (define_mode_iterator VWEXTI [
> ])
> (define_mode_iterator VWEXTF [
> +  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
> +  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
> +  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
> +  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
> +  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
> +  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
> +
>(VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
>(VNx2DF "TARGET_VECTOR_ELEN_FP_64")
>(VNx4DF "TARGET_VECTOR_ELEN_FP_64")
> @@ -1145,6 +1152,8 @@ (define_mode_attr V_DOUBLE_TRUNC [
>(VNx16SI "VNx16HI") (VNx32SI "VNx32HI")
>(VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI")
>(VNx16DI "VNx16SI")
> +
> +  (VNx1SF "VNx1HF") (VNx2SF "VNx2HF") (VNx4SF "VNx4HF") (VNx8SF "VNx8HF") 
> (VNx16SF "VNx16HF") (VNx32SF "VNx32HF")
>(VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF")
>(VNx16DF "VNx16SF")
> ])
> @@ -1169,6 +1178,7 @@ (define_mode_attr v_double_trunc [
>(VNx16SI "vnx16hi") (VNx32SI "vnx32hi")
>(VNx1DI "vnx1si") (VNx2DI "vnx2si") (VNx4DI "vnx4si") (VNx8DI "vnx8si")
>(VNx16DI "vnx16si")
> +  (VNx1SF "vnx1hf") (VNx2SF "vnx2hf") (VNx4SF "vnx4hf") (VNx8SF "vnx8hf") 
> (VNx16SF "vnx16hf") (VNx32SF "vnx32hf")
>(VNx1DF "vnx1sf") (VNx2DF "vnx2sf") (VNx4DF "vnx4sf") (VNx8DF "vnx8sf")
>

RE: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API

2023-06-04 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito and Juzhe.

Pan

-Original Message-
From: Kito Cheng  
Sent: Sunday, June 4, 2023 9:38 PM
To: 钟居哲 
Cc: Li, Pan2 ; gcc-patches ; 
kito.cheng ; Wang, Yanzhang 
Subject: Re: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API

LGTM too, thanks

On Sun, Jun 4, 2023 at 3:36 PM 钟居哲  wrote:
>
> LGTM.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: pan2.li
> Date: 2023-06-04 15:19
> To: gcc-patches
> CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
> Subject: [PATCH] RISC-V: Support RVV FP16 ZVFHMIN intrinsic API
> From: Pan Li 
>
> This patch supports the 2 intrinsic APIs of the FP16 ZVFHMIN extension,
> i.e. SEW=16 for the instructions below:
>
> vfwcvt.f.f.v
> vfncvt.f.f.w
>
> Then users can leverage the intrinsic APIs to perform the conversion
> between RVV vector single-precision and half-precision floating point.
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-types.def
> (vfloat32mf2_t): Add vfloat32mf2_t type to vfncvt.f.f.w operations.
> (vfloat32m1_t): Likewise.
> (vfloat32m2_t): Likewise.
> (vfloat32m4_t): Likewise.
> (vfloat32m8_t): Likewise.
> * config/riscv/riscv-vector-builtins.def: Fix typo in comments.
> * config/riscv/vector-iterators.md: Add single to half machine mode 
> conversion.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: New test.
> ---
> .../riscv/riscv-vector-builtins-types.def |  6 +++
> gcc/config/riscv/riscv-vector-builtins.def|  2 +-
> gcc/config/riscv/vector-iterators.md  | 10 
> .../riscv/rvv/base/zvfhmin-intrinsic.c| 53 +++
> 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
> b/gcc/config/riscv/riscv-vector-builtins-types.def
> index 65716b8c637..9cb3aca992e 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-types.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-types.def
> @@ -553,6 +553,12 @@ DEF_RVV_WCONVERT_U_OPS (vuint64m2_t, 
> RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_6 DEF_RVV_WCONVERT_U_OPS 
> (vuint64m4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) 
> DEF_RVV_WCONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_ELEN_64)
> +DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
> +RVV_REQUIRE_MIN_VLEN_64) DEF_RVV_WCONVERT_F_OPS (vfloat32m1_t, 
> +RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WCONVERT_F_OPS (vfloat32m2_t, 
> +RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WCONVERT_F_OPS (vfloat32m4_t, 
> +RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WCONVERT_F_OPS (vfloat32m8_t, 
> +RVV_REQUIRE_ELEN_FP_32)
> +
> DEF_RVV_WCONVERT_F_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64) 
> DEF_RVV_WCONVERT_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) 
> DEF_RVV_WCONVERT_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) diff 
> --git a/gcc/config/riscv/riscv-vector-builtins.def 
> b/gcc/config/riscv/riscv-vector-builtins.def
> index 149835f36ac..310edeaf5a9 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.def
> +++ b/gcc/config/riscv/riscv-vector-builtins.def
> @@ -490,7 +490,7 @@ DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, 
> int64, VNx16DI, VNx8DI, VOID, _i6 DEF_RVV_TYPE (vuint64m8_t, 16, 
> __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, VOID, _u64m8,
>   _u64, _e64m8)
> -/* Enabled if TARGET_VECTOR_ELEN_FP_16 && 9TARGET_ZVFH or 
> TARGET_ZVFHMIN).  */
> +/* Enabled if TARGET_VECTOR_ELEN_FP_16 && (TARGET_ZVFH or 
> +TARGET_ZVFHMIN).  */
> /* LMUL = 1/4.  */
> DEF_RVV_TYPE (vfloat16mf4_t, 18, __rvv_float16mf4_t, float16, VNx2HF, VNx1HF, 
> VOID,
>   _f16mf4, _f16, _e16mf4)
> diff --git a/gcc/config/riscv/vector-iterators.md 
> b/gcc/config/riscv/vector-iterators.md
> index 5fbaef89566..90743ed76c5 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -481,6 +481,13 @@ (define_mode_iterator VWEXTI [
> ])
> (define_mode_iterator VWEXTF [
> +  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")  
> + (VNx2SF "TARGET_VECTOR_ELEN_FP_32")  (VNx4SF 
> + "TARGET_VECTOR_ELEN_FP_32")  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")  
> + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")  
> + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
> +
>(VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
>(VNx2DF "TARGET_VECTOR_ELEN_FP_64")
>(VNx4DF "TARGET_VECTOR_ELEN_FP_64") @@ -1145,6 +1152,8 @@ 
> (define_mode_attr V_DOUBLE_TRUNC [
>(VNx16SI "VNx16HI") (VNx32SI "VNx32HI")
>(VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI")
>(VNx16DI "VNx16SI")
> +
> +  (VNx1SF "VNx1HF") (VNx2SF "VNx2HF") (VNx4SF "VNx4HF") (VNx8SF 
> + "VNx8HF") (VNx16SF "VNx16HF") (VNx32SF "VNx32HF")
>(VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF")
>(VNx16DF "VNx16SF")
> ])
> @@ -1169,6 +1178,7 @@ (define_mode_attr v_double_trunc [
>(VNx16SI "vnx16hi") (VNx32SI "vnx32hi")
>(VNx1DI "vnx1si") (VNx2DI "vn

Re: [PATCH] RISC-V: Remove redundant vlmul_ext_* patterns to fix PR110109

2023-06-04 Thread Jeff Law via Gcc-patches




On 6/4/23 02:51, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

 PR target/110109

This patch fixes the PR110109 issue. The issue happens because:

(define_insn_and_split "*vlmul_extx2"
   [(set (match_operand: 0 "register_operand"  "=vr, ?&vr")
(subreg:
  (match_operand:VLMULEXT2 1 "register_operand" " 0,   vr") 0))]
   "TARGET_VECTOR"
   "#"
   "&& reload_completed"
   [(const_int 0)]
{
   emit_insn (gen_rtx_SET (gen_lowpart (mode, operands[0]), operands[1]));
   DONE;
})
So anytime you find yourself with an explicit subreg in a pattern, 
there's a very reasonable chance you've made a mistake somewhere else.


As a result every time I see an explicit subreg in a pattern I ask the 
author to describe in a fair amount of detail why the subreg was needed.


At first glance, they definitely look like you're papering over a
problem elsewhere.  These are just simple moves.  For scalar modes
this would be clearly wrong, but I'm not sure we have the same
restrictions on vector moves.


I would also caution against the way you're generating code here.  I'd 
have to sit down with it for a while, but I'm not 100% sure you can just 
change the location of the subreg like you did (it's going to move from 
wrapping operand1 to wrapping operand0).  The semantics may be subtly 
different -- and that's one of the other reasons to avoid explicit 
subregs.  It's easy to get the semantics wrong.





I created those patterns to optimize the following test:
vfloat32m2_t test_vlmul_ext_v_f32mf2_f32m2(vfloat32mf2_t op1) {
   return __riscv_vlmul_ext_v_f32mf2_f32m2(op1);
}

codegen:
test_vlmul_ext_v_f32mf2_f32m2:
 vsetvli a5,zero,e32,m2,ta,ma
 vmv.v.i v2,0
 vsetvli a5,zero,e32,mf2,ta,ma
 vle32.v v2,0(a1)
 vs2r.v  v2,0(a0)
 ret

There is a redundant 'vmv.v.i' here, since GCC has no undefined value in its
IR (unlike LLVM, which has undef/poison).
For the vlmul_ext_* RVV intrinsics, GCC will initialize the register to all
zeros. However, I think it will not be
a big issue once we support subreg liveness tracking.
As I've suggested elsewhere, let's get the code correct and reasonably 
complete before we worry about this class of problems.  I'm not even 
convinced it's a big issue right now.






gcc/ChangeLog:

 * config/riscv/riscv-vector-builtins-bases.cc: Change expand approach.
 * config/riscv/vector.md (@vlmul_extx2): Remove it.
 (@vlmul_extx4): Ditto.
 (@vlmul_extx8): Ditto.
 (@vlmul_extx16): Ditto.
 (@vlmul_extx32): Ditto.
 (@vlmul_extx64): Ditto.
 (*vlmul_extx2): Ditto.
 (*vlmul_extx4): Ditto.
 (*vlmul_extx8): Ditto.
 (*vlmul_extx16): Ditto.
 (*vlmul_extx32): Ditto.
 (*vlmul_extx64): Ditto.

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/rvv/base/pr110109-1.c: New test.
 * gcc.target/riscv/rvv/base/pr110109-2.c: New test.

Approved.  Please commit.

Jeff


RE: [PATCH] RISC-V: Remove redundant vlmul_ext_* patterns to fix PR110109

2023-06-04 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, June 4, 2023 9:55 PM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [PATCH] RISC-V: Remove redundant vlmul_ext_* patterns to fix 
PR110109



On 6/4/23 02:51, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
>  PR target/110109
> 
> This patch fixes the PR110109 issue. The issue happens because:
> 
> (define_insn_and_split "*vlmul_extx2"
>[(set (match_operand: 0 "register_operand"  "=vr, ?&vr")
> (subreg:
>   (match_operand:VLMULEXT2 1 "register_operand" " 0,   vr") 0))]
>"TARGET_VECTOR"
>"#"
>"&& reload_completed"
>[(const_int 0)]
> {
>emit_insn (gen_rtx_SET (gen_lowpart (mode, operands[0]), 
> operands[1]));
>DONE;
> })
So anytime you find yourself with an explicit subreg in a pattern, there's a 
very reasonable chance you've made a mistake somewhere else.

As a result every time I see an explicit subreg in a pattern I ask the author 
to describe in a fair amount of detail why the subreg was needed.

At first glance, they definitely look like you're papering over a
problem elsewhere.  These are just simple moves.  For scalar modes
this would be clearly wrong, but I'm not sure we have the same restrictions on
vector moves.

I would also caution against the way you're generating code here.  I'd have to 
sit down with it for a while, but I'm not 100% sure you can just change the 
location of the subreg like you did (it's going to move from wrapping operand1 
to wrapping operand0).  The semantics may be subtly different -- and that's one 
of the other reasons to avoid explicit subregs.  It's easy to get the semantics 
wrong.


> 
> I created those patterns to optimize the following test:
> vfloat32m2_t test_vlmul_ext_v_f32mf2_f32m2(vfloat32mf2_t op1) {
>return __riscv_vlmul_ext_v_f32mf2_f32m2(op1);
> }
> 
> codegen:
> test_vlmul_ext_v_f32mf2_f32m2:
>  vsetvli a5,zero,e32,m2,ta,ma
>  vmv.v.i v2,0
>  vsetvli a5,zero,e32,mf2,ta,ma
>  vle32.v v2,0(a1)
>  vs2r.v  v2,0(a0)
>  ret
> 
> There is a redundant 'vmv.v.i' here, since GCC has no undefined value in its
> IR (unlike LLVM, which has undef/poison).
> For the vlmul_ext_* RVV intrinsics, GCC will initialize the register to all
> zeros. However, I think it will not be a big issue once we support subreg
> liveness tracking.
As I've suggested elsewhere, let's get the code correct and reasonably complete 
before we worry about this class of problems.  I'm not even convinced it's a 
big issue right now.



> 
> gcc/ChangeLog:
> 
>  * config/riscv/riscv-vector-builtins-bases.cc: Change expand 
> approach.
>  * config/riscv/vector.md (@vlmul_extx2): Remove it.
>  (@vlmul_extx4): Ditto.
>  (@vlmul_extx8): Ditto.
>  (@vlmul_extx16): Ditto.
>  (@vlmul_extx32): Ditto.
>  (@vlmul_extx64): Ditto.
>  (*vlmul_extx2): Ditto.
>  (*vlmul_extx4): Ditto.
>  (*vlmul_extx8): Ditto.
>  (*vlmul_extx16): Ditto.
>  (*vlmul_extx32): Ditto.
>  (*vlmul_extx64): Ditto.
> 
> gcc/testsuite/ChangeLog:
> 
>  * gcc.target/riscv/rvv/base/pr110109-1.c: New test.
>  * gcc.target/riscv/rvv/base/pr110109-2.c: New test.
Approved.  Please commit.

Jeff


Re: [PATCH] RISC-V: Split arguments of expand_vec_perm

2023-06-04 Thread Jeff Law via Gcc-patches




On 6/4/23 03:25, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

Since the following patch will call expand_vec_perm with
split arguments, change the expand_vec_perm interface in
this patch.

gcc/ChangeLog:

 * config/riscv/autovec.md: Split arguments.
 * config/riscv/riscv-protos.h (expand_vec_perm): Ditto.
 * config/riscv/riscv-v.cc (expand_vec_perm): Ditto.

OK
jeff


Re: [NFC] RISC-V: Reorganize riscv-v.cc

2023-06-04 Thread Jeff Law via Gcc-patches




On 6/4/23 03:11, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

This patch is just reorganizing the functions for the following patch.

I placed the rvv_builder and emit_* functions before the expand_const_vector
function, since I will use them in expand_const_vector in the following patch.

gcc/ChangeLog:

 * config/riscv/riscv-v.cc (class rvv_builder): Reorganize functions.
 (rvv_builder::can_duplicate_repeating_sequence_p): Ditto.
 (rvv_builder::repeating_sequence_use_merge_profitable_p): Ditto.
 (rvv_builder::get_merged_repeating_sequence): Ditto.
 (rvv_builder::get_merge_scalar_mask): Ditto.
 (emit_scalar_move_insn): Ditto.
 (emit_vlmax_integer_move_insn): Ditto.
 (emit_nonvlmax_integer_move_insn): Ditto.
 (emit_vlmax_gather_insn): Ditto.
 (emit_vlmax_masked_gather_mu_insn): Ditto.
 (get_repeating_sequence_dup_machine_mode): Ditto.

OK
jeff



Re: [PATCHv2 1/2] Improve do_store_flag for single bit comparison against 0

2023-06-04 Thread Jeff Law via Gcc-patches




On 5/20/23 09:04, Andrew Pinski via Gcc-patches wrote:

While working on something else, I noticed we could improve
the code generation for the following function:
```
unsigned f(unsigned t)
{
   if (t & ~(1<<30)) __builtin_unreachable();
   return t != 0;
}
```
Right now we just emit a comparison against 0 instead
of just a shift right by 30.
There is code in do_store_flag which already optimizes
`(t & 1<<30) != 0` to `(t >> 30) & 1` (using bit extraction if available).
This patch extends it to handle the case where we know that t has
just one possibly-nonzero bit.

Changes from v1:
* v2: Updated for the bit extraction improvements.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* expr.cc (do_store_flag): Extend the one bit checking case
to handle the case where we don't have an and but rather still
one bit is known to be non-zero.

OK
jeff


Re: [PATCHv2 2/2] Improve do_store_flag for comparing single bit against that bit

2023-06-04 Thread Jeff Law via Gcc-patches




On 5/20/23 09:04, Andrew Pinski via Gcc-patches wrote:

This is a case which I noticed while working on the previous patch.
Sometimes we end up with `a == CST` instead of comparing against 0.
This happens in the following code:
```
unsigned f(unsigned t)
{
   if (t & ~(1<<30)) __builtin_unreachable();
   t ^= (1<<30);
   return t != 0;
}
```

We should handle the case where the nonzero bits is the same as the
comparison operand.

Changes from v1:
* v2: Updated for the bit extraction changes.

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* expr.cc (do_store_flag): Improve for single bit testing
not against zero but against that single bit.

OK
jeff


Re: [PATCH V5] Use reg mode to move sub blocks for parameters and returns

2023-06-04 Thread Jeff Law via Gcc-patches




On 5/9/23 07:43, Jiufu Guo wrote:


Thanks for pointing this out!  Yes, a BLKmode rtx may not always be a MEM.
MEM_SIZE is only ok for a MEM after its known size is computed.
Here MEM_SIZE is fine just because it is a stack rtx corresponding
to the type of parameter and returns which has been computed.

I updated the patch to resolve the conflicts with the trunk,
retested bootstrap & testsuite, and then updated the patch to a new version.

And this version pass bootstrap and regtest on ppc64{,le}, x86_64.

The major change is that 'move_sub_blocks' only handles the case when
the block size can be moved by the same submode, i.e. (size % sub_size)
is 0.  If there is no objection, I will commit the new version.

BR,
Jeff (Jiufu)

gcc/ChangeLog:

* cfgexpand.cc (expand_used_vars): Update to mark DECL_USEDBY_RETURN_P
for returns.
* expr.cc (move_sub_blocks): New function.
(expand_assignment): Update assignment code about returns/parameters.
* function.cc (assign_parm_setup_block): Update to mark
DECL_REGS_TO_STACK_P for parameter.
* tree-core.h (struct tree_decl_common): Add comment.
* tree.h (DECL_USEDBY_RETURN_P): New define.
(DECL_REGS_TO_STACK_P): New define.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr65421-1.c: New test.
* gcc.target/powerpc/pr65421-2.c: New test.

I don't think this was ever explicitly ACK'd.  OK for the trunk.

jeff


Re: [PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to INT_MIN

2023-06-04 Thread Max Filippov via Gcc-patches
On Sat, Jun 3, 2023 at 3:52 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes both the boolean evaluation of and the branching of
> EQ/NE against INT_MIN (-2147483648), by taking advantage of the specifi-
> cation the ABS machine instruction on Xtensa returns INT_MIN iff INT_MIN,
> otherwise non-negative value.
>
> /* example */
> int test0(int x) {
>   return (x == -2147483648);
> }
> int test1(int x) {
>   return (x != -2147483648);
> }
> extern void foo(void);
> void test2(int x) {
>   if(x == -2147483648)
> foo();
> }
> void test3(int x) {
>   if(x != -2147483648)
> foo();
> }
>
> ;; before
> test0:
> movi.n  a9, -1
> slli    a9, a9, 31
> add.n   a2, a2, a9
> nsau    a2, a2
> srli    a2, a2, 5
> ret.n
> test1:
> movi.n  a9, -1
> slli    a9, a9, 31
> add.n   a9, a2, a9
> movi.n  a2, 1
> moveqz  a2, a9, a9
> ret.n
> test2:
> movi.n  a9, -1
> slli    a9, a9, 31
> bne a2, a9, .L3
> j.l foo, a9
> .L3:
> ret.n
> test3:
> movi.n  a9, -1
> slli    a9, a9, 31
> beq a2, a9, .L5
> j.l foo, a9
> .L5:
> ret.n
>
> ;; after
> test0:
> abs a2, a2
> extui   a2, a2, 31, 1
> ret.n
> test1:
> abs a2, a2
> srai    a2, a2, 31
> addi.n  a2, a2, 1
> ret.n
> test2:
> abs a2, a2
> bbci    a2, 31, .L3
> j.l foo, a9
> .L3:
> ret.n
> test3:
> abs a2, a2
> bbsi    a2, 31, .L5
> j.l foo, a9
> .L5:
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*btrue_INT_MIN, *eqne_INT_MIN):
> New insn_and_split patterns.
> ---
>  gcc/config/xtensa/xtensa.md | 64 +
>  1 file changed, 64 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [COMMITTED] MAINTAINERS: Add myself as MIPS port maintainer

2023-06-04 Thread Matthew Fortune via Gcc-patches
Hi YunQiang/Maciej,

Given that I have simply been unable to fulfill any of the duties of
maintaining a port in GCC due to various other commitments, I am
thrilled that YunQiang has stepped up to take it on. I have no
objection to either being removed or left, as a rather silent partner,
but would appreciate being moved to the write after approval section
if dropped from the maintainer section.

All the best,
Matthew

>On Sat, Jun 3, 2023 at 3:51 PM Maciej W. Rozycki  wrote:
>
> On Fri, 2 Jun 2023, YunQiang Su wrote:
>
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 4a7c963914b..c8b787b6e1e 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -91,7 +91,7 @@ m68k port   Andreas Schwab  
> > 
> >  m68k-motorola-sysv port  Philippe De Muyter  
> >  mcore port   Nick Clifton
> >  microblaze   Michael Eager   
> > -mips port   Matthew Fortune 
> > +mips port   YunQiang Su 
>
>  Has Matthew agreed to be removed from the maintainer's post?  Even if so,
> then he needs to be moved back to the Write After Approval section, as no
> one has deprived him of this right.
>
>   Maciej


[committed] Convert H8 port to LRA

2023-06-04 Thread Jeff Law via Gcc-patches
With Vlad's recent LRA fix to the elimination code, the H8 can be 
converted to LRA.


This patch has two changes of note.

First, this turns Zz into a standard constraint.  This helps reloading 
for the H8/SX movqi pattern.


Second, this drops the whole pattern for the SX bit memory operations. 
I can't see why those exist to begin with.  They should be handled by 
the standard bit manipulation patterns.   If someone wants to try and 
improve SX bit support, that'd be great and they can do so within the 
LRA framework :-)


Pushed to the trunk...

Jeff

commit f66e0a94ad7bc18538c8207fc2c86b62e4a51bb2
Author: Jeff Law 
Date:   Sun Jun 4 11:38:55 2023 -0600

Convert H8 port to LRA

With Vlad's recent LRA fix to the elimination code, the H8 can be converted
to LRA.

This patch has two changes of note.

First, this turns Zz into a standard constraint.  This helps reloading for
the H8/SX movqi pattern.

Second, this drops the whole pattern for the SX bit memory operations.  I
can't see why those exist to begin with.  They should be handled by the
standard bit manipulation patterns.   If someone wants to try and improve SX
bit support, that'd be great and they can do so within the LRA framework :-)

Pushed to the trunk...

gcc/
* config/h8300/constraints.md (Zz): Make this a normal
constraint.
* config/h8300/h8300.cc (TARGET_LRA_P): Remove.
* config/h8300/logical.md (H8/SX bit patterns): Remove.

diff --git a/gcc/config/h8300/constraints.md b/gcc/config/h8300/constraints.md
index 3aef1205fef..3e2526ccbbc 100644
--- a/gcc/config/h8300/constraints.md
+++ b/gcc/config/h8300/constraints.md
@@ -211,7 +211,7 @@ (define_constraint "Y2"
   (and (match_code "const_int")
(match_test "exact_log2 (ival & 0xff) != -1")))
 
-(define_special_memory_constraint "Zz"
+(define_constraint "Zz"
   "@internal"
   (and (match_test "TARGET_H8300SX")
(match_code "mem")
diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc
index 7412c0535fc..cdf74c1acbd 100644
--- a/gcc/config/h8300/h8300.cc
+++ b/gcc/config/h8300/h8300.cc
@@ -5625,9 +5625,6 @@ pre_incdec_with_reg (rtx op, unsigned int reg)
 #undef TARGET_MODES_TIEABLE_P
 #define TARGET_MODES_TIEABLE_P h8300_modes_tieable_p
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
-
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P h8300_legitimate_address_p
 
diff --git a/gcc/config/h8300/logical.md b/gcc/config/h8300/logical.md
index f07c79e1eac..5df0922ef4e 100644
--- a/gcc/config/h8300/logical.md
+++ b/gcc/config/h8300/logical.md
@@ -31,28 +31,6 @@ (define_expand "3"
 ;; AND INSTRUCTIONS
 ;; --
 
-(define_insn "bclr_msx"
-  [(set (match_operand:QHI 0 "bit_register_indirect_operand" "=WU")
-   (and:QHI (match_operand:QHI 1 "bit_register_indirect_operand" "%0")
-(match_operand:QHI 2 "single_zero_operand" "Y0")))]
-  "TARGET_H8300SX && rtx_equal_p (operands[0], operands[1])"
-  "bclr\\t%W2,%0"
-  [(set_attr "length" "8")])
-
-(define_split
-  [(set (match_operand:HI 0 "bit_register_indirect_operand")
-   (and:HI (match_operand:HI 1 "bit_register_indirect_operand")
-   (match_operand:HI 2 "single_zero_operand")))]
-  "TARGET_H8300SX && abs (INTVAL (operands[2])) > 0xff"
-  [(set (match_dup 0)
-   (and:QI (match_dup 1)
-   (match_dup 2)))]
-  {
-operands[0] = adjust_address (operands[0], QImode, 0);
-operands[1] = adjust_address (operands[1], QImode, 0);
-operands[2] = GEN_INT ((INTVAL (operands[2])) >> 8);
-  })
-
 (define_insn_and_split "*andqi3_2"
   [(set (match_operand:QI 0 "bit_operand" "=U,rQ,r")
(and:QI (match_operand:QI 1 "bit_operand" "%0,0,WU")
@@ -177,14 +155,6 @@ (define_insn "*andorsi3_shift_8_clobber_flags"
 ;; OR/XOR INSTRUCTIONS
 ;; --
 
-(define_insn "b_msx"
-  [(set (match_operand:QHI 0 "bit_register_indirect_operand" "=WU")
-   (ors:QHI (match_operand:QHI 1 "bit_register_indirect_operand" "%0")
-(match_operand:QHI 2 "single_one_operand" "Y2")))]
-  "TARGET_H8300SX && rtx_equal_p (operands[0], operands[1])"
-  { return  == IOR ? "bset\\t%V2,%0" : "bnot\\t%V2,%0"; }
-  [(set_attr "length" "8")])
-
 (define_insn_and_split "qi3_1"
   [(set (match_operand:QI 0 "bit_operand" "=U,rQ")
(ors:QI (match_operand:QI 1 "bit_operand" "%0,0")


Re: [x86 PATCH] Add support for stc, clc and cmc instructions in i386.md

2023-06-04 Thread Uros Bizjak via Gcc-patches
On Sun, Jun 4, 2023 at 12:45 AM Roger Sayle  wrote:
>
>
> This patch is the latest revision of my patch to add support for the
> STC (set carry flag), CLC (clear carry flag) and CMC (complement
> carry flag) instructions to the i386 backend, incorporating Uros'
> previous feedback.  The significant changes are (i) the inclusion
> of CMC, (ii) the use of UNSPEC for pattern, (iii) Use of a new
> X86_TUNE_SLOW_STC tuning flag to use alternate implementations on
> pentium4 (which has a notoriously slow STC) when not optimizing
> for size.
>
> An example of the use of the stc instruction is:
> unsigned int foo (unsigned int a, unsigned int b, unsigned int *c) {
>   return __builtin_ia32_addcarryx_u32 (1, a, b, c);
> }
>
> which previously generated:
> movl    $1, %eax
> addb    $-1, %al
> adcl    %esi, %edi
> setc    %al
> movl    %edi, (%rdx)
> movzbl  %al, %eax
> ret
>
> with this patch now generates:
> stc
> adcl    %esi, %edi
> setc    %al
> movl    %edi, (%rdx)
> movzbl  %al, %eax
> ret
>
> An example of the use of the cmc instruction (where the carry from
> a first adc is inverted/complemented as input to a second adc) is:
> unsigned int bar (unsigned int a, unsigned int b,
>   unsigned int c, unsigned int d)
> {
>   unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1);
>   return __builtin_ia32_addcarryx_u32 (c1 ^ 1, c, d, &o2);
> }
>
> which previously generated:
> movl    $1, %eax
> addb    $-1, %al
> adcl    %esi, %edi
> setnc   %al
> movl    %edi, o1(%rip)
> addb    $-1, %al
> adcl    %ecx, %edx
> setc    %al
> movl    %edx, o2(%rip)
> movzbl  %al, %eax
> ret
>
> and now generates:
> stc
> adcl    %esi, %edi
> cmc
> movl    %edi, o1(%rip)
> adcl    %ecx, %edx
> setc    %al
> movl    %edx, o2(%rip)
> movzbl  %al, %eax
> ret
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2022-06-03  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386-expand.cc (ix86_expand_builtin) :
> Use new x86_stc or negqi_ccc_1 instructions to set the carry flag.
> * config/i386/i386.h (TARGET_SLOW_STC): New define.
> * config/i386/i386.md (UNSPEC_CLC): New UNSPEC for clc.
> (UNSPEC_STC): New UNSPEC for stc.
> (UNSPEC_CMC): New UNSPEC for cmc.
> (*x86_clc): New define_insn.
> (*x86_clc_xor): New define_insn for pentium4 without -Os.
> (x86_stc): New define_insn.
> (define_split): Convert x86_stc into alternate implementation
> on pentium4.
> (x86_cmc): New define_insn.
> (*x86_cmc_1): New define_insn_and_split to recognize cmc pattern.
> (*setcc_qi_negqi_ccc_1_): New define_insn_and_split to
> recognize (and eliminate) the carry flag being copied to itself.
> (*setcc_qi_negqi_ccc_2_): Likewise.
> (neg_ccc_1): Renamed from *neg_ccc_1 for gen function.
> * config/i386/x86-tune.def (X86_TUNE_SLOW_STC): New tuning flag.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/cmc-1.c: New test case.
> * gcc.target/i386/stc-1.c: Likewise.

+;; Clear carry flag.
+(define_insn "*x86_clc"
+  [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_CLC))]
+  "!TARGET_SLOW_STC || optimize_function_for_size_p (cfun)"
+  "clc"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
+(define_insn "*x86_clc_xor"
+  [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_CLC))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "TARGET_SLOW_STC && !optimize_function_for_size_p (cfun)"
+  "xor{l}\t%0, %0"
+[(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])

I think the above would be better implemented as a peephole2 pattern
that triggers when a register is available. We should not waste a
register on a register starved x86_32 just to set a carry flag. This
is implemented with:

  [(match_scratch:SI 2 "r")

at the beginning of the peephole2 pattern that generates x86_clc_xor.
The pattern should be constrained with "TARGET_SLOW_STC &&
!optimize_function_for_size_p()" and x86_clc_xor should be available
only after reload (like e.g. "*mov_xor").

+;; On Pentium 4, set the carry flag using mov $1,%al;neg %al.
+(define_split
+  [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
+  "TARGET_SLOW_STC
+   && !optimize_insn_for_size_p ()
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0) (const_int 1))
+   (parallel
+ [(set (reg:CCC FLAGS_REG)
+   (unspec:CCC [(match_dup 0) (const_int 0)] UNSPEC_CC_NE))
+  (set (match_dup 0) (neg:QI (match_dup 0)))])]
+  "ope

[RFA] Improve strcmp expansion when one input is a constant string.

2023-06-04 Thread Jeff Law via Gcc-patches
While investigating a RISC-V backend patch from Jivan I noticed a 
regression in terms of dynamic instruction counts for the omnetpp 
benchmark in spec2017.


https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620577.html

The code we get with Jivan's patch at expansion time looks like this for 
each character in the input string:




(insn 6 5 7 (set (reg:SI 137)
(zero_extend:SI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM 
 [(void *)x_2(D)]+0 S1 A8]))) "j.c":5:11 -1

 (nil))

(insn 7 6 8 (set (reg:DI 138)
(sign_extend:DI (plus:SI (reg:SI 137)
(const_int -108 [0xff94] "j.c":5:11 -1
 (nil))

(insn 8 7 9 (set (reg:SI 136)
(subreg/s/u:SI (reg:DI 138) 0)) "j.c":5:11 -1
 (expr_list:REG_EQUAL (plus:SI (reg:SI 137)
(const_int -108 [0xff94]))
(nil)))

(insn 9 8 10 (set (reg:DI 139)
(sign_extend:DI (reg:SI 136))) "j.c":5:11 -1
 (nil))

(jump_insn 10 9 11 (set (pc)
(if_then_else (ne (reg:DI 139)
(const_int 0 [0]))
(label_ref 64)
(pc))) "j.c":5:11 -1
 (nil))


Ignore insn 9.  fwprop will turn it into a trivial copy from r138->r139 
which will ultimately propagate away.



All the paths eventually transfer control to the label in question, 
either by jumping or falling thru on the last character.  After a bit of 
cleanup by fwprop & friends we have:





(insn 6 3 7 2 (set (reg:SI 137 [ MEM  [(void *)x_2(D)] ])
(zero_extend:SI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM  [(void 
*)x_2(D)]+0 S1 A8]))) "j.c":5:11 114 {zero_extendqisi2}
 (nil)) 
(insn 7 6 8 2 (set (reg:DI 138)

(sign_extend:DI (plus:SI (reg:SI 137 [ MEM  [(void 
*)x_2(D)] ])
(const_int -108 [0xff94] "j.c":5:11 6 
{addsi3_extended}
 (expr_list:REG_DEAD (reg:SI 137 [ MEM  [(void *)x_2(D)] ])
(nil)))
(insn 8 7 10 2 (set (reg:SI 136 [ MEM  [(void *)x_2(D)]+11 ])
(subreg/s/u:SI (reg:DI 138) 0)) "j.c":5:11 180 {*movsi_internal}
 (nil))
(jump_insn 10 8 73 2 (set (pc)
(if_then_else (ne (reg:DI 138)
(const_int 0 [0]))
(label_ref 64)
(pc))) "j.c":5:11 243 {*branchdi}
 (expr_list:REG_DEAD (reg:DI 138)
(int_list:REG_BR_PROB 536870916 (nil)))
 -> 64)



insn 8 is the result of wanting the ultimate result of the strcmp to be 
an "int" type (SImode).  Note that (reg 136) is the result of the 
strcmp.  It gets set in each fragment of code that compares one element 
in the string.  It's also live after the strcmp sequence.   As a result 
combine isn't going to be able to clean this up.


Note how (reg 136) births while (reg 138) is live and even though (reg 
136) is a copy of (reg 138), IRA doesn't have the necessary code to 
determine that the regs do not conflict.  As a result (reg 136) and (reg 
138) must be allocated different hard registers and we get code like this:



lbu a5,0(a0)# 6 [c=28 l=4]  zero_extendqisi2/1
addiw   a5,a5,-108  # 7 [c=8 l=4]  addsi3_extended/1
mv  a4,a5   # 8 [c=4 l=4]  *movsi_internal/0
bne a5,zero,.L2 # 10[c=4 l=4]  *branchdi


Note the annoying "mv".


Rather than do a conversion for each character, we could do each step in 
word_mode and do the conversion once at the end of the whole sequence.


So for each character we expand to:


(insn 6 5 7 (set (reg:DI 138)
(zero_extend:DI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM  [(void 
*)x_2(D)]+0 S1 A8]))) "j.c":5:11 -1
 (nil))

(insn 7 6 8 (set (reg:DI 137)
(plus:DI (reg:DI 138)
(const_int -108 [0xff94]))) "j.c":5:11 -1
 (nil))

(jump_insn 8 7 9 (set (pc)
(if_then_else (ne (reg:DI 137)
(const_int 0 [0]))
(label_ref 41)
(pc))) "j.c":5:11 -1
 (nil))


Good.  Then at the end of the sequence we have:

(code_label 41 40 42 2 (nil) [0 uses])

(insn 42 41 43 (set (reg:SI 136)
(subreg:SI (reg:DI 137) 0)) "j.c":5:11 -1
 (nil))


Which seems like exactly what we want.  At the assembly level we get:
lbu a5,0(a0)# 6 [c=28 l=4]  zero_extendqidi2/1
addi    a0,a5,-108  # 7 [c=4 l=4]  adddi3/1
bne a0,zero,.L2 # 8 [c=4 l=4]  *branchdi
[ ... ]

At the end of the sequence we realize the narrowing subreg followed by 
an extension isn't necessary and just remove them.


The ultimate result is omnetpp goes from a small regression to a small 
overall improvement with Jivan's patch.


Bootstrapped and regression tested on x86.  Also built and run spec2017 
on riscv64.


OK for the trunk?

Jeff

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 8400adaf5b4..f2e0d3b7d7f 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -7135,6 +7135,9 @@ inline_string_cmp (rtx target, tree var_str, const char 
*const_str,
   scalar_int_mode unit_mode
 = as_a  TYPE_MODE (unit_type_node);
 
+  /* We do

Re: [PATCH] VECT: Add SELECT_VL support

2023-06-04 Thread Richard Sandiford via Gcc-patches
Sorry for the slow review.

I don't know the IV-related parts well enough to review those properly,
but they looked reasonable to me.  Hopefully Richi can comment.

I'm curious though.  For:

> +  tree step = vect_dr_behavior (vinfo, dr_info)->step;
> +
> +  [...]
> +  poly_uint64 bytesize = GET_MODE_SIZE (element_mode (aggr_type));
> +  /* Since the outcome of .SELECT_VL is element size, we should adjust
> + it into bytesize so that it can be used in address pointer variable
> + amount IVs adjustment.  */
> +  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
> +   build_int_cst (len_type, bytesize));
> +  if (tree_int_cst_sgn (step) == -1)
> +tmp = fold_build1 (NEGATE_EXPR, len_type, tmp);

Could you not just multiply loop_len by step, probably written as:

  build_int_cst (len_type, wi::to_widest (step))

avoiding the NEGATE_EXPR and bytesize calculation?  step should
represent the step of the original scalar IV, so doing that feels
more direct.

The loop-control bits look good to me apart from one hunk:

> @@ -2737,6 +2738,14 @@ start_over:
>   LOOP_VINFO_VECT_FACTOR (loop_vinfo
>  LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
>  
> +  /* If we're using decrement IV and SELECT_VL is supported by the target.
> + Use output of SELECT_VL to adjust IV of loop control and data reference.
> + Note: We only use SELECT_VL on single-rgroup control.  */
> +  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
> +  && LOOP_VINFO_LENS (loop_vinfo).length () == 1
> +  && !slp)
> +LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
> +
>/* If we're vectorizing an epilogue loop, the vectorized loop either needs
>   to be able to handle fewer than VF scalars, or needs to have a lower VF
>   than the main loop.  */

This test also needs to check that the target implements the select_vl
optab for the chosen iv type.  You can check that using
direct_internal_fn_supported_p.

We should also check that LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1,
since the IV update multiplies by the size in bytes.

I think it would be worth saying in more detail why we only use SELECT_VL
for single rgroups.  I assume the reason is to simplify the pointer IV
updates.  Is that right?

That is: the multiple length controls that are currently generated from
a MIN_EXPR IV should also work with a SELECT_VL IV.  The difficulty is
that an rgroup that controls N vector loads (say) would need N pointer
updates by variable amounts.  But I'm not 100% sure whether we're
avoiding that situation because it's difficult to code, or because
it's inefficient.  Or maybe we're avoiding it because it doesn't
fit well with the later RVV vsetvl pass.

Thanks,
Richard


Re: Re: [PATCH] VECT: Add SELECT_VL support

2023-06-04 Thread 钟居哲
Hi, Richard. Thanks for the comments.

>> I think it would be worth saying in more detail why we only use SELECT_VL
>> for single rgroups.  I assume the reason is to simplify the pointer IV
>> updates.  Is that right?

Yes.

>> The difficulty is
>> that an rgroup that controls N vector loads (say) would need N pointer
>> updates by variable amounts.  But I'm not 100% sure whether we're
>> avoiding that situation because it's difficult to code, or because
>> it's inefficient.  Or maybe we're avoiding it because it doesn't
>> fit well with the later RVV vsetvl pass.
I don't want to use SELECT_VL pattern for multiple rgroups since
it's really inefficient and also does not fit well with the later RVV vsetvl PASS. 
(I have a draft in my downstream, the loop body becomes very ugly with a
 lot of instructions to adjust IVs).

Since we define SELECT_VL as a flexible pattern that doesn't have the side 
effect
set vector length, we need much more scalar operations to adjust the pointer IVs
and it cause more vsetvli than just using MIN.

Also, it changes a lot in middle-end and make middle-end codes too ugly and no
benefits I see so far.

Current approach (MIN), I think the current codegen is good (even though may 
not be perfect).

Besides, LLVM only can handle one vector length (in GCC, we call multiple 
rgroup).

I think RVV GCC is already in a good shape now in case of loop control.

I'd rather support all RVV auto-vectorization features soon and focus on 
optimizing loopVectorizer
(For example, I know GCC in TSVC has 46 fails, fixing failed vectorization 
case will improve much more,
 I think it can also improve ARM. ).

Address comments and will send the next patch, really appreciate it.

Thanks.  


juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-06-05 05:59
To: juzhe.zhong
CC: gcc-patches; rguenther
Subject: Re: [PATCH] VECT: Add SELECT_VL support
Sorry for the slow review.
 
I don't know the IV-related parts well enough to review those properly,
but they looked reasonable to me.  Hopefully Richi can comment.
 
I'm curious though.  For:
 
> +  tree step = vect_dr_behavior (vinfo, dr_info)->step;
> +
> +  [...]
> +  poly_uint64 bytesize = GET_MODE_SIZE (element_mode (aggr_type));
> +  /* Since the outcome of .SELECT_VL is element size, we should adjust
> + it into bytesize so that it can be used in address pointer variable
> + amount IVs adjustment.  */
> +  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
> +   build_int_cst (len_type, bytesize));
> +  if (tree_int_cst_sgn (step) == -1)
> +tmp = fold_build1 (NEGATE_EXPR, len_type, tmp);
 
Could you not just multiply loop_len by step, probably written as:
 
  build_int_cst (len_type, wi::to_widest (step))
 
avoiding the NEGATE_EXPR and bytesize calculation?  step should
represent the step of the original scalar IV, so doing that feels
more direct.
 
The loop-control bits look good to me apart from one hunk:
 
> @@ -2737,6 +2738,14 @@ start_over:
>  LOOP_VINFO_VECT_FACTOR (loop_vinfo
>  LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
>  
> +  /* If we're using decrement IV and SELECT_VL is supported by the target.
> + Use output of SELECT_VL to adjust IV of loop control and data reference.
> + Note: We only use SELECT_VL on single-rgroup control.  */
> +  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
> +  && LOOP_VINFO_LENS (loop_vinfo).length () == 1
> +  && !slp)
> +LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
> +
>/* If we're vectorizing an epilogue loop, the vectorized loop either needs
>   to be able to handle fewer than VF scalars, or needs to have a lower VF
>   than the main loop.  */
 
This test also needs to check that the target implements the select_vl
optab for the chosen iv type.  You can check that using
direct_internal_fn_supported_p.
 
We should also check that LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1,
since the IV update multiplies by the size in bytes.
 
I think it would be worth saying in more detail why we only use SELECT_VL
for single rgroups.  I assume the reason is to simplify the pointer IV
updates.  Is that right?
 
That is: the multiple length controls that are currently generated from
a MIN_EXPR IV should also work with a SELECT_VL IV.  The difficulty is
that an rgroup that controls N vector loads (say) would need N pointer
updates by variable amounts.  But I'm not 100% sure whether we're
avoiding that situation because it's difficult to code, or because
it's inefficient.  Or maybe we're avoiding it because it doesn't
fit well with the later RVV vsetvl pass.
 
Thanks,
Richard
 


RE: [PATCH] RISC-V: Split arguments of expand_vec_perm

2023-06-04 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, June 4, 2023 11:28 PM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [PATCH] RISC-V: Split arguments of expand_vec_perm



On 6/4/23 03:25, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
> Since the following patch will call expand_vec_perm with split 
> arguments, change the expand_vec_perm interface in this patch.
> 
> gcc/ChangeLog:
> 
>  * config/riscv/autovec.md: Split arguments.
>  * config/riscv/riscv-protos.h (expand_vec_perm): Ditto.
>  * config/riscv/riscv-v.cc (expand_vec_perm): Ditto.
OK
jeff


RE: [NFC] RISC-V: Reorganize riscv-v.cc

2023-06-04 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, June 4, 2023 11:40 PM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [NFC] RISC-V: Reorganize riscv-v.cc



On 6/4/23 03:11, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
> This patch is just reorganizing the functions for the following patch.
> 
> I put rvv_builder and emit_* functions located before 
> expand_const_vector function since I will use them in expand_const_vector in 
> the following patch.
> 
> gcc/ChangeLog:
> 
>  * config/riscv/riscv-v.cc (class rvv_builder): Reorganize functions.
>  (rvv_builder::can_duplicate_repeating_sequence_p): Ditto.
>  (rvv_builder::repeating_sequence_use_merge_profitable_p): Ditto.
>  (rvv_builder::get_merged_repeating_sequence): Ditto.
>  (rvv_builder::get_merge_scalar_mask): Ditto.
>  (emit_scalar_move_insn): Ditto.
>  (emit_vlmax_integer_move_insn): Ditto.
>  (emit_nonvlmax_integer_move_insn): Ditto.
>  (emit_vlmax_gather_insn): Ditto.
>  (emit_vlmax_masked_gather_mu_insn): Ditto.
>  (get_repeating_sequence_dup_machine_mode): Ditto.
OK
jeff



RE: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

2023-06-04 Thread Li, Pan2 via Gcc-patches
Some nit comments.

+static bool
+riscv_frame_pointer_required (void)
+{
+  if (riscv_save_frame_pointer && !crtl->is_leaf)
+return true;
+
+  return false;
+}

Can be simplified to return riscv_save_frame_pointer && !crtl->is_leaf;

+  riscv_save_frame_pointer = false;
+  if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
+{
+  if (!global_options.x_flag_omit_frame_pointer)
+   riscv_save_frame_pointer = true;
+
+  global_options.x_flag_omit_frame_pointer = 1;
+}

Does this mean if omit_leaf_frame will also set the omit_frame_pointer 
implicitly?

Pan


-Original Message-
From: Wang, Yanzhang  
Sent: Friday, June 2, 2023 3:07 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Li, Pan2 ; 
Wang, Yanzhang 
Subject: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

From: Yanzhang Wang 

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_save_reg_p): Save ra for leaf
  when enabling -mno-omit-leaf-frame-pointer
(riscv_option_override): Override omit-frame-pointer.
(riscv_frame_pointer_required): Save s0 for non-leaf function
(TARGET_FRAME_POINTER_REQUIRED): Override definition
* config/riscv/riscv.opt: Add option support.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/omit-frame-pointer-1.c: New test.
* gcc.target/riscv/omit-frame-pointer-2.c: New test.
* gcc.target/riscv/omit-frame-pointer-3.c: New test.
* gcc.target/riscv/omit-frame-pointer-4.c: New test.
* gcc.target/riscv/omit-frame-pointer-test.c: New test.

Signed-off-by: Yanzhang Wang 
---
 gcc/config/riscv/riscv.cc | 31 ++-
 gcc/config/riscv/riscv.opt|  4 +++
 .../gcc.target/riscv/omit-frame-pointer-1.c   |  7 +
 .../gcc.target/riscv/omit-frame-pointer-2.c   |  7 +
 .../gcc.target/riscv/omit-frame-pointer-3.c   |  7 +
 .../gcc.target/riscv/omit-frame-pointer-4.c   |  7 +
 .../riscv/omit-frame-pointer-test.c   | 13 
 7 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-test.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5d2550871c7..e02f9cb50a4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -408,6 +408,10 @@ static const struct riscv_tune_info 
riscv_tune_info_table[] = {
 #include "riscv-cores.def"
 };
 
+/* Global variable to distinguish whether we should save and restore s0/fp for
+   function.  */
+static bool riscv_save_frame_pointer;
+
 void riscv_frame_info::reset(void)
 {
   total_size = 0;
@@ -4744,7 +4748,11 @@ riscv_save_reg_p (unsigned int regno)
   if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
 return true;
 
-  if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
+  /* Need not to use ra for leaf when frame pointer is turned off by option
+ whatever the omit-leaf-frame's value.  */
+  bool keep_leaf_ra = frame_pointer_needed && crtl->is_leaf
+&& !TARGET_OMIT_LEAF_FRAME_POINTER;
+  if (regno == RETURN_ADDR_REGNUM && (crtl->calls_eh_return || keep_leaf_ra))
 return true;
 
   /* If this is an interrupt handler, then must save extra registers.  */
@@ -6287,6 +6295,15 @@ riscv_option_override (void)
   if (flag_pic)
 riscv_cmodel = CM_PIC;
 
+  riscv_save_frame_pointer = false;
+  if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
+{
+  if (!global_options.x_flag_omit_frame_pointer)
+   riscv_save_frame_pointer = true;
+
+  global_options.x_flag_omit_frame_pointer = 1;
+}
+
   /* We get better code with explicit relocs for CM_MEDLOW, but
  worse code for the others (for now).  Pick the best default.  */
   if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
@@ -7158,6 +7175,15 @@ riscv_zero_call_used_regs (HARD_REG_SET 
need_zeroed_hardregs)
& ~zeroed_hardregs);
 }
 
+static bool
+riscv_frame_pointer_required (void)
+{
+  if (riscv_save_frame_pointer && !crtl->is_leaf)
+return true;
+
+  return false;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7412,6 +7438,9 @@ riscv_zero_call_used_regs (HARD_REG_SET 
need_zeroed_hardregs)
 #undef TARGET_ZERO_CALL_USED_REGS
 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs
 
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/config/riscv/riscv.opt b/g

[PATCH] [x86] Add missing vec_pack/unpacks patterns for _Float16 <-> int/float conversion.

2023-06-04 Thread liuhongt via Gcc-patches
This patch only supports vec_pack/unpacks optabs for vector modes whose length
is >= 128 bits.
For 32/64-bit vectors, they're mostly handled by the BB vectorizer with
truncmn2/extendmn2/fix{,uns}_truncmn2.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

* config/i386/sse.md (vec_pack_float_): New expander.
(vec_unpack_fix_trunc_lo_): Ditto.
(vec_unpack_fix_trunc_hi_): Ditto.
(vec_unpacks_lo_): Ditto.
(vec_unpacks_hi_): Ditto.
(sse_movlhps_): New define_insn.
(ssse3_palignr_perm): Extend to V_128H.
(V_128H): New mode iterator.
(ssepackPHmode): New mode attribute.
(vunpck_extract_mode): Ditto.
(vpckfloat_concat_mode): Extend to VxSI/VxSF for _Float16.
(vpckfloat_temp_mode): Ditto.
(vpckfloat_op_mode): Ditto.
(vunpckfixt_mode): Extend to VxHF.
(vunpckfixt_model): Ditto.
(vunpckfixt_extract_mode): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/vec_pack_fp16-1.c: New test.
* gcc.target/i386/vec_pack_fp16-2.c: New test.
* gcc.target/i386/vec_pack_fp16-3.c: New test.
---
 gcc/config/i386/sse.md| 216 +-
 .../gcc.target/i386/vec_pack_fp16-1.c |  34 +++
 .../gcc.target/i386/vec_pack_fp16-2.c |   9 +
 .../gcc.target/i386/vec_pack_fp16-3.c |   8 +
 4 files changed, 258 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vec_pack_fp16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vec_pack_fp16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vec_pack_fp16-3.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a92f50e96b5..1eb2dd077ff 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -291,6 +291,9 @@ (define_mode_iterator V
 (define_mode_iterator V_128
   [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
 
+(define_mode_iterator V_128H
+  [V16QI V8HI V8HF V8BF V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
+
 ;; All 256bit vector modes
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
@@ -1076,6 +1079,12 @@ (define_mode_attr ssePHmodelower
(V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
(V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
 
+
+;; Mapping of vector modes to packed vector hf modes of same sized.
+(define_mode_attr ssepackPHmode
+  [(V16SI "V32HF") (V8SI "V16HF") (V4SI "V8HF")
+   (V16SF "V32HF") (V8SF "V16HF") (V4SF "V8HF")])
+
 ;; Mapping of vector modes to packed single mode of the same size
 (define_mode_attr ssePSmode
   [(V16SI "V16SF") (V8DF "V16SF")
@@ -6918,6 +6927,61 @@ (define_mode_attr qq2phsuff
(V16SF "") (V8SF "{y}") (V4SF "{x}")
(V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
 
+(define_mode_attr vunpck_extract_mode
+  [(V32HF "v32hf") (V16HF "v16hf") (V8HF "v16hf")])
+
+(define_expand "vec_unpacks_lo_"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VF_AVX512FP16VL 1 "register_operand")]
+  "TARGET_AVX512FP16"
+{
+  rtx tem = operands[1];
+  rtx (*gen) (rtx, rtx);
+  if (mode != V8HFmode)
+{
+  tem = gen_reg_rtx (mode);
+  emit_insn (gen_vec_extract_lo_ (tem,
+  operands[1]));
+  gen = gen_extend2;
+}
+  else
+gen = gen_avx512fp16_float_extend_phv4sf2;
+
+  emit_insn (gen (operands[0], tem));
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VF_AVX512FP16VL 1 "register_operand")]
+  "TARGET_AVX512FP16"
+{
+  rtx tem = operands[1];
+  rtx (*gen) (rtx, rtx);
+  if (mode != V8HFmode)
+{
+  tem = gen_reg_rtx (mode);
+  emit_insn (gen_vec_extract_hi_ (tem,
+  operands[1]));
+  gen = gen_extend2;
+}
+  else
+{
+  tem = gen_reg_rtx (V8HFmode);
+  rtvec tmp = rtvec_alloc (8);
+  for (int i = 0; i != 8; i++)
+   RTVEC_ELT (tmp, i) = GEN_INT((i+4)%8);
+
+  rtx selector = gen_rtx_PARALLEL (VOIDmode, tmp);
+  emit_move_insn (tem,
+gen_rtx_VEC_SELECT (V8HFmode, operands[1], selector));
+  gen = gen_avx512fp16_float_extend_phv4sf2;
+}
+
+  emit_insn (gen (operands[0], tem));
+  DONE;
+})
+
 (define_insn 
"avx512fp16_vcvtph2_"
   [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
 (unspec:VI248_AVX512VL
@@ -8314,11 +8378,17 @@ (define_expand "floatv2div2sf2"
 })
 
 (define_mode_attr vpckfloat_concat_mode
-  [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
+  [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")
+   (V16SI "v32hf") (V8SI "v16hf") (V4SI "v16hf")
+   (V16SF "v32hf") (V8SF "v16hf") (V4SF "v16hf")])
 (define_mode_attr vpckfloat_temp_mode
-  [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
+  [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")
+   (V16SI "V16HF") (V8SI "V8HF") (V4SI "V8HF")
+   (V16SF "V16HF") (V8SF "V8HF") (V4SF "V8HF")])
 (define_mode_attr vpckfloat_op_mode
-  [(V8DI "v8sf"

[PATCH] MAINTAINERS: move Matthew Fortune to Write After Approval

2023-06-04 Thread YunQiang Su
In 4fe6e12204535545edf7f035d4dc79c1404058cf, I should have added
Matthew Fortune to the Write After Approval section, while replacing
the MIPS Maintainer position.

ChangeLog:

* MAINTAINERS (Write After Approval): move Matthew Fortune
to Write After Approval.
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c8b787b6e1e..89b9289830f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -412,6 +412,7 @@ Li Feng 

 Thomas Fitzsimmons 
 Alexander Fomin

 Brian Ford 
+Matthew Fortune
 John Freeman   
 Nathan Froyd   
 Chao-ying Fu   
-- 
2.30.2



Re: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

2023-06-04 Thread Wang, Yanzhang via Gcc-patches
Hi Jeff,

Yes, there's a requirement to support backtrace based on the fp+ra.
And the unwind/cfa is not acceptable because it will add additional
sections to the binary. Currently, -fno-omit-frame-pointer can not
save the ra for the leaf function. So we need to add another option
like ARM/X86 to support consistent fp+ra stack layout for the leaf
and non-leaf functions.

Thanks,
Yanzhang

From: Jeff Law 
Sent: Saturday, June 3, 2023 10:43 AM
To: Wang, Yanzhang ; gcc-patches@gcc.gnu.org 

Cc: juzhe.zh...@rivai.ai ; kito.ch...@sifive.com 
; Li, Pan2 
Subject: Re: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.



On 6/2/23 01:07, yanzhang.wang--- via Gcc-patches wrote:
> From: Yanzhang Wang 
>
> gcc/ChangeLog:
>
>* config/riscv/riscv.cc (riscv_save_reg_p): Save ra for leaf
>  when enabling -mno-omit-leaf-frame-pointer
>(riscv_option_override): Override omit-frame-pointer.
>(riscv_frame_pointer_required): Save s0 for non-leaf function
>(TARGET_FRAME_POINTER_REQUIRED): Override defination
>* config/riscv/riscv.opt: Add option support.
>
> gcc/testsuite/ChangeLog:
>
>* gcc.target/riscv/omit-frame-pointer-1.c: New test.
>* gcc.target/riscv/omit-frame-pointer-2.c: New test.
>* gcc.target/riscv/omit-frame-pointer-3.c: New test.
>* gcc.target/riscv/omit-frame-pointer-4.c: New test.
>* gcc.target/riscv/omit-frame-pointer-test.c: New test.
Not ACKing or NAKing at this time.

Why do you want this feature?

jeff


Re: [PATCH] inline: improve internal function costs

2023-06-04 Thread Jan Hubicka via Gcc-patches
> On Thu, 1 Jun 2023, Andre Vieira (lists) wrote:
> 
> > Hi,
> > 
> > This is a follow-up of the internal function patch to add widening and
> > narrowing patterns.  This patch improves the inliner cost estimation for
> > internal functions.
> 
> I have no idea why calls are special in IPA analyze_function_body
> and so I cannot say whether treating all internal fn calls as
> non-calls is correct there.  Honza?

The reason is that normal statements are accounted as part of the
function body, while calls have their costs attached to call edges
(so they can be adjusted when a call is inlined or otherwise optimized).

However, since internal functions have no cgraph edges, this looks like
a bug: we do not test for it.  (The code was written before internal
calls were introduced.)

I wonder if we don't want to have is_noninternal_gimple_call that could
be used by IPA code to test whether cgraph edge should exist for
the statement.
> 
> The tree-inline.cc change is OK though (you can push that separately).
The rest is OK too.
Honza
> 
> Thanks,
> Richard.
> 
> > Bootstrapped and regression tested on aarch64-unknown-linux-gnu.
> > 
> > gcc/ChangeLog:
> > 
> > * ipa-fnsummary.cc (analyze_function_body): Correctly handle
> > non-zero costed internal functions.
> > * tree-inline.cc (estimate_num_insns): Improve costing for internal
> > functions.
> > 
> 
> -- 
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
> Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
> HRB 36809 (AG Nuernberg)


RE: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

2023-06-04 Thread Wang, Yanzhang via Gcc-patches
> +static bool
> +riscv_frame_pointer_required (void)
> +{
> +  if (riscv_save_frame_pointer && !crtl->is_leaf)
> +return true;
> +
> +  return false;
> +}
> 
> Can be simplified to return riscv_save_frame_pointer && !crtl->is_leaf;

Nice. It's much simpler. Will modify in another patch.

> +  riscv_save_frame_pointer = false;
> +  if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
> +{
> +  if (!global_options.x_flag_omit_frame_pointer)
> + riscv_save_frame_pointer = true;
> +
> +  global_options.x_flag_omit_frame_pointer = 1;
> +}
> 
> Does this mean if omit_leaf_frame will also set the omit_frame_pointer
> implicitly?
>

For the flag it's yes but for the behavior it's no. The behavior is still
based on the value of the omit-frame-pointer flag.

- ON, then the frame pointer of non-leaf functions will be omitted.
- OFF(no), then the frame pointer of non-leaf functions will not be omitted.

In other words, if we want to omit the leaf frame pointers,

- if we want to omit the non-leaf fp too, we need only save the ra for the 
non-leaf.
- if we don't, we need to save the fp+ra for the non-leaf but no fp+ra for the 
leaf.

We need to override the option (x_flag_omit_frame_pointer) because it has the
first priority when determining whether the frame pointer is needed. If it's
turned off, the frame pointer will be saved for leaf functions too even
though we turn on the omit-leaf-frame-pointer.

To distinguish the two scenarios above, we need to add another variable to
save the flag the user set originally; otherwise it will be thrown away.

Yanzhang

> -Original Message-
> From: Li, Pan2 
> Sent: Monday, June 5, 2023 9:04 AM
> To: Wang, Yanzhang ; gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com
> Subject: RE: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.
> 
> Some nit comments.
> 
> +static bool
> +riscv_frame_pointer_required (void)
> +{
> +  if (riscv_save_frame_pointer && !crtl->is_leaf)
> +return true;
> +
> +  return false;
> +}
> 
> Can be simplified to return riscv_save_frame_pointer && !crtl->is_leaf;
> 
> +  riscv_save_frame_pointer = false;
> +  if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
> +{
> +  if (!global_options.x_flag_omit_frame_pointer)
> + riscv_save_frame_pointer = true;
> +
> +  global_options.x_flag_omit_frame_pointer = 1;
> +}
> 
> Does this mean if omit_leaf_frame will also set the omit_frame_pointer
> implicitly?
> 
> Pan
> 
> 
> -Original Message-
> From: Wang, Yanzhang 
> Sent: Friday, June 2, 2023 3:07 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Li, Pan2
> ; Wang, Yanzhang 
> Subject: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.
> 
> From: Yanzhang Wang 
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv.cc (riscv_save_reg_p): Save ra for leaf
> when enabling -mno-omit-leaf-frame-pointer
>   (riscv_option_override): Override omit-frame-pointer.
>   (riscv_frame_pointer_required): Save s0 for non-leaf function
>   (TARGET_FRAME_POINTER_REQUIRED): Override defination
>   * config/riscv/riscv.opt: Add option support.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/riscv/omit-frame-pointer-1.c: New test.
>   * gcc.target/riscv/omit-frame-pointer-2.c: New test.
>   * gcc.target/riscv/omit-frame-pointer-3.c: New test.
>   * gcc.target/riscv/omit-frame-pointer-4.c: New test.
>   * gcc.target/riscv/omit-frame-pointer-test.c: New test.
> 
> Signed-off-by: Yanzhang Wang 
> ---
>  gcc/config/riscv/riscv.cc | 31 ++-
>  gcc/config/riscv/riscv.opt|  4 +++
>  .../gcc.target/riscv/omit-frame-pointer-1.c   |  7 +
>  .../gcc.target/riscv/omit-frame-pointer-2.c   |  7 +
>  .../gcc.target/riscv/omit-frame-pointer-3.c   |  7 +
>  .../gcc.target/riscv/omit-frame-pointer-4.c   |  7 +
>  .../riscv/omit-frame-pointer-test.c   | 13 
>  7 files changed, 75 insertions(+), 1 deletion(-)  create mode 100644
> gcc/testsuite/gcc.target/riscv/omit-frame-pointer-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/omit-frame-pointer-
> test.c
> 
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index
> 5d2550871c7..e02f9cb50a4 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -408,6 +408,10 @@ static const struct riscv_tune_info
> riscv_tune_info_table[] = {  #include "riscv-cores.def"
>  };
> 
> +/* Global variable to distinguish whether we should save and restore
> s0/fp for
> +   function.  */
> +static bool riscv_save_frame_pointer;
> +
>  void riscv_frame_info::reset(void)
>  {
>total_size = 0;
> @@ -4744,7 +4

[PATCH V2] VECT: Add SELECT_VL support

2023-06-04 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch addresses comments from Richard and rebases to trunk.

This patch adds SELECT_VL middle-end support to
allow targets to have target-dependent optimization in case of
length calculation.

This patch is inspired by RVV ISA and LLVM:
https://reviews.llvm.org/D99750

SELECT_VL has the same behavior as LLVM "get_vector_length" with
the following properties:

1. Only apply on single-rgroup.
2. non SLP.
3. adjust loop control IV.
4. adjust data reference IV.
5. allow non-vf elements processing in non-final iteration

Code:
   # void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i
-  _36 = MIN_EXPR ;
+  _36 = (MIN_EXPR | SELECT_VL) ;
   ...
   vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
   ...
@@ -549,15 +549,28 @@ vect_set_loop_controls_directly (class loop *loop, 
loop_vec_info loop_vinfo,
   tree step = rgc->controls.length () == 1 ? rgc->controls[0]
   : make_ssa_name (iv_type);
   /* Create decrement IV.  */
-  create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
-&incr_gsi, insert_after, &index_before_incr,
-&index_after_incr);
-  gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
-   index_before_incr,
-   nitems_step));
+  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+   {
+ create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+insert_after, &index_before_incr, &index_after_incr);
+ tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
+  index_before_incr, nitems_step);
+ gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
+   }
+  else
+   {
+ create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
+&incr_gsi, insert_after, &index_before_incr,
+&index_after_incr);
+ gimple_seq_add_stmt (header_seq,
+  gimple_build_assign (step, MIN_EXPR,
+   index_before_incr,
+   nitems_step));
+   }
   *iv_step = step;
   *compare_step = nitems_step;
-  return index_before_incr;
+  return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
+  : index_before_incr;
 }
 
   /* Create increment IV.  */
@@ -888,7 +901,8 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
   /* Get a boolean result that tells us whether to iterate.  */
   edge exit_edge = single_exit (loop);
   gcond *cond_stmt;
-  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+  && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
 {
   gcc_assert (compare_step);
   tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : 
GT_EXPR;
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5b7a0da0034..68c3432c0a4 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -974,6 +974,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
 can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
 using_partial_vectors_p (false),
 using_decrementing_iv_p (false),
+using_select_vl_p (false),
 epil_using_partial_vectors_p (false),
 partial_load_store_bias (0),
 peeling_for_gaps (false),
@@ -2737,6 +2738,53 @@ start_over:
LOOP_VINFO_VECT_FACTOR (loop_vinfo
 LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
 
+  /* If we're using decrement IV approach in loop control, we can use output of
+ SELECT_VL to adjust IV of loop control and data reference when it 
satisfies
+ the following checks:
+
+ (a) SELECT_VL is supported by the target.
+ (b) LOOP_VINFO is single-rgroup control.
+ (c) non-SLP.
+ (d) LOOP can not be unrolled.
+
+ Otherwise, we use MIN_EXPR approach.
+
+ 1. We only apply SELECT_VL on single-rgroup since:
+
+ (1). Multiple-rgroup controls N vector loads/stores would need N pointer
+ updates by variable amounts.
+ (2). SELECT_VL allows flexible length (<=VF) in each iteration.
+ (3). For decrement IV approach, we calculate the MAX length of the loop
+ and then deduce the length of each control from this MAX length.
+
+ Base on (1), (2) and (3) situations, if we try to use SELECT_VL on
+ multiple-rgroup control, we need to generate multiple SELECT_VL to
+ carefully adjust length of each control. Such approach is very inefficient
+ and unprofitable for targets that are using a standalone instruction
+ to configure the length of each 

[PATCH] Fix PR 110085: `make clean` in GCC directory on sh target causes a failure

2023-06-04 Thread Andrew Pinski via Gcc-patches
On sh target, there is a MULTILIB_DIRNAMES (or is it MULTILIB_OPTIONS) named m2,
this conflicts with the language m2. So when you do a `make clean`, it will 
remove
the m2 directory and then a build will fail. Now since r0-78222-gfa9585134f6f58,
the multilib directories are no longer created in the gcc directory as libgcc
was moved to the toplevel. So we can remove the part of clean that removes those
directories.

Tested on x86_64-linux-gnu and a cross to sh-elf that `make clean` followed by
`make` works again.

OK?

gcc/ChangeLog:

PR bootstrap/110085
* Makefile.in (clean): Remove the removing of
MULTILIB_DIR/MULTILIB_OPTIONS directories.
---
 gcc/Makefile.in | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 1d39e6dd3f8..0c02f312985 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3622,13 +3622,6 @@ clean: mostlyclean lang.clean
-rm -f doc/*.pdf
 # Delete the include directories.
-rm -rf include include-fixed
-# Delete files used by the "multilib" facility (including libgcc subdirs).
-   -rm -f multilib.h tmpmultilib*
-   -if [ "x$(MULTILIB_DIRNAMES)" != x ] ; then \
- rm -rf $(MULTILIB_DIRNAMES); \
-   else if [ "x$(MULTILIB_OPTIONS)" != x ] ; then \
- rm -rf `echo $(MULTILIB_OPTIONS) | sed -e 's/\// /g'`; \
-   fi ; fi
 
 # Delete all files that users would normally create
 # while building and installing GCC.
-- 
2.31.1



Re: [PATCH] Fix PR 110085: `make clean` in GCC directory on sh target causes a failure

2023-06-04 Thread Richard Biener via Gcc-patches



> Am 05.06.2023 um 06:42 schrieb Andrew Pinski via Gcc-patches 
> :
> 
> On sh target, there is a MULTILIB_DIRNAMES (or is it MULTILIB_OPTIONS) named 
> m2,
> this conflicts with the langauge m2. So when you do a `make clean`, it will 
> remove
> the m2 directory and then a build will fail. Now since 
> r0-78222-gfa9585134f6f58,
> the multilib directories are no longer created in the gcc directory as libgcc
> was moved to the toplevel. So we can remove the part of clean that removes 
> those
> directories.
> 
> Tested on x86_64-linux-gnu and a cross to sh-elf that `make clean` followed by
> `make` works again.
> 
> OK?

Ok

> gcc/ChangeLog:
> 
>PR bootstrap/110085
>* Makefile.in (clean): Remove the removing of
>MULTILIB_DIR/MULTILIB_OPTIONS directories.
> ---
> gcc/Makefile.in | 7 ---
> 1 file changed, 7 deletions(-)
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 1d39e6dd3f8..0c02f312985 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -3622,13 +3622,6 @@ clean: mostlyclean lang.clean
>-rm -f doc/*.pdf
> # Delete the include directories.
>-rm -rf include include-fixed
> -# Delete files used by the "multilib" facility (including libgcc subdirs).
> --rm -f multilib.h tmpmultilib*
> --if [ "x$(MULTILIB_DIRNAMES)" != x ] ; then \
> -  rm -rf $(MULTILIB_DIRNAMES); \
> -else if [ "x$(MULTILIB_OPTIONS)" != x ] ; then \
> -  rm -rf `echo $(MULTILIB_OPTIONS) | sed -e 's/\// /g'`; \
> -fi ; fi
> 
> # Delete all files that users would normally create
> # while building and installing GCC.
> -- 
> 2.31.1
> 


Re: [PATCH] libgcc: Use initarray section type for .init_stack

2023-06-04 Thread Kewen.Lin via Gcc-patches
on 2023/6/1 00:57, Ian Lance Taylor wrote:
> On Wed, May 31, 2023 at 12:41 AM Kewen.Lin via Gcc-patches
>  wrote:
>>
 libgcc/ChangeLog:

  * config/i386/morestack.S: Use @init_array rather than
  @progbits for section type of section .init_array.
  * config/rs6000/morestack.S: Likewise.
  * config/s390/morestack.S: Likewise.
>>>
>>> s390 parts are ok. I did run a bootstrap and regression. Looks all good. 
>>> Thanks!
>>
>> Thanks for testing this on s390, really appreciate!
>>
>> Hi Ian & Uros,
>>
>> Do you have any concerns on this, or does it look good to you?
> 
> This is OK.

Pushed in r14-1540-g83c3550ee96aa2, thanks all!

BR,
Kewen

> 
> Thanks.
> 
> Ian





Re: [PATCH] Fix PR 110085: `make clean` in GCC directory on sh target causes a failure

2023-06-04 Thread Andrew Pinski via Gcc-patches
On Sun, Jun 4, 2023 at 10:24 PM Richard Biener via Gcc-patches
 wrote:
>
>
>
> > Am 05.06.2023 um 06:42 schrieb Andrew Pinski via Gcc-patches 
> > :
> >
> > On sh target, there is a MULTILIB_DIRNAMES (or is it MULTILIB_OPTIONS) 
> > named m2,
> > this conflicts with the langauge m2. So when you do a `make clean`, it will 
> > remove
> > the m2 directory and then a build will fail. Now since 
> > r0-78222-gfa9585134f6f58,
> > the multilib directories are no longer created in the gcc directory as 
> > libgcc
> > was moved to the toplevel. So we can remove the part of clean that removes 
> > those
> > directories.
> >
> > Tested on x86_64-linux-gnu and a cross to sh-elf that `make clean` followed 
> > by
> > `make` works again.
> >
> > OK?
>
> Ok

Is a similar patch ok for GCC 13 branch as we would get a similar
failure there too?

Thanks,
Andrew

>
> > gcc/ChangeLog:
> >
> >PR bootstrap/110085
> >* Makefile.in (clean): Remove the removing of
> >MULTILIB_DIR/MULTILIB_OPTIONS directories.
> > ---
> > gcc/Makefile.in | 7 ---
> > 1 file changed, 7 deletions(-)
> >
> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > index 1d39e6dd3f8..0c02f312985 100644
> > --- a/gcc/Makefile.in
> > +++ b/gcc/Makefile.in
> > @@ -3622,13 +3622,6 @@ clean: mostlyclean lang.clean
> >-rm -f doc/*.pdf
> > # Delete the include directories.
> >-rm -rf include include-fixed
> > -# Delete files used by the "multilib" facility (including libgcc subdirs).
> > --rm -f multilib.h tmpmultilib*
> > --if [ "x$(MULTILIB_DIRNAMES)" != x ] ; then \
> > -  rm -rf $(MULTILIB_DIRNAMES); \
> > -else if [ "x$(MULTILIB_OPTIONS)" != x ] ; then \
> > -  rm -rf `echo $(MULTILIB_OPTIONS) | sed -e 's/\// /g'`; \
> > -fi ; fi
> >
> > # Delete all files that users would normally create
> > # while building and installing GCC.
> > --
> > 2.31.1
> >


[PATCH 2/2] Handle const_int in expand_single_bit_test

2023-06-04 Thread Andrew Pinski via Gcc-patches
After expanding directly to rtl instead of
creating a tree, we could end up with
a const_int which is not ready to be handled
by extract_bit_field.
So we need to do the constant folding here instead.

OK? bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/110117

gcc/ChangeLog:

* expr.cc (expand_single_bit_test): Handle
const_int from expand_expr.

gcc/testsuite/ChangeLog:

* gcc.dg/pr110117-1.c: New test.
* gcc.dg/pr110117-2.c: New test.
---
 gcc/expr.cc   | 10 +++---
 gcc/testsuite/gcc.dg/pr110117-1.c | 31 +++
 gcc/testsuite/gcc.dg/pr110117-2.c |  7 +++
 3 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr110117-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr110117-2.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ca008cd453e..868d812eb1a 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12958,12 +12958,16 @@ expand_single_bit_test (location_t loc, enum 
tree_code code,
 
   rtx inner0 = expand_expr (inner, NULL_RTX, VOIDmode, EXPAND_NORMAL);
 
+  if (CONST_SCALAR_INT_P (inner0))
+{
+  wide_int t = rtx_mode_t (inner0, operand_mode);
+  bool setp = (wi::lrshift(t, bitnum) & 1) != 0;
+  return (setp ^ (code == EQ_EXPR)) ? const1_rtx : const0_rtx;
+}
   int bitpos = bitnum;
 
-  scalar_int_mode imode = as_a (GET_MODE (inner0));
-
   if (BYTES_BIG_ENDIAN)
-bitpos = GET_MODE_BITSIZE (imode) - 1 - bitpos;
+bitpos = GET_MODE_BITSIZE (operand_mode) - 1 - bitpos;
 
   inner0 = extract_bit_field (inner0, 1, bitpos, 1, target,
  operand_mode, mode, 0, NULL);
diff --git a/gcc/testsuite/gcc.dg/pr110117-1.c 
b/gcc/testsuite/gcc.dg/pr110117-1.c
new file mode 100644
index 000..fd9a9e3268e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr110117-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -ftree-vrp -fno-tree-ccp -fno-tree-forwprop" } */
+int a, b, d;
+unsigned c;
+int main() {
+  char e = -10;
+  int f = 1, g = 0;
+  if (a) {
+char h = e;
+  i:
+c = ~h - (-g & f || e);
+int j = b % c;
+g = j % 9;
+if (c) {
+  if (d)
+e = 0;
+  while (!g)
+;
+  int k = 0;
+l:
+  if (k)
+goto i;
+}
+  }
+  if (e > -10) {
+if (g)
+  f = 0;
+goto l;
+  }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr110117-2.c 
b/gcc/testsuite/gcc.dg/pr110117-2.c
new file mode 100644
index 000..2e353258084
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr110117-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp 
-fno-tree-forwprop -fno-tree-fre -fno-tree-copy-prop" } */
+int f()
+{
+  int t = 0;
+  return (t & 1) != 0;
+}
-- 
2.31.1



[PATCH 1/2] Improve do_store_flag for single bit when there is no non-zero bits

2023-06-04 Thread Andrew Pinski via Gcc-patches
In r14-1534-g908e5ab5c11c, I forgot you could turn off CCP or
turn off the bit tracking part of CCP so we would lose out
what TER was able to do beforehand. This moves around the
TER code so that it is used instead of just the nonzerobits.
It also makes it easier to remove the TER part of the code
later on too.

OK? Bootstrapped and tested on x86_64-linux-gnu.

Note it reintroduces PR 110117 (which was accidentally fixed after
r14-1534-g908e5ab5c11c). The next patch in series will fix that.

gcc/ChangeLog:

* expr.cc (do_store_flag): Rearrange the
TER code so that it overrides the nonzero bits
info if we had `a & POW2`.
---
 gcc/expr.cc | 28 +++-
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 58f5fe76372..ca008cd453e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -13164,38 +13164,32 @@ do_store_flag (sepops ops, rtx target, machine_mode 
mode)
   && (TYPE_PRECISION (ops->type) != 1 || TYPE_UNSIGNED (ops->type)))
 {
   wide_int nz = tree_nonzero_bits (arg0);
+  gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
+  /* If the defining statement was (x & POW2), then use that instead of
+the non-zero bits.  */
+  if (srcstmt && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
+   {
+ nz = wi::to_wide (gimple_assign_rhs2 (srcstmt));
+ arg0 = gimple_assign_rhs1 (srcstmt);
+   }
 
   if (wi::popcount (nz) == 1
  && (integer_zerop (arg1)
  || wi::to_wide (arg1) == nz))
{
- tree op0;
- int bitnum;
- gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
- /* If the defining statement was (x & POW2), then remove the and
-as we are going to add it back. */
- if (srcstmt
- && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
-   {
- op0 = gimple_assign_rhs1 (srcstmt);
- bitnum = tree_log2 (gimple_assign_rhs2 (srcstmt));
-   }
- else
-   {
- op0 = arg0;
- bitnum = wi::exact_log2 (nz);
-   }
+ int bitnum = wi::exact_log2 (nz);
  enum tree_code tcode = EQ_EXPR;
  if ((code == NE) ^ !integer_zerop (arg1))
tcode = NE_EXPR;
 
  type = lang_hooks.types.type_for_mode (mode, unsignedp);
  return expand_single_bit_test (loc, tcode,
-op0,
+arg0,
 bitnum, type, target, mode);
}
 }
 
+
   if (! get_subtarget (target)
   || GET_MODE (subtarget) != operand_mode)
 subtarget = 0;
-- 
2.31.1



Re: [PATCH, PR110086] avr: Fix ICE on optimize attribute

2023-06-04 Thread Richard Biener via Gcc-patches
On Fri, Jun 2, 2023 at 11:54 AM SenthilKumar.Selvaraj--- via
Gcc-patches  wrote:
>
> Hi,
>
> This patch fixes an ICE when an optimize attribute changes the prevailing
> optimization level.
>
> I found https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105069 describing the
> same ICE for the sh target, where the fix was to enable save/restore of
> target specific options modified via TARGET_OPTIMIZATION_TABLE hook.
>
> For the AVR target, mgas-isr-prologues and -mmain-is-OS_task are those
> target specific options. As they enable generation of more optimal code,
> this patch adds the Optimization option property to those option records,
> and that fixes the ICE.
>
> Regression run shows no regressions, and >100 new PASSes.
> Ok to commit to master?

LGTM

Richard.

> Regards
> Senthil
>
>
> PR 110086
>
> gcc/ChangeLog:
>
> * config/avr/avr.opt (mgas-isr-prologues, mmain-is-OS_task):
> Add Optimization option property.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/avr/pr110086.c: New test.
>
> diff --git gcc/config/avr/avr.opt gcc/config/avr/avr.opt
> index f62d746..5a0b465 100644
> --- gcc/config/avr/avr.opt
> +++ gcc/config/avr/avr.opt
> @@ -27,7 +27,7 @@ Target RejectNegative Joined Var(avr_mmcu) 
> MissingArgError(missing device or arc
>  -mmcu=MCU  Select the target MCU.
>
>  mgas-isr-prologues
> -Target Var(avr_gasisr_prologues) UInteger Init(0)
> +Target Var(avr_gasisr_prologues) UInteger Init(0) Optimization
>  Allow usage of __gcc_isr pseudo instructions in ISR prologues and epilogues.
>
>  mn-flash=
> @@ -65,7 +65,7 @@ Target Joined RejectNegative UInteger Var(avr_branch_cost) 
> Init(0)
>  Set the branch costs for conditional branch instructions.  Reasonable values 
> are small, non-negative integers.  The default
> branch cost is 0.
>
>  mmain-is-OS_task
> -Target Mask(MAIN_IS_OS_TASK)
> +Target Mask(MAIN_IS_OS_TASK) Optimization
>  Treat main as if it had attribute OS_task.
>
>  morder1
> diff --git gcc/testsuite/gcc.target/avr/pr110086.c 
> gcc/testsuite/gcc.target/avr/pr110086.c
> new file mode 100644
> index 000..6b97620
> --- /dev/null
> +++ gcc/testsuite/gcc.target/avr/pr110086.c
> @@ -0,0 +1,5 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os" } */
> +
> +void __attribute__((optimize("O0"))) foo(void) {
> +}


[PATCH] [RISC-V] add TC for save-restore cfi directives.

2023-06-04 Thread Fei Gao
gcc/testsuite/ChangeLog:

* gcc.target/riscv/save-restore-cfi.c: New test to check save-restore 
cfi directives.
---
 .../gcc.target/riscv/save-restore-cfi.c | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/save-restore-cfi.c

diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-cfi.c 
b/gcc/testsuite/gcc.target/riscv/save-restore-cfi.c
new file mode 100644
index 000..a39f3060981
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/save-restore-cfi.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-g -Os -march=rv32imafc -mabi=ilp32f -msave-restore 
-mcmodel=medlow" } */
+/* { dg-skip-if "" { *-*-* } {"-O2" "-O1" "-O0" "-Og" "-O3" "-Oz" "-flto"} } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 16} 2} } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 32} 1} } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1} } */
+
+char my_getchar();
+float getf();
+
+int foo()
+{
+  int s0 = my_getchar();
+  float f0 = getf();
+  int b = my_getchar();
+  return f0 + s0 + b;
+}
-- 
2.17.1



Re: PING Re: [PATCH RFA (tree-eh)] c++: use __cxa_call_terminate for MUST_NOT_THROW [PR97720]

2023-06-04 Thread Richard Biener via Gcc-patches
On Fri, Jun 2, 2023 at 6:57 PM Jason Merrill via Gcc-patches
 wrote:
>
> Since Jonathan approved the library change, I'm looking for middle-end
> approval for the tree-eh change, even without advice on the potential
> follow-up.
>
> On 5/24/23 14:55, Jason Merrill wrote:
> > Middle-end folks: any thoughts about how best to make the change described 
> > in
> > the last paragraph below?
> >
> > Library folks: any thoughts on the changes to __cxa_call_terminate?
> >
> > -- 8< --
> >
> > [except.handle]/7 says that when we enter std::terminate due to a throw,
> > that is considered an active handler.  We already implemented that properly
> > for the case of not finding a handler (__cxa_throw calls __cxa_begin_catch
> > before std::terminate) and the case of finding a callsite with no landing
> > pad (the personality function calls __cxa_call_terminate which calls
> > __cxa_begin_catch), but for the case of a throw in a try/catch in a noexcept
> > function, we were emitting a cleanup that calls std::terminate directly
> > without ever calling __cxa_begin_catch to handle the exception.
> >
> > A straightforward way to fix this seems to be calling __cxa_call_terminate
> > instead.  However, that requires exporting it from libstdc++, which we have
> > not previously done.  Despite the name, it isn't actually part of the ABI
> > standard.  Nor is __cxa_call_unexpected, as far as I can tell, but that one
> > is also used by clang.  For this case they use __clang_call_terminate; it
> > seems reasonable to me for us to stick with __cxa_call_terminate.
> >
> > I also change __cxa_call_terminate to take void* for simplicity in the front
> > end (and consistency with __cxa_call_unexpected) but that isn't necessary if
> > it's undesirable for some reason.
> >
> > This patch does not fix the issue that representing the noexcept as a
> > cleanup is wrong, and confuses the handler search; since it looks like a
> > cleanup in the EH tables, the unwinder keeps looking until it finds the
> > catch in main(), which it should never have gotten to.  Without the
> > try/catch in main, the unwinder would reach the end of the stack and say no
> > handler was found.  The noexcept is a handler, and should be treated as one,
> > as it is when the landing pad is omitted.
> >
> > The best fix for that issue seems to me to be to represent an
> > ERT_MUST_NOT_THROW after an ERT_TRY in an action list as though it were an
> > ERT_ALLOWED_EXCEPTIONS (since indeed it is an exception-specification).  The
> > actual code generation shouldn't need to change (apart from the change made
> > by this patch), only the action table entry.
> >
> >   PR c++/97720
> >
> > gcc/cp/ChangeLog:
> >
> >   * cp-tree.h (enum cp_tree_index): Add CPTI_CALL_TERMINATE_FN.
> >   (call_terminate_fn): New macro.
> >   * cp-gimplify.cc (gimplify_must_not_throw_expr): Use it.
> >   * except.cc (init_exception_processing): Set it.
> >   (cp_protect_cleanup_actions): Return it.
> >
> > gcc/ChangeLog:
> >
> >   * tree-eh.cc (lower_resx): Pass the exception pointer to the
> >   failure_decl.
> >   * except.h: Tweak comment.
> >
> > libstdc++-v3/ChangeLog:
> >
> >   * libsupc++/eh_call.cc (__cxa_call_terminate): Take void*.
> >   * config/abi/pre/gnu.ver: Add it.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * g++.dg/eh/terminate2.C: New test.
> > ---
> >   gcc/cp/cp-tree.h |  2 ++
> >   gcc/except.h |  2 +-
> >   gcc/cp/cp-gimplify.cc|  2 +-
> >   gcc/cp/except.cc |  5 -
> >   gcc/testsuite/g++.dg/eh/terminate2.C | 30 
> >   gcc/tree-eh.cc   | 16 ++-
> >   libstdc++-v3/libsupc++/eh_call.cc|  4 +++-
> >   libstdc++-v3/config/abi/pre/gnu.ver  |  7 +++
> >   8 files changed, 63 insertions(+), 5 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/eh/terminate2.C
> >
> > diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> > index a1b882f11fe..a8465a988b5 100644
> > --- a/gcc/cp/cp-tree.h
> > +++ b/gcc/cp/cp-tree.h
> > @@ -217,6 +217,7 @@ enum cp_tree_index
> >  definitions.  */
> >   CPTI_ALIGN_TYPE,
> >   CPTI_TERMINATE_FN,
> > +CPTI_CALL_TERMINATE_FN,
> >   CPTI_CALL_UNEXPECTED_FN,
> >
> >   /* These are lazily inited.  */
> > @@ -358,6 +359,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
> >   /* Exception handling function declarations.  */
> >   #define terminate_fn
> > cp_global_trees[CPTI_TERMINATE_FN]
> >   #define call_unexpected_fn  
> > cp_global_trees[CPTI_CALL_UNEXPECTED_FN]
> > +#define call_terminate_fn
> > cp_global_trees[CPTI_CALL_TERMINATE_FN]
> >   #define get_exception_ptr_fn
> > cp_global_trees[CPTI_GET_EXCEPTION_PTR_FN]
> >   #define begin_catch_fn  
> > cp_global_trees[CPTI_BEGIN_CATCH_FN]
> >   #define end_catch_fn
> > cp_gl

Re: [PATCH V2, rs6000] Disable generation of scalar modulo instructions

2023-06-04 Thread Kewen.Lin via Gcc-patches
Hi Pat,

Thanks for fixing this and sorry for the late review!

on 2023/4/18 20:22, Pat Haugen wrote:
> Updated from prior patch to also disable for int128.
> 
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-04-18  Pat Haugen  
> 
> gcc/
> * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
> * config/rs6000/rs6000.md (mod3, *mod3): Disable.
> (define_expand umod3): New.
> (define_insn umod3): Rename to *umod3 and disable.
> (umodti3, modti3): Disable.

I noticed that there is one place in rs6000_rtx_costs that only checks
TARGET_MODULO when deciding whether to count the extra cost for umod/mod;
I guess we should update it as well (for scalar int modes)?

  /* Add in shift and subtract for MOD unless we have a mod instruction. */
  if (!TARGET_MODULO && (code == MOD || code == UMOD))
*total += COSTS_N_INSNS (2);

> 
> gcc/testsuite/
> * gcc.target/powerpc/clone1.c: Add xfails.
> * gcc.target/powerpc/clone3.c: Likewise.
> * gcc.target/powerpc/mod-1.c: Likewise.
> * gcc.target/powerpc/mod-2.c: Likewise.
> * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..1cf0a0013c0 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>     rs6000_asm_output_opcode (STREAM);    \
>  }    \
>    while (0)

The diff file seems to have unexpectedly expanded tabs into spaces, such as in
the above lines.

> +
> +/* Disable generation of scalar modulo instructions due to performance issues
> +   with certain input values. This can be removed in the future when the

nit:   ~~ two spaces instead of one.

> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 44f7dd509cb..4f397bc9179 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod3"
>  FAIL;
> 
>    operands[2] = force_reg (mode, operands[2]);
> +
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +  temp1 = gen_reg_rtx (mode);
> +  temp2 = gen_reg_rtx (mode);
> +
> +  emit_insn (gen_div3 (temp1, operands[1], operands[2]));
> +  emit_insn (gen_mul3 (temp2, temp1, operands[2]));
> +  emit_insn (gen_sub3 (operands[0], operands[1], temp2));
> +  DONE;
> +    }

nit: wrong indent.

>  }
>    else
>  {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod3"
>    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>  (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>   (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>    "mods %0,%1,%2"
>    [(set_attr "type" "div")
>     (set_attr "size" "")])
> 
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +  (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +    FAIL;
> 
> -(define_insn "umod3"
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +  temp1 = gen_reg_rtx (mode);
> +  temp2 = gen_reg_rtx (mode);
> +
> +  emit_insn (gen_udiv3 (temp1, operands[1], operands[2]));
> +  emit_insn (gen_mul3 (temp2, temp1, operands[2]));
> +  emit_insn (gen_sub3 (operands[0], operands[1], temp2));
> +  DONE;
> +    }
> +})
> +
> +(define_insn "*umod3"
>    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>  (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>    (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>    "modu %0,%1,%2"
>    [(set_attr "type" "div")
>     (set_attr "size" "")])
> @@ -3507,7 +3543,7 @@ (define_insn "umodti3"
>    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>  (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>   (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>    "vmoduq %0,%1,%2"
>    [(set_attr "type" "vecdiv")
>     (set_attr "size" "128")])
> @@ -3516,7 +3552,7 @@ (define_insn "modti3"
>    [(set (match_operand:TI 0 "altivec_registe

Re: [PATCH V2] VECT: Add SELECT_VL support

2023-06-04 Thread Richard Sandiford via Gcc-patches
juzhe.zh...@rivai.ai writes:
> +  /* If we're using decrement IV approach in loop control, we can use output 
> of
> + SELECT_VL to adjust IV of loop control and data reference when it 
> satisfies
> + the following checks:
> +
> + (a) SELECT_VL is supported by the target.
> + (b) LOOP_VINFO is single-rgroup control.
> + (c) non-SLP.
> + (d) LOOP can not be unrolled.
> +
> + Otherwise, we use MIN_EXPR approach.
> +
> + 1. We only apply SELECT_VL on single-rgroup since:
> +
> + (1). Multiple-rgroup controls N vector loads/stores would need N pointer
> +   updates by variable amounts.
> + (2). SELECT_VL allows flexible length (<=VF) in each iteration.
> + (3). For decrement IV approach, we calculate the MAX length of the loop
> +   and then deduce the length of each control from this MAX length.
> +
> + Base on (1), (2) and (3) situations, if we try to use SELECT_VL on
> + multiple-rgroup control, we need to generate multiple SELECT_VL to
> + carefully adjust length of each control.

If we use SELECT_VL to refer only to the target-independent ifn, I don't
see why this last bit is true.  Like I said in the previous message,
when it comes to determining the length of each control, the approach we
take for MIN_EXPR IVs should work for SELECT_VL IVs.  The point is that,
in both cases, any inactive lanes are always the last lanes.

E.g. suppose that, for one particular iteration, SELECT_VL decides that
6 lanes should be active in a loop with VF==8.  If there is a 2-control
rgroup with 4 lanes each, the first control must be 4 and the second
control must be 2, just as if a MIN_EXPR had decided that 6 lanes of
the final iteration are active.

I'm not saying the decision itself is wrong.  But I think the explanation
could be clearer.

> + Such approach is very inefficient
> + and unprofitable for targets that are using a standalone instruction
> + to configure the length of each operation.
> + E.g. RISC-V vector use 'vsetvl' to configure the length of each 
> operation.

What I don't understand is why this isn't also a problem with the
fallback MIN_EXPR approach.  That is, with the same example as above,
but using MIN_EXPR IVs, I would have expected:

  VF == 8

  1-control rgroup "A":
A set by MIN_EXPR IV

  2-control rgroup "B1", "B2":
B1 = MIN (A, 4)
B2 = A - B1

and so the vectors controlled by A, B1 and B2 would all have different
lengths.

Is the point that, when using MIN_EXPR, this only happens in the
final iteration?  And that you use a tail/epilogue loop for that,
so that the main loop body operates on full vectors only?

Thanks,
Richard


Re: [RFA] Improve strcmp expansion when one input is a constant string.

2023-06-04 Thread Richard Biener via Gcc-patches
On Sun, Jun 4, 2023 at 11:41 PM Jeff Law via Gcc-patches
 wrote:
>
> While investigating a RISC-V backend patch from Jivan I noticed a
> regression in terms of dynamic instruction counts for the omnetpp
> benchmark in spec2017.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620577.html
>
> The code we get with Jivan's patch at expansion time looks like this for
> each character in the input string:
>
>
>
> (insn 6 5 7 (set (reg:SI 137)
>  (zero_extend:SI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM
>  [(void *)x_2(D)]+0 S1 A8]))) "j.c":5:11 -1
>   (nil))
>
> (insn 7 6 8 (set (reg:DI 138)
>  (sign_extend:DI (plus:SI (reg:SI 137)
>  (const_int -108 [0xff94] "j.c":5:11 -1
>   (nil))
>
> (insn 8 7 9 (set (reg:SI 136)
>  (subreg/s/u:SI (reg:DI 138) 0)) "j.c":5:11 -1
>   (expr_list:REG_EQUAL (plus:SI (reg:SI 137)
>  (const_int -108 [0xff94]))
>  (nil)))
>
> (insn 9 8 10 (set (reg:DI 139)
>  (sign_extend:DI (reg:SI 136))) "j.c":5:11 -1
>   (nil))
>
> (jump_insn 10 9 11 (set (pc)
>  (if_then_else (ne (reg:DI 139)
>  (const_int 0 [0]))
>  (label_ref 64)
>  (pc))) "j.c":5:11 -1
>   (nil))
>
>
> Ignore insn 9.  fwprop will turn it into a trivial copy from r138->r139
> which will ultimately propagate away.
>
>
> All the paths eventually transfer control to the label in question,
> either by jumping or falling thru on the last character.  After a bit of
> cleanup by fwprop & friends we have:
>
>
>
> > (insn 6 3 7 2 (set (reg:SI 137 [ MEM  [(void *)x_2(D)] ])
> > (zero_extend:SI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM  
> > [(void *)x_2(D)]+0 S1 A8]))) "j.c":5:11 114 {zero_extendqisi2}
> >  (nil))
> > (insn 7 6 8 2 (set (reg:DI 138)
> > (sign_extend:DI (plus:SI (reg:SI 137 [ MEM  [(void 
> > *)x_2(D)] ])
> > (const_int -108 [0xff94] "j.c":5:11 6 
> > {addsi3_extended}
> >  (expr_list:REG_DEAD (reg:SI 137 [ MEM  [(void *)x_2(D)] ])
> > (nil)))
> > (insn 8 7 10 2 (set (reg:SI 136 [ MEM  [(void *)x_2(D)]+11 ])
> > (subreg/s/u:SI (reg:DI 138) 0)) "j.c":5:11 180 {*movsi_internal}
> >  (nil))
> > (jump_insn 10 8 73 2 (set (pc)
> > (if_then_else (ne (reg:DI 138)
> > (const_int 0 [0]))
> > (label_ref 64)
> > (pc))) "j.c":5:11 243 {*branchdi}
> >  (expr_list:REG_DEAD (reg:DI 138)
> > (int_list:REG_BR_PROB 536870916 (nil)))
> >  -> 64)
>
>
> insn 8 is the result of wanting the ultimate result of the strcmp to be
> an "int" type (SImode).  Note that (reg 136) is the result of the
> strcmp.  It gets set in each fragment of code that compares one element
> in the string.  It's also live after the strcmp sequence.   As a result
> combine isn't going to be able to clean this up.
>
> Note how (reg 136) births while (reg 138) is live and even though (reg
> 136) is a copy of (reg 138), IRA doesn't have the necessary code to
> determine that the regs do not conflict.  As a result (reg 136) and (reg
> 138) must be allocated different hard registers and we get code like this:
>
> > lbu a5,0(a0)# 6 [c=28 l=4]  zero_extendqisi2/1
> > addiw   a5,a5,-108  # 7 [c=8 l=4]  addsi3_extended/1
> > mv  a4,a5   # 8 [c=4 l=4]  *movsi_internal/0
> > bne a5,zero,.L2 # 10[c=4 l=4]  *branchdi
>
> Note the annoying "mv".
>
>
> Rather than do a conversion for each character, we could do each step in
> word_mode and do the conversion once at the end of the whole sequence.
>
> So for each character we expand to:
>
> > (insn 6 5 7 (set (reg:DI 138)
> > (zero_extend:DI (mem:QI (reg/v/f:DI 135 [ x ]) [0 MEM  
> > [(void *)x_2(D)]+0 S1 A8]))) "j.c":5:11 -1
> >  (nil))
> >
> > (insn 7 6 8 (set (reg:DI 137)
> > (plus:DI (reg:DI 138)
> > (const_int -108 [0xff94]))) "j.c":5:11 -1
> >  (nil))
> >
> > (jump_insn 8 7 9 (set (pc)
> > (if_then_else (ne (reg:DI 137)
> > (const_int 0 [0]))
> > (label_ref 41)
> > (pc))) "j.c":5:11 -1
> >  (nil))
>
> Good.  Then at the end of the sequence we have:
> > (code_label 41 40 42 2 (nil) [0 uses])
> >
> > (insn 42 41 43 (set (reg:SI 136)
> > (subreg:SI (reg:DI 137) 0)) "j.c":5:11 -1
> >  (nil))
>
> Which seems like exactly what we want.  At the assembly level we get:
>  lbu a5,0(a0)# 6 [c=28 l=4]  zero_extendqidi2/1
>  addia0,a5,-108  # 7 [c=4 l=4]  adddi3/1
>  bne a0,zero,.L2 # 8 [c=4 l=4]  *branchdi
> [ ... ]
>
> At the end of the sequence we realize the narrowing subreg followed by
> an extension isn't necessary and just remove them.
>
> The ultimate result is omnetpp goes from a small regression to a small
> overall improvement with Jivan's patch.
>
> Bootstrapped and regression tested 

Re: [PATCH] Fix PR 110085: `make clean` in GCC directory on sh target causes a failure

2023-06-04 Thread Richard Biener via Gcc-patches
On Mon, Jun 5, 2023 at 7:43 AM Andrew Pinski  wrote:
>
> On Sun, Jun 4, 2023 at 10:24 PM Richard Biener via Gcc-patches
>  wrote:
> >
> >
> >
> > > Am 05.06.2023 um 06:42 schrieb Andrew Pinski via Gcc-patches 
> > > :
> > >
> > > On sh target, there is a MULTILIB_DIRNAMES (or is it MULTILIB_OPTIONS) 
> > > named m2,
> > > > this conflicts with the language m2. So when you do a `make clean`, it 
> > > will remove
> > > the m2 directory and then a build will fail. Now since 
> > > r0-78222-gfa9585134f6f58,
> > > the multilib directories are no longer created in the gcc directory as 
> > > libgcc
> > > was moved to the toplevel. So we can remove the part of clean that 
> > > removes those
> > > directories.
> > >
> > > Tested on x86_64-linux-gnu and a cross to sh-elf that `make clean` 
> > > followed by
> > > `make` works again.
> > >
> > > OK?
> >
> > Ok
>
> Is a similar patch ok for GCC 13 branch as we would get a similar
> failure there too?

Yes, though I wonder if we should worry.

Richard.

> Thanks,
> Andrew
>
> >
> > > gcc/ChangeLog:
> > >
> > >PR bootstrap/110085
> > >* Makefile.in (clean): Remove the removing of
> > >MULTILIB_DIR/MULTILIB_OPTIONS directories.
> > > ---
> > > gcc/Makefile.in | 7 ---
> > > 1 file changed, 7 deletions(-)
> > >
> > > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > > index 1d39e6dd3f8..0c02f312985 100644
> > > --- a/gcc/Makefile.in
> > > +++ b/gcc/Makefile.in
> > > @@ -3622,13 +3622,6 @@ clean: mostlyclean lang.clean
> > >-rm -f doc/*.pdf
> > > # Delete the include directories.
> > >-rm -rf include include-fixed
> > > -# Delete files used by the "multilib" facility (including libgcc 
> > > subdirs).
> > > --rm -f multilib.h tmpmultilib*
> > > --if [ "x$(MULTILIB_DIRNAMES)" != x ] ; then \
> > > -  rm -rf $(MULTILIB_DIRNAMES); \
> > > -else if [ "x$(MULTILIB_OPTIONS)" != x ] ; then \
> > > -  rm -rf `echo $(MULTILIB_OPTIONS) | sed -e 's/\// /g'`; \
> > > -fi ; fi
> > >
> > > # Delete all files that users would normally create
> > > # while building and installing GCC.
> > > --
> > > 2.31.1
> > >


[PATCH] rs6000: Remove duplicate expression [PR106907]

2023-06-04 Thread P Jeevitha via Gcc-patches
PR106907 has a few warnings spotted by cppcheck. This patch addresses the
duplicate expression issue. The same expression is used twice in a logical
AND (&&) operation, which yields the same result, so remove the duplicate.

2023-06-05  Jeevitha Palanisamy  

gcc/
PR target/106907
* config/rs6000/rs6000.cc (vec_const_128bit_to_bytes): Remove
duplicate expression.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 42f49e4a56b..d197c3f3289 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -28784,7 +28784,6 @@ vec_const_128bit_to_bytes (rtx op,
 
   info->all_words_same
 = (info->words[0] == info->words[1]
-   && info->words[0] == info->words[1]
&& info->words[0] == info->words[2]
&& info->words[0] == info->words[3]);
 




[PATCH v1] RISC-V: Support RVV FP16 ZVFH floating-point intrinsic API

2023-06-04 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch supports the intrinsic API of FP16 ZVFH floating-point, aka
SEW=16 for the instructions below:

vfadd vfsub vfrsub vfwadd vfwsub
vfmul vfdiv vfrdiv vfwmul
vfmacc vfnmacc vfmsac vfnmsac vfmadd
vfnmadd vfmsub vfnmsub vfwmacc vfwnmacc vfwmsac vfwnmsac
vfsqrt vfrsqrt7 vfrec7
vfmin vfmax
vfsgnj vfsgnjn vfsgnjx
vmfeq vmfne vmflt vmfle vmfgt vmfge
vfclass vfmerge
vfmv
vfcvt vfwcvt vfncvt

Then users can leverage the intrinsic APIs to perform the FP16 related
operations. Please note that not all the intrinsic APIs are covered in the
test files; only some typical ones are picked because there are too many. We
will test the FP16 related intrinsic APIs in full soon.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-types.def
(vfloat32mf2_t): New type for DEF_RVV_WEXTF_OPS.
(vfloat32m1_t): Ditto.
(vfloat32m2_t): Ditto.
(vfloat32m4_t): Ditto.
(vfloat32m8_t): Ditto.
(vint16mf4_t): New type for DEF_RVV_CONVERT_I_OPS.
(vint16mf2_t): Ditto.
(vint16m1_t): Ditto.
(vint16m2_t): Ditto.
(vint16m4_t): Ditto.
(vint16m8_t): Ditto.
(vuint16mf4_t): New type for DEF_RVV_CONVERT_U_OPS.
(vuint16mf2_t): Ditto.
(vuint16m1_t): Ditto.
(vuint16m2_t): Ditto.
(vuint16m4_t): Ditto.
(vuint16m8_t): Ditto.
(vint32mf2_t): New type for DEF_RVV_WCONVERT_I_OPS.
(vint32m1_t): Ditto.
(vint32m2_t): Ditto.
(vint32m4_t): Ditto.
(vint32m8_t): Ditto.
(vuint32mf2_t): New type for DEF_RVV_WCONVERT_U_OPS.
(vuint32m1_t): Ditto.
(vuint32m2_t): Ditto.
(vuint32m4_t): Ditto.
(vuint32m8_t): Ditto.
* config/riscv/vector-iterators.md: Add FP=16 support for V,
VWCONVERTI, VCONVERT, VNCONVERT, VMUL1 and vlmul1.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zvfh-intrinsic.c: New test.
---
 .../riscv/riscv-vector-builtins-types.def |  32 ++
 gcc/config/riscv/vector-iterators.md  |  21 +
 .../riscv/rvv/base/zvfh-intrinsic.c   | 418 ++
 3 files changed, 471 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-intrinsic.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
b/gcc/config/riscv/riscv-vector-builtins-types.def
index 9cb3aca992e..348aa05dd91 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -518,11 +518,24 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V)
 DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V)
 DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V)
 
+DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WEXTF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32)
+
 DEF_RVV_WEXTF_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64)
 
+DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_CONVERT_I_OPS (vint16mf2_t, 0)
+DEF_RVV_CONVERT_I_OPS (vint16m1_t, 0)
+DEF_RVV_CONVERT_I_OPS (vint16m2_t, 0)
+DEF_RVV_CONVERT_I_OPS (vint16m4_t, 0)
+DEF_RVV_CONVERT_I_OPS (vint16m8_t, 0)
+
 DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0)
 DEF_RVV_CONVERT_I_OPS (vint32m2_t, 0)
@@ -533,6 +546,13 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64)
 
+DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, 0)
+DEF_RVV_CONVERT_U_OPS (vuint16m1_t, 0)
+DEF_RVV_CONVERT_U_OPS (vuint16m2_t, 0)
+DEF_RVV_CONVERT_U_OPS (vuint16m4_t, 0)
+DEF_RVV_CONVERT_U_OPS (vuint16m8_t, 0)
+
 DEF_RVV_CONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint32m1_t, 0)
 DEF_RVV_CONVERT_U_OPS (vuint32m2_t, 0)
@@ -543,11 +563,23 @@ DEF_RVV_CONVERT_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64)
 
+DEF_RVV_WCONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WCONVERT_I_OPS (vint32m1_t, 0)
+DEF_RVV_WCONVERT_I_OPS (vint32m2_t, 0)
+DEF_RVV_WCONVERT_I_OPS (vint32m4_t, 0)
+DEF_RVV_WCONVERT_I_OPS (vint32m8_t, 0)
+
 DEF_RVV_WCONVERT_I_OPS (vint64m1_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
 DEF_RVV_WCONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
 DEF_RVV_WCONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64)
 DEF_RVV_WCONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQ