[PATCH v3] libgfortran: Replace mutex with rwlock

2023-04-19 Thread Lipeng Zhu via Gcc-patches
This patch introduces an rwlock and splits the read and write accesses to
the unit_root tree and unit_cache, using the rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
of calls that step into the insert_unit function is around 30%; in most
instances, we can get the unit in the phase of reading the unit_cache or
unit_root tree. So splitting the read/write phases with an rwlock is an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test
server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

* gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
(__gthrw): New function
(__gthread_rwlock_rdlock): New function
(__gthread_rwlock_tryrdlock): New function
(__gthread_rwlock_wrlock): New function
(__gthread_rwlock_trywrlock): New function
(__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

* io/async.c (DEBUG_LINE): New
* io/async.h (RWLOCK_DEBUG_ADD): New macro
(CHECK_RDLOCK): New macro
(CHECK_WRLOCK): New macro
(TAIL_RWLOCK_DEBUG_QUEUE): New macro
(IN_RWLOCK_DEBUG_QUEUE): New macro
(RDLOCK): New macro
(WRLOCK): New macro
(RWUNLOCK): New macro
(RD_TO_WRLOCK): New macro
(INTERN_RDLOCK): New macro
(INTERN_WRLOCK): New macro
(INTERN_RWUNLOCK): New macro
* io/io.h (internal_proto): Define unit_rwlock
* io/transfer.c (st_read_done_worker): Replace unit_lock with unit_rwlock
(st_write_done_worker): Replace unit_lock with unit_rwlock
* io/unit.c (get_gfc_unit): Replace unit_lock with unit_rwlock
(if): Replace unit_lock with unit_rwlock
(close_unit_1): Replace unit_lock with unit_rwlock
(close_units): Replace unit_lock with unit_rwlock
(newunit_alloc): Replace unit_lock with unit_rwlock
* io/unix.c (flush_all_units): Replace unit_lock with unit_rwlock

---
v1 -> v2:
Limit the pthread_rwlock usage in libgcc only when __cplusplus isn't defined.

v2 -> v3:
Rebase the patch with trunk branch.

Signed-off-by: Lipeng Zhu 
---
 libgcc/gthr-posix.h   |  60 +++
 libgfortran/io/async.c|   4 +
 libgfortran/io/async.h| 151 ++
 libgfortran/io/io.h   |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c |  65 
 libgfortran/io/unix.c |  16 ++--
 7 files changed, 273 insertions(+), 46 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index aebcfdd9f4c..73283082997 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,9 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+#ifndef __cplusplus
+typedef pthread_rwlock_t __gthread_rwlock_t;
+#endif
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +61,9 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#ifndef __cplusplus
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
+#endif
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +141,13 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+#ifndef __cplusplus
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
+#endif
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +898,53 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+#ifndef __cplusplus
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+return 0;
+}
+#endif
+
 #endif /* _LIBOBJC */
 
 #endif /* ! GCC_GTHR_POSIX_H */
diff --git a/lib

Re: [PATCH v2] expansion: make layout of x_shift*cost[][][] more efficient

2023-04-19 Thread Richard Biener via Gcc-patches
On Tue, Apr 18, 2023 at 10:51 PM Vineet Gupta  wrote:
>
> when debugging expmed.[ch] for PR/108987 saw that some of the cost arrays have
> less than ideal layout as follows:
>
>x_shift*cost[0..63][speed][modes]
>
> We would want speed to be first index since a typical compile will have
> that fixed, followed by mode and then the shift values.
>
> It should be non-functional from compiler semantics pov, except
> executing slightly faster due to better locality of shift values for
> given speed and mode. And also a bit more intuitive when debugging.

OK, but please wait 24h in case somebody else wants to comment.

Thanks,
Richard.

> gcc/Changelog:
>
> * expmed.h (x_shift*_cost): convert to int [speed][mode][shift].
> (shift*_cost_ptr ()): Access x_shift*_cost array directly.
>
> Signed-off-by: Vineet Gupta 
> ---
> Changes since v1:
>- Post a non stale version of patch
> ---
>  gcc/expmed.h | 27 +--
>  1 file changed, 13 insertions(+), 14 deletions(-)
>
> diff --git a/gcc/expmed.h b/gcc/expmed.h
> index c747a0da1637..22ae1d2d0743 100644
> --- a/gcc/expmed.h
> +++ b/gcc/expmed.h
> @@ -161,15 +161,14 @@ struct target_expmed {
>struct expmed_op_cheap x_sdiv_pow2_cheap;
>struct expmed_op_cheap x_smod_pow2_cheap;
>
> -  /* Cost of various pieces of RTL.  Note that some of these are indexed by
> - shift count and some by mode.  */
> +  /* Cost of various pieces of RTL.  */
>int x_zero_cost[2];
>struct expmed_op_costs x_add_cost;
>struct expmed_op_costs x_neg_cost;
> -  struct expmed_op_costs x_shift_cost[MAX_BITS_PER_WORD];
> -  struct expmed_op_costs x_shiftadd_cost[MAX_BITS_PER_WORD];
> -  struct expmed_op_costs x_shiftsub0_cost[MAX_BITS_PER_WORD];
> -  struct expmed_op_costs x_shiftsub1_cost[MAX_BITS_PER_WORD];
> +  int x_shift_cost[2][NUM_MODE_IPV_INT][MAX_BITS_PER_WORD];
> +  int x_shiftadd_cost[2][NUM_MODE_IPV_INT][MAX_BITS_PER_WORD];
> +  int x_shiftsub0_cost[2][NUM_MODE_IPV_INT][MAX_BITS_PER_WORD];
> +  int x_shiftsub1_cost[2][NUM_MODE_IPV_INT][MAX_BITS_PER_WORD];
>struct expmed_op_costs x_mul_cost;
>struct expmed_op_costs x_sdiv_cost;
>struct expmed_op_costs x_udiv_cost;
> @@ -395,8 +394,8 @@ neg_cost (bool speed, machine_mode mode)
>  inline int *
>  shift_cost_ptr (bool speed, machine_mode mode, int bits)
>  {
> -  return expmed_op_cost_ptr (&this_target_expmed->x_shift_cost[bits],
> -speed, mode);
> +  int midx = expmed_mode_index (mode);
> +  return &this_target_expmed->x_shift_cost[speed][midx][bits];
>  }
>
>  /* Set the COST of doing a shift in MODE by BITS when optimizing for SPEED.  
> */
> @@ -421,8 +420,8 @@ shift_cost (bool speed, machine_mode mode, int bits)
>  inline int *
>  shiftadd_cost_ptr (bool speed, machine_mode mode, int bits)
>  {
> -  return expmed_op_cost_ptr (&this_target_expmed->x_shiftadd_cost[bits],
> -speed, mode);
> +  int midx = expmed_mode_index (mode);
> +  return &this_target_expmed->x_shiftadd_cost[speed][midx][bits];
>  }
>
>  /* Set the COST of doing a shift in MODE by BITS followed by an add when
> @@ -448,8 +447,8 @@ shiftadd_cost (bool speed, machine_mode mode, int bits)
>  inline int *
>  shiftsub0_cost_ptr (bool speed, machine_mode mode, int bits)
>  {
> -  return expmed_op_cost_ptr (&this_target_expmed->x_shiftsub0_cost[bits],
> -speed, mode);
> +  int midx = expmed_mode_index (mode);
> +  return &this_target_expmed->x_shiftsub0_cost[speed][midx][bits];
>  }
>
>  /* Set the COST of doing a shift in MODE by BITS and then subtracting a
> @@ -475,8 +474,8 @@ shiftsub0_cost (bool speed, machine_mode mode, int bits)
>  inline int *
>  shiftsub1_cost_ptr (bool speed, machine_mode mode, int bits)
>  {
> -  return expmed_op_cost_ptr (&this_target_expmed->x_shiftsub1_cost[bits],
> -speed, mode);
> +  int midx = expmed_mode_index (mode);
> +  return &this_target_expmed->x_shiftsub1_cost[speed][midx][bits];
>  }
>
>  /* Set the COST of subtracting a shift in MODE by BITS from a value when
> --
> 2.34.1
>


[PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.

2023-04-19 Thread liuhongt via Gcc-patches
-Jakub's comments--
That said, these fundamental types whose presence/absence depends on ISA flags
are quite problematic IMHO, as they are incompatible with the target
attribute/pragmas. Whether they are available or not available depends on
whether in this case SSE2 is enabled during compiler initialization (aka after
parsing command line options) and then they are available or unavailable to
everything else based on that.
-comments end--

Enable _Float16 and __bf16 all the time but issue errors when the
types are used in conversion, unary operation, binary operation,
parameter passing or value return when TARGET_SSE2 is not available.

Also undef macros which are used by libgcc/libstdc++ to check the
backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
available.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Also successfully cross-built for target i686-linux-gnu.
Ok for trunk?

gcc/ChangeLog:

PR target/109054
* config/i386/i386-builtins.cc
(ix86_register_float16_builtin_type): Remove TARGET_SSE2.
(ix86_register_bf16_builtin_type): Ditto.
* config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
isn't available, undef the macros which are used to check the
backend support of the _Float16/__bf16 types when building
libstdc++ and libgcc.
* config/i386/i386.cc (construct_container): Issue errors for
HFmode/BFmode when TARGET_SSE2 is not available.
(function_value_32): Ditto.
(ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
(ix86_libgcc_floating_mode_supported_p): Ditto.
(ix86_emit_support_tinfos): Adjust codes.
(ix86_invalid_conversion): New function.
(ix86_invalid_unary_op): Ditto.
(ix86_invalid_binary_op): Ditto.
(TARGET_INVALID_CONVERSION): Defined.
(TARGET_INVALID_UNARY_OP): Defined.
(TARGET_INVALID_BINARY_OP): Defined.
* config/i386/immintrin.h: Remove #ifdef __SSE2__ for fp16/bf16
related intrinsics header files.
* config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr109054.c: New test.
* gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
* gcc.target/i386/sse2-float16-1.c: Ditto.
* gcc.target/i386/sse2-float16-4.c: New test.
* gcc.target/i386/sse2-float16-5.c: New test.
* g++.target/i386/float16-1.C: Adjust error info.
---
 gcc/config/i386/i386-builtins.cc  |   4 +-
 gcc/config/i386/i386-c.cc |  37 ++
 gcc/config/i386/i386.cc   | 117 --
 gcc/config/i386/i386.h|   4 +
 gcc/config/i386/immintrin.h   |   4 -
 gcc/testsuite/g++.target/i386/float16-1.C |   8 +-
 gcc/testsuite/gcc.target/i386/pr109054.c  |   6 +
 .../gcc.target/i386/sse2-bfloat16-1.c |   8 +-
 .../gcc.target/i386/sse2-float16-1.c  |   8 +-
 .../gcc.target/i386/sse2-float16-4.c  |  25 
 .../gcc.target/i386/sse2-float16-5.c  |  24 
 11 files changed, 217 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109054.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..1cdabfd3a0a 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
   else
 ix86_float16_type_node = float16_type_node;
 
-  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("_Float16"))
 lang_hooks.types.register_builtin_type (ix86_float16_type_node,
"_Float16");
 }
@@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
   else
 ix86_bf16_type_node = bfloat16_type_node;
 
-  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("__bf16"))
 lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
 }
 
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index e7bd7cc706c..eb77d0af226 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -817,6 +817,43 @@ ix86_target_macros (void)
   if (!TARGET_80387)
 cpp_define (parse_in, "_SOFT_FLOAT");
 
+  /* HFmode/BFmode is supported without depending any isa
+ in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
+ but according to psABI, they're really supported w/ SSE2 and above.
+ Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
+ for backend support of the types, undef the macros to avoid
+ build failure, see PR109504.  */
+  if (!TARGET_SSE2)
+{
+  if (c_dialect_cxx ()
+ && cx

[PATCH] rtl-optimization/109237 - speedup bb_is_just_return

2023-04-19 Thread Richard Biener via Gcc-patches
For the testcase bb_is_just_return is on top of the profile, changing
it to walk BB insns backwards puts it off the profile.  That's because
in the forward walk you have to process possibly many debug insns
but in a backward walk you very likely run into control insns first.

This is a fixed version of the patch originally applied and
reverted.

Re-bootstrap & regtest running on x86_64-unknown-linux-gnu, will push
after that re-succeeds.

Richard.

PR rtl-optimization/109237
* cfgcleanup.cc (bb_is_just_return): Walk insns backwards.
---
 gcc/cfgcleanup.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc
index 194e0e5de12..78f59e99653 100644
--- a/gcc/cfgcleanup.cc
+++ b/gcc/cfgcleanup.cc
@@ -2608,14 +2608,14 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, 
rtx_insn **use)
   if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
 return false;
 
-  FOR_BB_INSNS (bb, insn)
+  FOR_BB_INSNS_REVERSE (bb, insn)
 if (NONDEBUG_INSN_P (insn))
   {
rtx pat = PATTERN (insn);
 
if (!*ret && ANY_RETURN_P (pat))
  *ret = insn;
-   else if (!*ret && !*use && GET_CODE (pat) == USE
+   else if (*ret && !*use && GET_CODE (pat) == USE
&& REG_P (XEXP (pat, 0))
&& REG_FUNCTION_VALUE_P (XEXP (pat, 0)))
  *use = insn;
-- 
2.35.3


Re: [PATCH] LoongArch: fix MUSL_DYNAMIC_LINKER

2023-04-19 Thread Lulu Cheng



在 2023/4/17 下午2:51, 樊鹏 写道:

Yes, https://wiki.musl-libc.org/guidelines-for-distributions.html, 
"Multilib/multi-arch" section of this
introduces it.


Hi,  fanpeng:

I agree with ruoyao, add this link to the commit message.

I have no problem with other.

Thanks!



-Original Messages-
From: "Xi Ruoyao" 
Sent Time:2023-04-17 14:36:52 (星期一)
To: "Peng Fan" , gcc-patches@gcc.gnu.org
Cc: chengl...@loongson.cn
Subject: Re: [PATCH] LoongArch: fix MUSL_DYNAMIC_LINKER

On Mon, 2023-04-17 at 10:39 +0800, Peng Fan wrote:

The system based on musl has no '/lib64', so change it.

I like the change.  IMO Glibc-based systems should avoid /lib64 as well
but it's too late to change it now.

Could you provide a link to the Musl doc as a reference?  I'd like to
include the link in the commit message.


gcc/
 * config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.

Signed-off-by: Peng Fan 
---
  gcc/config/loongarch/gnu-user.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/gnu-user.h
b/gcc/config/loongarch/gnu-user.h
index aecaa02a199..fa1a5211419 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3.  If not see
  #define GLIBC_DYNAMIC_LINKER \
    "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
  
+#define MUSL_ABI_SPEC \

+  "%{mabi=lp64d:-lp64d}" \
+  "%{mabi=lp64f:-lp64f}" \
+  "%{mabi=lp64s:-lp64s}"
+
  #undef MUSL_DYNAMIC_LINKER
  #define MUSL_DYNAMIC_LINKER \
-  "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1"
+  "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1"
  
  #undef GNU_USER_TARGET_LINK_SPEC

  #define GNU_USER_TARGET_LINK_SPEC \

--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


本邮件及其附件含有龙芯中科的商业秘密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制或散发)本邮件及其附件中的信息。如果您错收本邮件,请您立即电话或邮件通知发件人并删除本邮件。
This email and its attachments contain confidential information from Loongson 
Technology , which is intended only for the person or entity whose address is 
listed above. Any use of the information contained herein in any way 
(including, but not limited to, total or partial disclosure, reproduction or 
dissemination) by persons other than the intended recipient(s) is prohibited. 
If you receive this email in error, please notify the sender by phone or email 
immediately and delete it.




Re: [PATCH] Introduce VIRTUAL_REGISTER_P and VIRTUAL_REGISTER_NUM_P predicates

2023-04-19 Thread Jakub Jelinek via Gcc-patches
On Wed, Apr 19, 2023 at 08:53:42AM +0200, Uros Bizjak wrote:
> On Tue, Apr 18, 2023 at 7:20 PM Jakub Jelinek  wrote:
> >
> > On Mon, Apr 17, 2023 at 11:27:28PM +0200, Uros Bizjak via Gcc-patches wrote:
> > > --- a/gcc/rtl.h
> > > +++ b/gcc/rtl.h
> > > @@ -1972,6 +1972,13 @@ set_regno_raw (rtx x, unsigned int regno, unsigned 
> > > int nregs)
> > >  /* 1 if the given register number REG_NO corresponds to a hard register. 
> > >  */
> > >  #define HARD_REGISTER_NUM_P(REG_NO) ((REG_NO) < FIRST_PSEUDO_REGISTER)
> > >
> > > +/* 1 if the given register REG corresponds to a virtual register.  */
> > > +#define VIRTUAL_REGISTER_P(REG) (VIRTUAL_REGISTER_NUM_P (REGNO (REG)))
> > > +
> > > +/* 1 if the given register number REG_NO corresponds to a virtual 
> > > register.  */
> > > +#define VIRTUAL_REGISTER_NUM_P(REG_NO)   
> > > \
> > > +  (IN_RANGE (REG_NO, FIRST_VIRTUAL_REGISTER, LAST_VIRTUAL_REGISTER))
> >
> > Why the ()s around both definitions?
> > IN_RANGE adds its own and anything on top of that is just superfluous.
> 
> Mainly to imitate the surrounding code (e.g. HARD_REGISTER_P) that is
> quite generous with brackets.
> 
> I can remove external brackets from both definitions, but I'd remove
> them also from the HARD_REGISTER_P definition.

Please do.  HARD_REGISTER_NUM_P obviously needs to keep it.

Jakub



[committed] testsuite: Fix up pr109524.C for -std=c++23 [PR109524]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
On Mon, Apr 17, 2023 at 08:41:38AM +, Richard Biener via Gcc-patches wrote:
>   * g++.dg/pr109524.C: New testcase.

This testcase was reduced such that it isn't valid C++23, so with my
usual testing with GXX_TESTSUITE_STDS=98,11,14,17,20,2b it fails:
FAIL: g++.dg/pr109524.C  -std=gnu++2b (test for excess errors)
.../gcc/testsuite/g++.dg/pr109524.C: In function 'nn hh(nn)':
.../gcc/testsuite/g++.dg/pr109524.C:35:12: error: cannot bind non-const lvalue 
reference of type 'nn&' to an rvalue of type 'nn'
.../gcc/testsuite/g++.dg/pr109524.C:17:6: note:   initializing argument 1 of 
'nn::nn(nn&)'
The following patch fixes that and I've verified it doesn't change
anything on what the test was testing, it still ICEs in r13-7198 and
passes in r13-7203, now in all language modes (except for 98 where
it is intentionally UNSUPPORTED).

Tested on x86_64-linux -m32/-m64, committed to trunk and 13 branch
as obvious.

2023-04-19  Jakub Jelinek  

PR tree-optimization/109524
* g++.dg/pr109524.C (nn::nn): Change argument type from nn & to
const nn &.

--- gcc/testsuite/g++.dg/pr109524.C.jj  2023-04-17 11:44:12.405551621 +0200
+++ gcc/testsuite/g++.dg/pr109524.C 2023-04-19 09:54:45.211210827 +0200
@@ -14,7 +14,7 @@ struct _Guard {
 };
 struct nn {
   int * _M_dataplus;
-  nn(nn &)
+  nn(const nn &)
 {
 f();
 _Guard   __guard;


Jakub



[PATCH] LoongArch: fix MUSL_DYNAMIC_LINKER

2023-04-19 Thread Peng Fan
The system based on musl has no '/lib64', so change it.

https://wiki.musl-libc.org/guidelines-for-distributions.html,
"Multilib/multi-arch" section of this introduces it.

gcc/
* config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.

Signed-off-by: Peng Fan 
Suggested-by: Xi Ruoyao 
---
 gcc/config/loongarch/gnu-user.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index aecaa02a199..fa1a5211419 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3.  If not see
 #define GLIBC_DYNAMIC_LINKER \
   "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
 
+#define MUSL_ABI_SPEC \
+  "%{mabi=lp64d:-lp64d}" \
+  "%{mabi=lp64f:-lp64f}" \
+  "%{mabi=lp64s:-lp64s}"
+
 #undef MUSL_DYNAMIC_LINKER
 #define MUSL_DYNAMIC_LINKER \
-  "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1"
+  "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1"
 
 #undef GNU_USER_TARGET_LINK_SPEC
 #define GNU_USER_TARGET_LINK_SPEC \
-- 
2.20.1



[PATCH][committed] aarch64: PR target/108840 Simplify register shift RTX costs and eliminate shift amount masking

2023-04-19 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

In this PR we fail to eliminate explicit &31 operations for variable shifts 
such as in:
void
bar (int x[3], int y)
{
  x[0] <<= (y & 31);
  x[1] <<= (y & 31);
  x[2] <<= (y & 31);
}

This is rejected by RTX costs that end up giving too high a cost for:
(set (reg:SI 96)
(ashift:SI (reg:SI 98)
(subreg:QI (and:SI (reg:SI 99)
(const_int 31 [0x1f])) 0)))

There is code to handle the AND-31 case in rtx costs, but it gets confused by 
the subreg.
It's easy enough to fix by looking inside the subreg when costing the 
expression.
While doing that I noticed that the ASHIFT case and the other shift-like cases 
are almost identical
and we should just merge them. This code will only be used for valid insns 
anyway, so the code after this
patch should do the Right Thing (TM) for all such shift cases.

With this patch there are no more "and wn, wn, 31" instructions left in the 
testcase.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

PR target/108840

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_rtx_costs): Merge ASHIFT and
ROTATE, ROTATERT, LSHIFTRT, ASHIFTRT cases.  Handle subregs in op1.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr108840.c: New test.


reg-shift-and.patch
Description: reg-shift-and.patch


Re: [aarch64] Use wzr/xzr for assigning vector element to 0

2023-04-19 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 31 Jan 2023 at 11:51, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > On Mon, 23 Jan 2023 at 22:26, Richard Sandiford
> >  wrote:
> >>
> >> Prathamesh Kulkarni  writes:
> >> > On Wed, 18 Jan 2023 at 19:59, Richard Sandiford
> >> >  wrote:
> >> >>
> >> >> Prathamesh Kulkarni  writes:
> >> >> > On Tue, 17 Jan 2023 at 18:29, Richard Sandiford
> >> >> >  wrote:
> >> >> >>
> >> >> >> Prathamesh Kulkarni  writes:
> >> >> >> > Hi Richard,
> >> >> >> > For the following (contrived) test:
> >> >> >> >
> >> >> >> > void foo(int32x4_t v)
> >> >> >> > {
> >> >> >> >   v[3] = 0;
> >> >> >> >   return v;
> >> >> >> > }
> >> >> >> >
> >> >> >> > -O2 code-gen:
> >> >> >> > foo:
> >> >> >> > fmovs1, wzr
> >> >> >> > ins v0.s[3], v1.s[0]
> >> >> >> > ret
> >> >> >> >
> >> >> >> > I suppose we can instead emit the following code-gen ?
> >> >> >> > foo:
> >> >> >> >  ins v0.s[3], wzr
> >> >> >> >  ret
> >> >> >> >
> >> >> >> > combine produces:
> >> >> >> > Failed to match this instruction:
> >> >> >> > (set (reg:V4SI 95 [ v ])
> >> >> >> > (vec_merge:V4SI (const_vector:V4SI [
> >> >> >> > (const_int 0 [0]) repeated x4
> >> >> >> > ])
> >> >> >> > (reg:V4SI 97)
> >> >> >> > (const_int 8 [0x8])))
> >> >> >> >
> >> >> >> > So, I wrote the following pattern to match the above insn:
> >> >> >> > (define_insn "aarch64_simd_vec_set_zero"
> >> >> >> >   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
> >> >> >> > (vec_merge:VALL_F16
> >> >> >> > (match_operand:VALL_F16 1 "const_dup0_operand" "w")
> >> >> >> > (match_operand:VALL_F16 3 "register_operand" "0")
> >> >> >> > (match_operand:SI 2 "immediate_operand" "i")))]
> >> >> >> >   "TARGET_SIMD"
> >> >> >> >   {
> >> >> >> > int elt = ENDIAN_LANE_N (, exact_log2 (INTVAL 
> >> >> >> > (operands[2])));
> >> >> >> > operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
> >> >> >> > return "ins\\t%0.[%p2], wzr";
> >> >> >> >   }
> >> >> >> > )
> >> >> >> >
> >> >> >> > which now matches the above insn produced by combine.
> >> >> >> > However, in reload dump, it creates a new insn for assigning
> >> >> >> > register to (const_vector (const_int 0)),
> >> >> >> > which results in:
> >> >> >> > (insn 19 8 13 2 (set (reg:V4SI 33 v1 [99])
> >> >> >> > (const_vector:V4SI [
> >> >> >> > (const_int 0 [0]) repeated x4
> >> >> >> > ])) "wzr-test.c":8:1 1269 {*aarch64_simd_movv4si}
> >> >> >> >  (nil))
> >> >> >> > (insn 13 19 14 2 (set (reg/i:V4SI 32 v0)
> >> >> >> > (vec_merge:V4SI (reg:V4SI 33 v1 [99])
> >> >> >> > (reg:V4SI 32 v0 [97])
> >> >> >> > (const_int 8 [0x8]))) "wzr-test.c":8:1 1808
> >> >> >> > {aarch64_simd_vec_set_zerov4si}
> >> >> >> >  (nil))
> >> >> >> >
> >> >> >> > and eventually the code-gen:
> >> >> >> > foo:
> >> >> >> > moviv1.4s, 0
> >> >> >> > ins v0.s[3], wzr
> >> >> >> > ret
> >> >> >> >
> >> >> >> > To get rid of redundant assignment of 0 to v1, I tried to split the
> >> >> >> > above pattern
> >> >> >> > as in the attached patch. This works to emit code-gen:
> >> >> >> > foo:
> >> >> >> > ins v0.s[3], wzr
> >> >> >> > ret
> >> >> >> >
> >> >> >> > However, I am not sure if this is the right approach. Could you 
> >> >> >> > suggest,
> >> >> >> > if it'd be possible to get rid of UNSPEC_SETZERO in the patch ?
> >> >> >>
> >> >> >> The problem is with the "w" constraint on operand 1, which tells LRA
> >> >> >> to force the zero into an FPR.  It should work if you remove the
> >> >> >> constraint.
> >> >> > Ah indeed, sorry about that, changing the constraint works.
> >> >>
> >> >> "i" isn't right though, because that's for scalar integers.
> >> >> There's no need for any constraint here -- the predicate does
> >> >> all of the work.
> >> >>
> >> >> > Does the attached patch look OK after bootstrap+test ?
> >> >> > Since we're in stage-4, shall it be OK to commit now, or queue it for 
> >> >> > stage-1 ?
> >> >>
> >> >> It needs tests as well. :-)
> >> >>
> >> >> Also:
> >> >>
> >> >> > Thanks,
> >> >> > Prathamesh
> >> >> >
> >> >> >
> >> >> >>
> >> >> >> Also, I think you'll need to use zr for the zero, so that
> >> >> >> it uses xzr for 64-bit elements.
> >> >> >>
> >> >> >> I think this and the existing patterns ought to test
> >> >> >> exact_log2 (INTVAL (operands[2])) >= 0 in the insn condition,
> >> >> >> since there's no guarantee that RTL optimisations won't form
> >> >> >> vec_merges that have other masks.
> >> >> >>
> >> >> >> Thanks,
> >> >> >> Richard
> >> >> >
> >> >> > [aarch64] Use wzr/xzr for assigning 0 to vector element.
> >> >> >
> >> >> > gcc/ChangeLog:
> >> >> >   * config/aarch64/aarch64-simd.md 
> >> >> > (aarch64_simd_vec_set_zero):
> >> >> >   New pattern.
> >> >> >   * config/aarch64/predicates.md (const_dup0_operand): New.
> >> >> >

RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

2023-04-19 Thread Li, Pan2 via Gcc-patches
Hi Richard,

Do you have any idea about this? I use git gcc-commit-mklog, and it
generates the output below. There is no text after the colon, and I am not
sure whether I need to add something myself.

gcc/ChangeLog:

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1): 
<=== no text here.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/mask_insn_shortcut.c:   
 <=== no text here.
* gcc.target/riscv/simplify_ior_optimization.c: New test.

# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# On branch master
# Your branch is up to date with 'origin/master'.
#
# Changes to be committed:
#...modified:   gcc/simplify-rtx.cc
#...modified:   gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
#...new file:   gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c

Pan

-Original Message-
From: Li, Pan2 
Sent: Wednesday, April 19, 2023 2:47 PM
To: Richard Biener 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
richard.sandif...@arm.com; Wang, Yanzhang 
Subject: RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

Oh, I see. The message needs to be re-generated. Thank you for pointing that out, 
will update ASAP.

Pan

-Original Message-
From: Richard Biener 
Sent: Wednesday, April 19, 2023 2:40 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
richard.sandif...@arm.com; Wang, Yanzhang 
Subject: Re: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

On Tue, 18 Apr 2023, pan2...@intel.com wrote:

> From: Pan Li 
> 
> This patch adds the optimization for the vector IOR(V1, NOT V1). Assume 
> we have the sample code below.
> 
> vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t
> vl) {
>   return __riscv_vmorn_mm_b32(v1, v1, vl); }
> 
> Before this patch:
> vsetvli  a5,zero,e8,mf4,ta,ma
> vlm.vv24,0(a1)
> vsetvli  zero,a2,e8,mf4,ta,ma
> vmorn.mm v24,v24,v24
> vsetvli  a5,zero,e8,mf4,ta,ma
> vsm.vv24,0(a0)
> ret
> 
> After this patch:
> vsetvli zero,a2,e8,mf4,ta,ma
> vmset.m v24
> vsetvli a5,zero,e8,mf4,ta,ma
> vsm.v   v24,0(a0)
> ret
> 
> Or in RTL's perspective,
> from:
> (ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 [
> v1 ])))
> to:
> (const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ])
> 
> A similar optimization for VMANDN has already been enabled. There should 
> be no difference except the operator when comparing VMORN and VMANDN 
> for this kind of optimization. The patch allows the VECTOR_BOOL 
> IOR(V1, NOT V1) simplification in addition to the existing SCALAR_INT mode.
> 
> gcc/ChangeLog:
> 
>   * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):

This needs some text

> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/riscv/rvv/base/mask_insn_shortcut.c:

Likewise.

OK with that fixed.

>   * gcc.target/riscv/simplify_ior_optimization.c: New test.
> 
> Signed-off-by: Pan Li 
> ---
>  gcc/simplify-rtx.cc   |  4 +-
>  .../riscv/rvv/base/mask_insn_shortcut.c   |  3 +-
>  .../riscv/simplify_ior_optimization.c | 50 +++
>  3 files changed, 53 insertions(+), 4 deletions(-)  create mode 100644 
> gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> 
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 
> ee75079917f..3bc9b2f55ea 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -3332,8 +3332,8 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
> code,
>if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1))
>  || (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0)))
> && ! side_effects_p (op0)
> -   && SCALAR_INT_MODE_P (mode))
> - return constm1_rtx;
> +   && GET_MODE_CLASS (mode) != MODE_CC)
> + return CONSTM1_RTX (mode);
>  
>/* (ior A C) is C if all bits of A that might be nonzero are on in C.  
> */
>if (CONST_INT_P (op1)
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> index 83cc4a1b5a5..57d0241675a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> @@ -233,9 +233,8 @@ vbool64_t
> test_shortcut_for_riscv_vmxnor_case_6(vbool64_t v1, size_t vl) {
>  /* { dg-final { scan-assembler-not {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+} } 
> } */
>  /* { dg-final { scan-assembler-not {vmor\.mm\s+v[0-9]+,\s*v[0-9]+} } 
> } */
>  /* { dg-final { scan-assembler-not {vmnor\.mm\s+v[0-9]+,\s*v[0-9]+} } 
> } */
> -/* { dg-final { scan-assembler-times 
> {vmorn\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 7 } } */
>  /* { dg-final { scan-assembler-not {vmxnor\.mm\s+v[0-9]+,\s*v[0-9]+} 
> } } */
>  /* { dg-final { scan-assembler-times {vmclr\.m\s+v[0-9

RE: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Li, Pan2 via Gcc-patches
Passed the X86 bootstrap and regression tests.

Pan

-Original Message-
From: Li, Pan2  
Sent: Wednesday, April 19, 2023 11:21 AM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, Yanzhang 
; Li, Pan2 
Subject: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

From: Pan Li 

This patch tries to adjust the RISC-V Vector RTL for the generic shortcut 
optimization of RVV integer compares.
It covers the compare operators eq, ne, ltu, lt, leu, le, gtu, gt, geu and ge.

Assume we have below test code.
vbool1_t test_shortcut_for_riscv_vmslt_case_0(vint8m8_t v1, size_t vl) {
  return __riscv_vmslt_vv_i8m8_b1(v1, v1, vl); }

Before this patch:
vsetvli  zero,a2,e8,m8,ta,ma
vl8re8.v v24,0(a1)
vmslt.vv v8,v24,v24
vsetvli  a5,zero,e8,m8,ta,ma
vsm.vv8,0(a0)
ret

After this patch:
vsetvli zero,a2,e8,mf8,ta,ma
vmclr.m v24
vsetvli zero,a5,e8,mf8,ta,ma
vsm.v   v24,0(a0)
ret

However, there are some cases in the test files that cannot be optimized right now. We 
will file separate patches to try to make that happen.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (emit_pred_op):
* config/riscv/riscv-vector-builtins-bases.cc:
* config/riscv/vector.md:

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c: New test.

Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
---
 gcc/config/riscv/riscv-v.cc   |  15 +-
 .../riscv/riscv-vector-builtins-bases.cc  |   6 +-
 gcc/config/riscv/vector.md|  14 +-
 .../rvv/base/integer_compare_insn_shortcut.c  | 291 ++
 4 files changed, 319 insertions(+), 7 deletions(-)  create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 
392f5d02e17..c3881920812 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -71,12 +71,23 @@ public:
 add_input_operand (RVV_VUNDEF (mode), mode);
   }
   void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
+  {
+add_tail_policy_operand (vta);
+add_mask_policy_operand (vma);
+  }
+
+  void add_tail_policy_operand (enum tail_policy vta)
   {
 rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
-rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
 add_input_operand (tail_policy_rtx, Pmode);
+  }
+
+  void add_mask_policy_operand (enum mask_policy vma)  {
+rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
 add_input_operand (mask_policy_rtx, Pmode);
   }
+
   void add_avl_type_operand (avl_type type)
   {
 add_input_operand (gen_int_mode (type, Pmode), Pmode); @@ -206,6 +217,8 @@ 
emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
 
   if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
 e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy 
());
+  else
+e.add_tail_policy_operand (get_prefer_tail_policy ());
 
   if (vlmax_p)
 e.add_avl_type_operand (avl_type::VLMAX); diff --git 
a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 52467bbc961..7c6064a5a24 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -756,7 +756,7 @@ template  class mask_logic : public 
function_base  {
 public:
-  bool apply_tail_policy_p () const override { return false; }
+  bool apply_tail_policy_p () const override { return true; }
   bool apply_mask_policy_p () const override { return false; }
 
   rtx expand (function_expander &e) const override @@ -768,7 +768,7 @@ 
template  class mask_nlogic : public function_base  {
 public:
-  bool apply_tail_policy_p () const override { return false; }
+  bool apply_tail_policy_p () const override { return true; }
   bool apply_mask_policy_p () const override { return false; }
 
   rtx expand (function_expander &e) const override @@ -780,7 +780,7 @@ 
template  class mask_notlogic : public function_base  {
 public:
-  bool apply_tail_policy_p () const override { return false; }
+  bool apply_tail_policy_p () const override { return true; }
   bool apply_mask_policy_p () const override { return false; }
 
   rtx expand (function_expander &e) const override diff --git 
a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 
0ecca98f20c..6819363b9ff 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1032,6 +1032,7 @@ (define_insn_and_split "@pred_mov"
[(match_operand:VB 1 "vector_all_trues_mask_operand" "Wc1, Wc1, 
Wc1, Wc1, Wc1")
 (match_operand 4 "vector_length_operand"" rK,  rK,  
rK,  rK,  rK")
 (match_operand 5 "const_int_operand""  i,   i,   
i,   i,   i")
+(match_operand 6 "const_int_operand""  i,   i,   
i,   i,   i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (match_op

[PATCH] tree-vect-patterns: Improve __builtin_{clz,ctz,ffs}ll vectorization [PR109011]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
Hi!

For __builtin_popcountll tree-vect-patterns.cc has
vect_recog_popcount_pattern, which improves the vectorized code.
Without that the vectorization is always multi-type vectorization
in the loop (at least int and long long types) where we emit two
.POPCOUNT calls with long long arguments and int return value and then
widen to long long, so effectively after vectorization do the
V?DImode -> V?DImode popcount twice, then pack the result into V?SImode
and immediately unpack.

The following patch extends that handling to __builtin_{clz,ctz,ffs}ll
builtins as well (as long as there is an optab for them; more to come
later).

Bootstrapped/regtested on x86_64-linux and i686-linux, plus tested on
the testcase in crosses to powerpc64le-linux and s390x-linux.  Ok
for trunk?

x86 can do __builtin_popcountll with -mavx512vpopcntdq, __builtin_clzll
with -mavx512cd, ppc can do __builtin_popcountll and __builtin_clzll
with -mpower8-vector and __builtin_ctzll with -mpower9-vector, s390
can do __builtin_{popcount,clz,ctz}ll with -march=z13 -mzarch (i.e. VX).

2023-04-19  Jakub Jelinek  

PR tree-optimization/109011
* tree-vect-patterns.cc (vect_recog_popcount_pattern): Rename to ...
(vect_recog_popcount_clz_ctz_ffs_pattern): ... this.  Handle also
CLZ, CTZ and FFS.  Remove vargs variable, use
gimple_build_call_internal rather than gimple_build_call_internal_vec.
(vect_vect_recog_func_ptrs): Adjust popcount entry.

* gcc.dg/vect/pr109011-1.c: New test.

--- gcc/tree-vect-patterns.cc.jj2023-03-01 09:51:27.995362601 +0100
+++ gcc/tree-vect-patterns.cc   2023-04-18 17:16:42.733935262 +0200
@@ -1501,7 +1501,7 @@ vect_recog_widen_minus_pattern (vec_info
  "vect_recog_widen_minus_pattern");
 }
 
-/* Function vect_recog_popcount_pattern
+/* Function vect_recog_popcount_clz_ctz_ffs_pattern
 
Try to find the following pattern:
 
@@ -1530,16 +1530,20 @@ vect_recog_widen_minus_pattern (vec_info
* Return value: A new stmt that will be used to replace the sequence of
stmts that constitute the pattern. In this case it will be:
B = .POPCOUNT (A);
+
+   Similarly for clz, ctz and ffs.
 */
 
 static gimple *
-vect_recog_popcount_pattern (vec_info *vinfo,
-stmt_vec_info stmt_vinfo, tree *type_out)
+vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
+stmt_vec_info stmt_vinfo,
+tree *type_out)
 {
   gassign *last_stmt = dyn_cast  (stmt_vinfo->stmt);
-  gimple *popcount_stmt, *pattern_stmt;
+  gimple *call_stmt, *pattern_stmt;
   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
-  auto_vec vargs;
+  internal_fn ifn = IFN_LAST;
+  int addend = 0;
 
   /* Find B = (TYPE1) temp_out. */
   if (!last_stmt)
@@ -1557,51 +1561,137 @@ vect_recog_popcount_pattern (vec_info *v
   if (TREE_CODE (rhs_oprnd) != SSA_NAME
   || !has_single_use (rhs_oprnd))
 return NULL;
-  popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
+  call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
 
   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
-  if (!is_gimple_call (popcount_stmt))
+  if (!is_gimple_call (call_stmt))
 return NULL;
-  switch (gimple_call_combined_fn (popcount_stmt))
+  switch (gimple_call_combined_fn (call_stmt))
 {
+  int val;
 CASE_CFN_POPCOUNT:
+  ifn = IFN_POPCOUNT;
+  break;
+CASE_CFN_CLZ:
+  ifn = IFN_CLZ;
+  /* Punt if call result is unsigned and defined value at zero
+is negative, as the negative value doesn't extend correctly.  */
+  if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
+ && gimple_call_internal_p (call_stmt)
+ && CLZ_DEFINED_VALUE_AT_ZERO
+  (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
+ && val < 0)
+   return NULL;
+  break;
+CASE_CFN_CTZ:
+  ifn = IFN_CTZ;
+  /* Punt if call result is unsigned and defined value at zero
+is negative, as the negative value doesn't extend correctly.  */
+  if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
+ && gimple_call_internal_p (call_stmt)
+ && CTZ_DEFINED_VALUE_AT_ZERO
+  (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
+ && val < 0)
+   return NULL;
+  break;
+CASE_CFN_FFS:
+  ifn = IFN_FFS;
   break;
 default:
   return NULL;
 }
 
-  if (gimple_call_num_args (popcount_stmt) != 1)
+  if (gimple_call_num_args (call_stmt) != 1)
 return NULL;
 
-  rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
+  rhs_oprnd = gimple_call_arg (call_stmt, 0);
   vect_unpromoted_value unprom_diff;
-  rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
-   &unprom_diff);
+  rhs_origin
+= vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
 
   if (!rhs_origin)
 return NULL;
 
-  /* In

RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

2023-04-19 Thread Richard Biener via Gcc-patches
On Wed, 19 Apr 2023, Li, Pan2 wrote:

> Hi Richard,
> 
> Do you have any idea about this? I used git gcc-commit-mklog, and it
> generates something like the text below. There is no text after the colon.
> I am not sure whether I need to add something myself.

Well, you need to add a description of your change!

> gcc/ChangeLog:
> 
> * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
>  <=== no text here.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: 
><=== no text here.
> * gcc.target/riscv/simplify_ior_optimization.c: New test.
> 
> # Please enter the commit message for your changes. Lines starting
> # with '#' will be ignored, and an empty message aborts the commit.
> #
> # On branch master
> # Your branch is up to date with 'origin/master'.
> #
> # Changes to be committed:
> #...modified:   gcc/simplify-rtx.cc
> #...modified:   
> gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> #...new file:   gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> 
> Pan
> 
> -Original Message-
> From: Li, Pan2 
> Sent: Wednesday, April 19, 2023 2:47 PM
> To: Richard Biener 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
> richard.sandif...@arm.com; Wang, Yanzhang 
> Subject: RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization
> 
> Oh, I see. The message needs to be re-generated. Thank you for pointing that out,
> will update ASAP.
> 
> Pan
> 
> -Original Message-
> From: Richard Biener 
> Sent: Wednesday, April 19, 2023 2:40 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
> richard.sandif...@arm.com; Wang, Yanzhang 
> Subject: Re: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization
> 
> On Tue, 18 Apr 2023, pan2...@intel.com wrote:
> 
> > From: Pan Li 
> > 
> > This patch adds the optimization for the vector IOR(V1, NOT V1). Assume
> > we have the sample code below.
> > 
> > vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t
> > vl) {
> >   return __riscv_vmorn_mm_b32(v1, v1, vl); }
> > 
> > Before this patch:
> > vsetvli  a5,zero,e8,mf4,ta,ma
> > vlm.v    v24,0(a1)
> > vsetvli  zero,a2,e8,mf4,ta,ma
> > vmorn.mm v24,v24,v24
> > vsetvli  a5,zero,e8,mf4,ta,ma
> > vsm.v    v24,0(a0)
> > ret
> > 
> > After this patch:
> > vsetvli zero,a2,e8,mf4,ta,ma
> > vmset.m v24
> > vsetvli a5,zero,e8,mf4,ta,ma
> > vsm.v   v24,0(a0)
> > ret
> > 
> > Or in RTL's perspective,
> > from:
> > (ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 [
> > v1 ])))
> > to:
> > (const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ])
> > 
> > A similar optimization for VMANDN is already enabled. There should
> > be no difference except the operator when comparing VMORN and VMANDN
> > for this kind of optimization. The patch allows the VECTOR_BOOL
> > IOR(V1, NOT V1) simplification besides the existing SCALAR_INT mode.
> > 
> > gcc/ChangeLog:
> > 
> > * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
> 
> This needs some text
> 
> > gcc/testsuite/ChangeLog:
> > 
> > * gcc.target/riscv/rvv/base/mask_insn_shortcut.c:
> 
> Likewise.
> 
> OK with that fixed.
> 
> > * gcc.target/riscv/simplify_ior_optimization.c: New test.
> > 
> > Signed-off-by: Pan Li 
> > ---
> >  gcc/simplify-rtx.cc   |  4 +-
> >  .../riscv/rvv/base/mask_insn_shortcut.c   |  3 +-
> >  .../riscv/simplify_ior_optimization.c | 50 +++
> >  3 files changed, 53 insertions(+), 4 deletions(-)  create mode 100644 
> > gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> > 
> > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 
> > ee75079917f..3bc9b2f55ea 100644
> > --- a/gcc/simplify-rtx.cc
> > +++ b/gcc/simplify-rtx.cc
> > @@ -3332,8 +3332,8 @@ simplify_context::simplify_binary_operation_1 
> > (rtx_code code,
> >if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1))
> >|| (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0)))
> >   && ! side_effects_p (op0)
> > - && SCALAR_INT_MODE_P (mode))
> > -   return constm1_rtx;
> > + && GET_MODE_CLASS (mode) != MODE_CC)
> > +   return CONSTM1_RTX (mode);
> >  
> >/* (ior A C) is C if all bits of A that might be nonzero are on in 
> > C.  */
> >if (CONST_INT_P (op1)
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> > index 83cc4a1b5a5..57d0241675a 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> > @@ -233,9 +233,8 @@ vbool64_t
> > test_shortcut_for_riscv_vmxnor_case_6(vbool64_t v1, size_t vl) {
> >  /* { dg-final { scan-assembler-not {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+} } 
> > } */
> >  /* { dg-final { scan-assembler-not 

Re: [PATCH] tree-vect-patterns: Improve __builtin_{clz,ctz,ffs}ll vectorization [PR109011]

2023-04-19 Thread Richard Biener via Gcc-patches
On Wed, 19 Apr 2023, Jakub Jelinek wrote:

> Hi!
> 
> For __builtin_popcountll tree-vect-patterns.cc has
> vect_recog_popcount_pattern, which improves the vectorized code.
> Without that the vectorization is always multi-type vectorization
> in the loop (at least int and long long types) where we emit two
> .POPCOUNT calls with long long arguments and int return value and then
> widen to long long, so effectively after vectorization do the
> V?DImode -> V?DImode popcount twice, then pack the result into V?SImode
> and immediately unpack.
> 
> The following patch extends that handling to __builtin_{clz,ctz,ffs}ll
> builtins as well (as long as there is an optab for them; more to come
> later).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, plus tested on
> the testcase in crosses to powerpc64le-linux and s390x-linux.  Ok
> for trunk?

OK.

Richard.

> x86 can do __builtin_popcountll with -mavx512vpopcntdq, __builtin_clzll
> with -mavx512cd, ppc can do __builtin_popcountll and __builtin_clzll
> with -mpower8-vector and __builtin_ctzll with -mpower9-vector, s390
> can do __builtin_{popcount,clz,ctz}ll with -march=z13 -mzarch (i.e. VX).
> 
> 2023-04-19  Jakub Jelinek  
> 
>   PR tree-optimization/109011
>   * tree-vect-patterns.cc (vect_recog_popcount_pattern): Rename to ...
>   (vect_recog_popcount_clz_ctz_ffs_pattern): ... this.  Handle also
>   CLZ, CTZ and FFS.  Remove vargs variable, use
>   gimple_build_call_internal rather than gimple_build_call_internal_vec.
>   (vect_vect_recog_func_ptrs): Adjust popcount entry.
> 
>   * gcc.dg/vect/pr109011-1.c: New test.
> 
> --- gcc/tree-vect-patterns.cc.jj  2023-03-01 09:51:27.995362601 +0100
> +++ gcc/tree-vect-patterns.cc 2023-04-18 17:16:42.733935262 +0200
> @@ -1501,7 +1501,7 @@ vect_recog_widen_minus_pattern (vec_info
> "vect_recog_widen_minus_pattern");
>  }
>  
> -/* Function vect_recog_popcount_pattern
> +/* Function vect_recog_popcount_clz_ctz_ffs_pattern
>  
> Try to find the following pattern:
>  
> @@ -1530,16 +1530,20 @@ vect_recog_widen_minus_pattern (vec_info
> * Return value: A new stmt that will be used to replace the sequence of
> stmts that constitute the pattern. In this case it will be:
> B = .POPCOUNT (A);
> +
> +   Similarly for clz, ctz and ffs.
>  */
>  
>  static gimple *
> -vect_recog_popcount_pattern (vec_info *vinfo,
> -  stmt_vec_info stmt_vinfo, tree *type_out)
> +vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
> +  stmt_vec_info stmt_vinfo,
> +  tree *type_out)
>  {
>gassign *last_stmt = dyn_cast  (stmt_vinfo->stmt);
> -  gimple *popcount_stmt, *pattern_stmt;
> +  gimple *call_stmt, *pattern_stmt;
>tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
> -  auto_vec vargs;
> +  internal_fn ifn = IFN_LAST;
> +  int addend = 0;
>  
>/* Find B = (TYPE1) temp_out. */
>if (!last_stmt)
> @@ -1557,51 +1561,137 @@ vect_recog_popcount_pattern (vec_info *v
>if (TREE_CODE (rhs_oprnd) != SSA_NAME
>|| !has_single_use (rhs_oprnd))
>  return NULL;
> -  popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
> +  call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
>  
>/* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
> -  if (!is_gimple_call (popcount_stmt))
> +  if (!is_gimple_call (call_stmt))
>  return NULL;
> -  switch (gimple_call_combined_fn (popcount_stmt))
> +  switch (gimple_call_combined_fn (call_stmt))
>  {
> +  int val;
>  CASE_CFN_POPCOUNT:
> +  ifn = IFN_POPCOUNT;
> +  break;
> +CASE_CFN_CLZ:
> +  ifn = IFN_CLZ;
> +  /* Punt if call result is unsigned and defined value at zero
> +  is negative, as the negative value doesn't extend correctly.  */
> +  if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
> +   && gimple_call_internal_p (call_stmt)
> +   && CLZ_DEFINED_VALUE_AT_ZERO
> +(SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
> +   && val < 0)
> + return NULL;
> +  break;
> +CASE_CFN_CTZ:
> +  ifn = IFN_CTZ;
> +  /* Punt if call result is unsigned and defined value at zero
> +  is negative, as the negative value doesn't extend correctly.  */
> +  if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
> +   && gimple_call_internal_p (call_stmt)
> +   && CTZ_DEFINED_VALUE_AT_ZERO
> +(SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
> +   && val < 0)
> + return NULL;
> +  break;
> +CASE_CFN_FFS:
> +  ifn = IFN_FFS;
>break;
>  default:
>return NULL;
>  }
>  
> -  if (gimple_call_num_args (popcount_stmt) != 1)
> +  if (gimple_call_num_args (call_stmt) != 1)
>  return NULL;
>  
> -  rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
> +  rhs_oprnd = gimple_call_arg (call_stmt, 0);
>vect_unpromoted_value unprom_diff;
> -  rhs_origin = vect_lo

[PATCH] c: Avoid -Wenum-int-mismatch warning for redeclaration of builtin acc_on_device [PR107041]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
Hi!

The new -Wenum-int-mismatch warning triggers with -Wsystem-headers in
<openacc.h>, for obvious reasons the builtin acc_on_device uses int
type argument rather than enum which isn't defined yet when the builtin
is created, while the OpenACC spec requires it to have acc_device_t
enum argument.  The header makes sure it has int underlying type by using
negative and __INT_MAX__ enumerators.

I've tried to make the builtin typegeneric or just varargs, but that
changes behavior e.g. when one calls it with some C++ class which has
cast operator to acc_device_t, so the following patch instead disables
the warning for this builtin.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
and 13.2?

2023-04-19  Jakub Jelinek  

PR c/107041
* c-decl.cc (diagnose_mismatched_decls): Avoid -Wenum-int-mismatch
warning on acc_on_device declaration.

* gcc.dg/goacc/pr107041.c: New test.

--- gcc/c/c-decl.cc.jj  2023-03-10 10:10:17.918387120 +0100
+++ gcc/c/c-decl.cc 2023-04-18 10:29:33.340793562 +0200
@@ -2219,7 +2219,14 @@ diagnose_mismatched_decls (tree newdecl,
 }
   /* Warn about enum/integer type mismatches.  They are compatible types
  (C2X 6.7.2.2/5), but may pose portability problems.  */
-  else if (enum_and_int_p && TREE_CODE (newdecl) != TYPE_DECL)
+  else if (enum_and_int_p
+  && TREE_CODE (newdecl) != TYPE_DECL
+  /* Don't warn about about acc_on_device builtin redeclaration,
+ the builtin is declared with int rather than enum because
+ the enum isn't intrinsic.  */
+  && !(TREE_CODE (olddecl) == FUNCTION_DECL
+   && fndecl_built_in_p (olddecl, BUILT_IN_ACC_ON_DEVICE)
+   && !C_DECL_DECLARED_BUILTIN (olddecl)))
 warned = warning_at (DECL_SOURCE_LOCATION (newdecl),
 OPT_Wenum_int_mismatch,
 "conflicting types for %q+D due to enum/integer "
--- gcc/testsuite/gcc.dg/goacc/pr107041.c.jj2023-04-18 10:18:07.039754258 
+0200
+++ gcc/testsuite/gcc.dg/goacc/pr107041.c   2023-04-18 10:17:21.252418797 
+0200
@@ -0,0 +1,23 @@
+/* PR c/107041 */
+/* { dg-do compile } */
+/* { dg-additional-options "-Wenum-int-mismatch" } */
+
+typedef enum acc_device_t {
+  acc_device_current = -1,
+  acc_device_none = 0,
+  acc_device_default = 1,
+  acc_device_host = 2,
+  acc_device_not_host = 4,
+  acc_device_nvidia = 5,
+  acc_device_radeon = 8,
+  _ACC_highest = __INT_MAX__
+} acc_device_t;
+
+int acc_on_device (acc_device_t);  /* { dg-bogus "conflicting 
types for 'acc_on_device' due to enum/integer mismatch; have 
'int\\\(acc_device_t\\\)'" } */
+int acc_on_device (acc_device_t);
+
+int
+foo (void)
+{
+  return acc_on_device (acc_device_host);
+}

Jakub



[PATCH v3] RISC-V: Align IOR optimization MODE_CLASS condition to AND.

2023-04-19 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch aligns the MODE_CLASS condition of the IOR to the AND. Then
more MODE_CLASSes besides SCALAR_INT are able to perform the optimization
A | (~A) -> -1, similar to the AND operator. See the sample code below.

vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t vl)
{
  return __riscv_vmorn_mm_b32(v1, v1, vl);
}

Before this patch:
vsetvli  a5,zero,e8,mf4,ta,ma
vlm.v    v24,0(a1)
vsetvli  zero,a2,e8,mf4,ta,ma
vmorn.mm v24,v24,v24
vsetvli  a5,zero,e8,mf4,ta,ma
vsm.v    v24,0(a0)
ret

After this patch:
vsetvli zero,a2,e8,mf4,ta,ma
vmset.m v24
vsetvli a5,zero,e8,mf4,ta,ma
vsm.v   v24,0(a0)
ret

Or in RTL's perspective,
from:
(ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 [ v1 ])))
to:
(const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ])

A similar optimization for VMANDN is already enabled. There should
be no difference except the operator when comparing VMORN and VMANDN
for this kind of optimization. The patch aligns the IOR MODE_CLASS condition
of the simplification to the AND operator.

gcc/ChangeLog:

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
  Align IOR (A | (~A) -> -1) optimization MODE_CLASS condition to AND.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/mask_insn_shortcut.c: Update check
  condition.
* gcc.target/riscv/simplify_ior_optimization.c: New test.

Signed-off-by: Pan Li 
---
 gcc/simplify-rtx.cc   |  4 +-
 .../riscv/rvv/base/mask_insn_shortcut.c   |  3 +-
 .../riscv/simplify_ior_optimization.c | 50 +++
 3 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index c57ff3320ee..d4aeebc7a5f 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3370,8 +3370,8 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
   if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1))
   || (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0)))
  && ! side_effects_p (op0)
- && SCALAR_INT_MODE_P (mode))
-   return constm1_rtx;
+ && GET_MODE_CLASS (mode) != MODE_CC)
+   return CONSTM1_RTX (mode);
 
   /* (ior A C) is C if all bits of A that might be nonzero are on in C.  */
   if (CONST_INT_P (op1)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
index 83cc4a1b5a5..57d0241675a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
@@ -233,9 +233,8 @@ vbool64_t test_shortcut_for_riscv_vmxnor_case_6(vbool64_t 
v1, size_t vl) {
 /* { dg-final { scan-assembler-not {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
 /* { dg-final { scan-assembler-not {vmor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
 /* { dg-final { scan-assembler-not {vmnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
-/* { dg-final { scan-assembler-times 
{vmorn\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 7 } } */
 /* { dg-final { scan-assembler-not {vmxnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
 /* { dg-final { scan-assembler-times {vmclr\.m\s+v[0-9]+} 14 } } */
-/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 7 } } */
+/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 14 } } */
 /* { dg-final { scan-assembler-times {vmmv\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } */
 /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c 
b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
new file mode 100644
index 000..ec3bd0baf03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */
+
+#include <stdint.h>
+
+uint8_t test_simplify_ior_scalar_case_0 (uint8_t a)
+{
+  return a | ~a;
+}
+
+uint16_t test_simplify_ior_scalar_case_1 (uint16_t a)
+{
+  return a | ~a;
+}
+
+uint32_t test_simplify_ior_scalar_case_2 (uint32_t a)
+{
+  return a | ~a;
+}
+
+uint64_t test_simplify_ior_scalar_case_3 (uint64_t a)
+{
+  return a | ~a;
+}
+
+int8_t test_simplify_ior_scalar_case_4 (int8_t a)
+{
+  return a | ~a;
+}
+
+int16_t test_simplify_ior_scalar_case_5 (int16_t a)
+{
+  return a | ~a;
+}
+
+int32_t test_simplify_ior_scalar_case_6 (int32_t a)
+{
+  return a | ~a;
+}
+
+int64_t test_simplify_ior_scalar_case_7 (int64_t a)
+{
+  return a | ~a;
+}
+
+/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*-1} 6 } } */
+/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*255} 1 } } */
+/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*65536} 1 } } */
+/* { dg-final { scan-assembler-not {or\s+a[0-9]+} } } */
+/* { dg-final { scan-assembler-not {not\s+a[0-9]+} } } */
-- 
2.34.1



RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

2023-04-19 Thread Li, Pan2 via Gcc-patches
Thank you for the information. Updated to the v3 version as below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616154.html

Pan

-Original Message-
From: Richard Biener  
Sent: Wednesday, April 19, 2023 4:52 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
richard.sandif...@arm.com; Wang, Yanzhang 
Subject: RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) optimization

On Wed, 19 Apr 2023, Li, Pan2 wrote:

> Hi Richard,
> 
> Do you have any idea about this? I used git gcc-commit-mklog, and it
> generates something like the text below. There is no text after the colon.
> I am not sure whether I need to add something myself.

Well, you need to add a description of your change!

> gcc/ChangeLog:
> 
> * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
>  <=== no text here.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: 
><=== no text here.
> * gcc.target/riscv/simplify_ior_optimization.c: New test.
> 
> # Please enter the commit message for your changes. Lines starting # 
> with '#' will be ignored, and an empty message aborts the commit.
> #
> # On branch master
> # Your branch is up to date with 'origin/master'.
> #
> # Changes to be committed:
> #...modified:   gcc/simplify-rtx.cc
> #...modified:   
> gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> #...new file:   gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> 
> Pan
> 
> -Original Message-
> From: Li, Pan2
> Sent: Wednesday, April 19, 2023 2:47 PM
> To: Richard Biener 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; 
> kito.ch...@sifive.com; richard.sandif...@arm.com; Wang, Yanzhang 
> 
> Subject: RE: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) 
> optimization
> 
> Oh, I see. The message needs to be re-generated. Thank you for pointing that out,
> will update ASAP.
> 
> Pan
> 
> -Original Message-
> From: Richard Biener 
> Sent: Wednesday, April 19, 2023 2:40 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; 
> kito.ch...@sifive.com; richard.sandif...@arm.com; Wang, Yanzhang 
> 
> Subject: Re: [PATCH v2] RISC-V: Allow Vector IOR(V1, NOT V1) 
> optimization
> 
> On Tue, 18 Apr 2023, pan2...@intel.com wrote:
> 
> > From: Pan Li 
> > 
> > This patch adds the optimization for the vector IOR(V1, NOT V1).
> > Assume we have the sample code below.
> > 
> > vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t
> > vl) {
> >   return __riscv_vmorn_mm_b32(v1, v1, vl); }
> > 
> > Before this patch:
> > vsetvli  a5,zero,e8,mf4,ta,ma
> > vlm.v    v24,0(a1)
> > vsetvli  zero,a2,e8,mf4,ta,ma
> > vmorn.mm v24,v24,v24
> > vsetvli  a5,zero,e8,mf4,ta,ma
> > vsm.v    v24,0(a0)
> > ret
> > 
> > After this patch:
> > vsetvli zero,a2,e8,mf4,ta,ma
> > vmset.m v24
> > vsetvli a5,zero,e8,mf4,ta,ma
> > vsm.v   v24,0(a0)
> > ret
> > 
> > Or in RTL's perspective,
> > from:
> > (ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 
> > [
> > v1 ])))
> > to:
> > (const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ])
> > 
> > A similar optimization for VMANDN is already enabled. There
> > should be no difference except the operator when comparing VMORN
> > and VMANDN for this kind of optimization. The patch allows the
> > VECTOR_BOOL IOR(V1, NOT V1) simplification besides the existing SCALAR_INT
> > mode.
> > 
> > gcc/ChangeLog:
> > 
> > * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
> 
> This needs some text
> 
> > gcc/testsuite/ChangeLog:
> > 
> > * gcc.target/riscv/rvv/base/mask_insn_shortcut.c:
> 
> Likewise.
> 
> OK with that fixed.
> 
> > * gcc.target/riscv/simplify_ior_optimization.c: New test.
> > 
> > Signed-off-by: Pan Li 
> > ---
> >  gcc/simplify-rtx.cc   |  4 +-
> >  .../riscv/rvv/base/mask_insn_shortcut.c   |  3 +-
> >  .../riscv/simplify_ior_optimization.c | 50 +++
> >  3 files changed, 53 insertions(+), 4 deletions(-)  create mode 
> > 100644 gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> > 
> > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 
> > ee75079917f..3bc9b2f55ea 100644
> > --- a/gcc/simplify-rtx.cc
> > +++ b/gcc/simplify-rtx.cc
> > @@ -3332,8 +3332,8 @@ simplify_context::simplify_binary_operation_1 
> > (rtx_code code,
> >if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1))
> >|| (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0)))
> >   && ! side_effects_p (op0)
> > - && SCALAR_INT_MODE_P (mode))
> > -   return constm1_rtx;
> > + && GET_MODE_CLASS (mode) != MODE_CC)
> > +   return CONSTM1_RTX (mode);
> >  
> >/* (ior A C) is C if all bits of A that might be nonzero are on in 
> > C.  */
> >if (CONST_INT_P (op1)
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> > b/gcc/testsuite/gcc.targ

Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v

2023-04-19 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni
 wrote:
>
> On Tue, 11 Apr 2023 at 14:17, Richard Biener  
> wrote:
> >
> > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches
> >  wrote:
> > >
> > > Hi,
> > > For the following test:
> > >
> > > svint32_t f(svint32_t v)
> > > {
> > >   return svrev_s32 (svrev_s32 (v));
> > > }
> > >
> > > We generate 2 rev instructions instead of nop:
> > > f:
> > > rev z0.s, z0.s
> > > rev z0.s, z0.s
> > > ret
> > >
> > > The attached patch tries to fix that by trying to recognize the following
> > > pattern in match.pd:
> > > v1 = VEC_PERM_EXPR (v0, v0, mask)
> > > v2 = VEC_PERM_EXPR (v1, v1, mask)
> > > -->
> > > v2 = v0
> > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... }
> > >
> > > Code-gen with patch:
> > > f:
> > > ret
> > >
> > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress.
> > > Does it look OK for stage-1 ?
> >
> > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation 
> > should
> > handle two consecutive permutes with the is_combined_permutation_identity
> > which might need tweaking for VLA vectors
> Hi Richard,
> Thanks for the suggestions. The attached patch modifies
> is_combined_permutation_identity
> to recognize the above pattern.
> Does it look OK ?
> Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu.
Hi,
ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html

Thanks,
Prathamesh
>
> Thanks,
> Prathamesh
> >
> > Richard.
> >
> > >
> > > Thanks,
> > > Prathamesh


[PATCH][committed] aarch64: Delete __builtin_aarch64_neg* builtins and their use

2023-04-19 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

I don't think we need to keep the __builtin_aarch64_neg* builtins around.
They are only used once in the vnegh_f16 intrinsic in arm_fp16.h and AFAICT
it was added this way only for the sake of orthogonality in
https://gcc.gnu.org/g:d7f33f07d88984cbe769047e3d07fc21067fbba9
We already use normal "-" negation in the other vneg* intrinsics, so do so here 
as well.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd-builtins.def (neg): Delete builtins
definition.
* config/aarch64/arm_fp16.h (vnegh_f16): Reimplement using normal 
negation.


neg-remove.patch
Description: neg-remove.patch


Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Kito Cheng via Gcc-patches
Hi Pan:

>rtx expand (function_expander &e) const override diff --git 
> a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 
> 0ecca98f20c..6819363b9ff 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1032,6 +1032,7 @@ (define_insn_and_split "@pred_mov"
> [(match_operand:VB 1 "vector_all_trues_mask_operand" "Wc1, Wc1, 
> Wc1, Wc1, Wc1")
>  (match_operand 4 "vector_length_operand"" rK,  rK,  
> rK,  rK,  rK")
>  (match_operand 5 "const_int_operand""  i,   i,   
> i,   i,   i")
> +(match_operand 6 "const_int_operand""  i,   i,   
> i,   i,   i")

I don't understand how having one more tail policy operand for this pattern
helps.
Would you mind explaining this in more detail?

Thanks :)


[PATCH] Transform more gmp/mpfr uses to use RAII

2023-04-19 Thread Richard Biener via Gcc-patches
The following picks up the coccinelle generated patch from Bernhard,
leaving out the fortran frontend parts and fixing up the rest.
In particular both gmp.h and mpfr.h contain macros like
  #define mpfr_inf_p(_x)  ((_x)->_mpfr_exp == __MPFR_EXP_INF)
for which I add operator-> overloads to the auto_* classes.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

CCed Jonathan just to make sure I'm not doing anything stupid C++ wise
here.

* system.h (auto_mpz::operator->()): New.
* realmpfr.h (auto_mpfr::operator->()): New.
* builtins.cc (do_mpfr_lgamma_r): Use auto_mpfr.
* real.cc (real_from_string): Likewise.
(dconst_e_ptr): Likewise.
(dconst_sqrt2_ptr): Likewise.
* tree-ssa-loop-niter.cc (refine_value_range_using_guard):
Use auto_mpz.
(bound_difference_of_offsetted_base): Likewise.
(number_of_iterations_ne): Likewise.
(number_of_iterations_lt_to_ne): Likewise.
* ubsan.cc: Include realmpfr.h.
(ubsan_instrument_float_cast): Use auto_mpfr.
---
 gcc/builtins.cc|  4 +---
 gcc/real.cc| 22 +-
 gcc/realmpfr.h |  1 +
 gcc/system.h   |  1 +
 gcc/tree-ssa-loop-niter.cc | 29 -
 gcc/ubsan.cc   |  9 -
 6 files changed, 20 insertions(+), 46 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 1bfdc598eec..80b8b89d98b 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -11084,15 +11084,13 @@ do_mpfr_lgamma_r (tree arg, tree arg_sg, tree type)
  const int prec = fmt->p;
  const mpfr_rnd_t rnd = fmt->round_towards_zero? MPFR_RNDZ : MPFR_RNDN;
  int inexact, sg;
- mpfr_t m;
  tree result_lg;
 
- mpfr_init2 (m, prec);
+ auto_mpfr m (prec);
  mpfr_from_real (m, ra, MPFR_RNDN);
  mpfr_clear_flags ();
  inexact = mpfr_lgamma (m, &sg, m, rnd);
  result_lg = do_mpfr_ckconv (m, type, inexact);
- mpfr_clear (m);
  if (result_lg)
{
  tree result_sg;
diff --git a/gcc/real.cc b/gcc/real.cc
index 126695bf2e2..cf164e5e945 100644
--- a/gcc/real.cc
+++ b/gcc/real.cc
@@ -2131,7 +2131,6 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str)
 {
   /* Decimal floating point.  */
   const char *cstr = str;
-  mpfr_t m;
   bool inexact;
 
   while (*cstr == '0')
@@ -2148,21 +2147,15 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str)
goto is_a_zero;
 
   /* Nonzero value, possibly overflowing or underflowing.  */
-  mpfr_init2 (m, SIGNIFICAND_BITS);
+  auto_mpfr m (SIGNIFICAND_BITS);
   inexact = mpfr_strtofr (m, str, NULL, 10, MPFR_RNDZ);
   /* The result should never be a NaN, and because the rounding is
 toward zero should never be an infinity.  */
   gcc_assert (!mpfr_nan_p (m) && !mpfr_inf_p (m));
   if (mpfr_zero_p (m) || mpfr_get_exp (m) < -MAX_EXP + 4)
-   {
- mpfr_clear (m);
- goto underflow;
-   }
+   goto underflow;
   else if (mpfr_get_exp (m) > MAX_EXP - 4)
-   {
- mpfr_clear (m);
- goto overflow;
-   }
+   goto overflow;
   else
{
  real_from_mpfr (r, m, NULL_TREE, MPFR_RNDZ);
@@ -2173,7 +2166,6 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str)
  gcc_assert (r->cl == rvc_normal);
  /* Set a sticky bit if mpfr_strtofr was inexact.  */
  r->sig[0] |= inexact;
- mpfr_clear (m);
}
 }
 
@@ -2474,12 +2466,10 @@ dconst_e_ptr (void)
  These constants need to be given to at least 160 bits precision.  */
   if (value.cl == rvc_zero)
 {
-  mpfr_t m;
-  mpfr_init2 (m, SIGNIFICAND_BITS);
+  auto_mpfr m (SIGNIFICAND_BITS);
   mpfr_set_ui (m, 1, MPFR_RNDN);
   mpfr_exp (m, m, MPFR_RNDN);
   real_from_mpfr (&value, m, NULL_TREE, MPFR_RNDN);
-  mpfr_clear (m);
 
 }
   return &value;
@@ -2517,11 +2507,9 @@ dconst_sqrt2_ptr (void)
  These constants need to be given to at least 160 bits precision.  */
   if (value.cl == rvc_zero)
 {
-  mpfr_t m;
-  mpfr_init2 (m, SIGNIFICAND_BITS);
+  auto_mpfr m (SIGNIFICAND_BITS);
   mpfr_sqrt_ui (m, 2, MPFR_RNDN);
   real_from_mpfr (&value, m, NULL_TREE, MPFR_RNDN);
-  mpfr_clear (m);
 }
   return &value;
 }
diff --git a/gcc/realmpfr.h b/gcc/realmpfr.h
index 3824e62da19..a2b1bf6a2db 100644
--- a/gcc/realmpfr.h
+++ b/gcc/realmpfr.h
@@ -32,6 +32,7 @@ public:
   ~auto_mpfr () { mpfr_clear (m_mpfr); }
 
   operator mpfr_t& () { return m_mpfr; }
+  mpfr_ptr operator-> () { return m_mpfr; }
 
   auto_mpfr (const auto_mpfr &) = delete;
   auto_mpfr &operator= (const auto_mpfr &) = delete;
diff --git a/gcc/system.h b/gcc/system.h
index 65d514d3c01..c67bc42863f 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -709,6 +709,7 @@ public:
   ~auto_mpz () { mpz_clear (m_mpz); }
 
   

Re: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread juzhe.zh...@rivai.ai
The vms pattern has tail policy + avl_type operands,
whereas pred_mov (vmset.m/vmclr.m) only has the avl_type operand.
GCC recognizes (lt:(reg v) (reg v)) and lowers it into (const_vector:0),
then maps it into the pred_mov pattern (when both the pred_mov and vms
patterns have tail policy + avl_type operands).



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-04-19 17:34
To: Li, Pan2
CC: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, 
Yanzhang
Subject: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization
Hi Pan:
 
>rtx expand (function_expander &e) const override diff --git 
> a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 
> 0ecca98f20c..6819363b9ff 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1032,6 +1032,7 @@ (define_insn_and_split "@pred_mov"
> [(match_operand:VB 1 "vector_all_trues_mask_operand" "Wc1, Wc1, 
> Wc1, Wc1, Wc1")
>  (match_operand 4 "vector_length_operand"" rK,  rK,  
> rK,  rK,  rK")
>  (match_operand 5 "const_int_operand""  i,   i,   
> i,   i,   i")
> +(match_operand 6 "const_int_operand""  i,   i,   
> i,   i,   i")
 
I didn't get why having one more tail policy operand for this pattern
could help?
Do you mind explaining this in more detail?
 
Thanks :)
 


Re: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Kito Cheng via Gcc-patches
HI JuZhe:

Thanks for explaining!


Hi Pan:

I think it would be helpful if JuZhe's explanation could be
written into the commit log.


> gcc/ChangeLog:
>
>* config/riscv/riscv-v.cc (emit_pred_op):
>* config/riscv/riscv-vector-builtins-bases.cc:
>* config/riscv/vector.md:

And don't forget to write something in the ChangeLog... :P


[PATCH] Simplify gimple_assign_load

2023-04-19 Thread Richard Biener via Gcc-patches
The following simplifies and outlines gimple_assign_load.  In
particular it is not necessary to get at the base of the possibly
loaded expression but just handle the case of a single handled
component wrapping a non-memory operand.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* gimple.h (gimple_assign_load): Outline...
* gimple.cc (gimple_assign_load): ... here.  Avoid
get_base_address and instead just strip the outermost
handled component, treating a remaining handled component
as load.
---
 gcc/gimple.cc | 20 
 gcc/gimple.h  | 18 +-
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/gcc/gimple.cc b/gcc/gimple.cc
index 5e4eda417fb..e0ba42add39 100644
--- a/gcc/gimple.cc
+++ b/gcc/gimple.cc
@@ -1788,6 +1788,26 @@ gimple_assign_unary_nop_p (gimple *gs)
   == TYPE_MODE (TREE_TYPE (gimple_assign_rhs1 (gs);
 }
 
+/* Return true if GS is an assignment that loads from its rhs1.  */
+
+bool
+gimple_assign_load_p (const gimple *gs)
+{
+  tree rhs;
+  if (!gimple_assign_single_p (gs))
+return false;
+  rhs = gimple_assign_rhs1 (gs);
+  if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
+return true;
+  if (handled_component_p (rhs))
+rhs = TREE_OPERAND (rhs, 0);
+  return (handled_component_p (rhs)
+ || DECL_P (rhs)
+ || TREE_CODE (rhs) == MEM_REF
+ || TREE_CODE (rhs) == TARGET_MEM_REF);
+}
+
+
 /* Set BB to be the basic block holding G.  */
 
 void
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 081d18e425a..daf55242f68 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1629,6 +1629,7 @@ tree gimple_call_nonnull_arg (gcall *);
 bool gimple_assign_copy_p (gimple *);
 bool gimple_assign_ssa_name_copy_p (gimple *);
 bool gimple_assign_unary_nop_p (gimple *);
+bool gimple_assign_load_p (const gimple *);
 void gimple_set_bb (gimple *, basic_block);
 void gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *, tree);
 void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code,
@@ -2952,23 +2953,6 @@ gimple_store_p (const gimple *gs)
   return lhs && !is_gimple_reg (lhs);
 }
 
-/* Return true if GS is an assignment that loads from its rhs1.  */
-
-inline bool
-gimple_assign_load_p (const gimple *gs)
-{
-  tree rhs;
-  if (!gimple_assign_single_p (gs))
-return false;
-  rhs = gimple_assign_rhs1 (gs);
-  if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
-return true;
-  rhs = get_base_address (rhs);
-  return (DECL_P (rhs)
- || TREE_CODE (rhs) == MEM_REF || TREE_CODE (rhs) == TARGET_MEM_REF);
-}
-
-
 /* Return true if S is a type-cast assignment.  */
 
 inline bool
-- 
2.35.3


[PATCH v8] RISC-V: Add the 'zfa' extension, version 0.2.

2023-04-19 Thread Jin Ma via Gcc-patches
This patch adds the 'Zfa' extension for riscv, which is based on:
  https://github.com/riscv/riscv-isa-manual/commits/zfb
  
https://github.com/riscv/riscv-isa-manual/commit/1f038182810727f5feca311072e630d6baac51da

The binutils-gdb for 'Zfa' extension:
  https://github.com/a4lg/binutils-gdb/commits/riscv-zfa

What needs special explanation is:
1, The immediate of the instructions FLI.H/S/D is represented in the assembly
  as a floating-point value, using scientific notation when rs1 is 1 or 2, and
  decimal notation for the rest.

  Related llvm link:
https://reviews.llvm.org/D145645
  Related discussion link:
https://github.com/riscv/riscv-isa-manual/issues/980

2, According to riscv-spec, "The FCVTMOD.W.D instruction was added principally
  to accelerate the processing of JavaScript Numbers.", so it seems that no
  implementation is required.

3, The instructions FMINM and FMAXM correspond to the C23 library functions
  fminimum and fmaximum.  Therefore, this patch simply implements the patterns
  fminm3 and fmaxm3 to prepare for later use.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Add zfa extension version.
* config/riscv/constraints.md (Zf): Constrain the floating point number 
that the
instructions FLI.H/S/D can load.
((TARGET_XTHEADFMV || TARGET_ZFA) ? FP_REGS : NO_REGS): enable FMVP.D.X 
and FMVH.X.D.
* config/riscv/iterators.md (ceil): New.
* config/riscv/riscv-protos.h (riscv_float_const_rtx_index_for_fli): 
New.
* config/riscv/riscv.cc (find_index_in_array): New.
(riscv_float_const_rtx_index_for_fli): Get the index of the 
floating-point number that
the instructions FLI.H/S/D can mov.
(riscv_cannot_force_const_mem): If instruction FLI.H/S/D can be used, 
memory is not applicable.
(riscv_const_insns): The cost of FLI.H/S/D is 3.
(riscv_legitimize_const_move): Likewise.
(riscv_split_64bit_move_p): If instruction FLI.H/S/D can be used, no 
split is required.
(riscv_output_move): Output the mov instructions in zfa extension.
(riscv_print_operand): Output the floating-point value of the FLI.H/S/D 
immediate in assembly
(riscv_secondary_memory_needed): Likewise.
* config/riscv/riscv.h (GP_REG_RTX_P): New.
* config/riscv/riscv.md (fminm3): New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zfa-fleq-fltq-rv32.c: New test.
* gcc.target/riscv/zfa-fleq-fltq.c: New test.
* gcc.target/riscv/zfa-fli-rv32.c: New test.
* gcc.target/riscv/zfa-fli-zfh-rv32.c: New test.
* gcc.target/riscv/zfa-fli-zfh.c: New test.
* gcc.target/riscv/zfa-fli.c: New test.
* gcc.target/riscv/zfa-fmovh-fmovp-rv32.c: New test.
* gcc.target/riscv/zfa-fround-rv32.c: New test.
* gcc.target/riscv/zfa-fround.c: New test.
---
 gcc/common/config/riscv/riscv-common.cc   |   4 +
 gcc/config/riscv/constraints.md   |  11 +-
 gcc/config/riscv/iterators.md |   5 +
 gcc/config/riscv/riscv-opts.h |   3 +
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv.cc | 168 +-
 gcc/config/riscv/riscv.h  |   1 +
 gcc/config/riscv/riscv.md | 112 +---
 .../gcc.target/riscv/zfa-fleq-fltq-rv32.c |  19 ++
 .../gcc.target/riscv/zfa-fleq-fltq.c  |  19 ++
 gcc/testsuite/gcc.target/riscv/zfa-fli-rv32.c |  79 
 .../gcc.target/riscv/zfa-fli-zfh-rv32.c   |  41 +
 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c  |  41 +
 gcc/testsuite/gcc.target/riscv/zfa-fli.c  |  79 
 .../gcc.target/riscv/zfa-fmovh-fmovp-rv32.c   |  10 ++
 .../gcc.target/riscv/zfa-fround-rv32.c|  42 +
 gcc/testsuite/gcc.target/riscv/zfa-fround.c   |  42 +
 17 files changed, 652 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fleq-fltq-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fleq-fltq.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fround-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fround.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 309a52def75..f9fce6bcc38 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -217,6 +217,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfa", ISA_SPEC_CLASS_NONE, 0, 2},
+
   {"zmm

Re: [PATCH] i386: Add new pattern for zero-extend cmov

2023-04-19 Thread Uros Bizjak via Gcc-patches
On Wed, Apr 19, 2023 at 1:33 AM Andrew Pinski via Gcc-patches
 wrote:
>
> After a phiopt change, I got a failure of cmov9.c.
> The RTL IR has zero_extend on the outside of
> the if_then_else rather than on the side. Both
> ways are considered canonical as mentioned in
> PR 66588.
>
> This fixes the failure I got and also adds a testcase
> which fails before even my phiopt patch but will pass
> with this patch.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with
> no regressions.
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*movsicc_noc_zext_1): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/cmov10.c: New test.
> * gcc.target/i386/cmov11.c: New test.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md| 16 
>  gcc/testsuite/gcc.target/i386/cmov10.c | 10 ++
>  gcc/testsuite/gcc.target/i386/cmov11.c | 10 ++
>  3 files changed, 36 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/cmov10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/cmov11.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 1419ea4cff3..10f15b1e8a8 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -21959,6 +21959,22 @@ (define_insn "*movsicc_noc_zext"
>[(set_attr "type" "icmov")
> (set_attr "mode" "SI")])
>
> +(define_insn "*movsicc_noc_zext_1"
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r")
> +   (zero_extend:DI
> + (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
> +[(reg FLAGS_REG) (const_int 0)])
> +(match_operand:SI 2 "nonimmediate_operand" "rm,0")
> +(match_operand:SI 3 "nonimmediate_operand" "0,rm"]
> +  "TARGET_64BIT
> +   && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
> +  "@
> +   cmov%O2%C1\t{%2, %k0|%k0, %2}
> +   cmov%O2%c1\t{%3, %k0|%k0, %3}"
> +  [(set_attr "type" "icmov")
> +   (set_attr "mode" "SI")])
> +
> +
>  ;; Don't do conditional moves with memory inputs.  This splitter helps
>  ;; register starved x86_32 by forcing inputs into registers before reload.
>  (define_split
> diff --git a/gcc/testsuite/gcc.target/i386/cmov10.c 
> b/gcc/testsuite/gcc.target/i386/cmov10.c
> new file mode 100644
> index 000..9ba23b191fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/cmov10.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -dp" } */
> +/* { dg-final { scan-assembler-not "zero_extendsidi" } } */
> +
> +
> +void foo (unsigned long long *d, int a, unsigned int b, unsigned int c)
> +{
> +  *d = a ? b : c;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/i386/cmov11.c 
> b/gcc/testsuite/gcc.target/i386/cmov11.c
> new file mode 100644
> index 000..ba8a5e692b1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/cmov11.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -dp" } */
> +/* { dg-final { scan-assembler-not "zero_extendsidi" } } */
> +
> +unsigned long long foo (int a, unsigned b, unsigned  c)
> +{
> +  unsigned t = a ? b : c;
> +  return t;
> +}
> +
> --
> 2.31.1
>


Pushed: [PATCH] LoongArch: Improve GAR store for va_list

2023-04-19 Thread Xi Ruoyao via Gcc-patches
On Tue, 2023-04-18 at 20:03 +0800, Lulu Cheng wrote:
> 
> 在 2023/4/18 下午7:48, Xi Ruoyao 写道:
> > On Tue, 2023-04-18 at 19:21 +0800, Lulu Cheng wrote:
> > > 在 2023/4/18 下午5:27, Xi Ruoyao 写道:
> > > > On Mon, 2023-04-10 at 17:45 +0800, Lulu Cheng wrote:
> > > > > Sorry, it's my question. I still have some questions that I
> > > > > haven't
> > > > > understood, so I haven't replied to the email yet.:-(
> > > > I've verified the value of cfun->va_list_gpr_size with -fdump-
> > > > tree-
> > > > stdarg and various testcases (including extracting aggregates
> > > > and
> > > > floating-point values in the va list) and the result seems
> > > > correct.  And
> > > > gcc/testsuite/gcc.c-torture/execute/va-arg-*.c should provide a
> > > > good
> > > > enough test coverage.
> > > > 
> > > > Is there still something seemly problematic?
> > > 
> > > I think there is no problem with the code modification, but I
> > > found that
> > > the $r12 register is stored whether or not this patch is added. I
> > > don't
> > > understand why.:-(
> > It has been stored before the change:
> > 
> > test:
> > .LFB0 = .
> > .cfi_startproc
> > addi.d  $r3,$r3,-80
> > .cfi_def_cfa_offset 80
> > addi.d  $r12,$r3,24
> > st.d$r5,$r3,24
> > st.d$r6,$r3,32
> > st.d$r7,$r3,40
> > st.d$r8,$r3,48
> > st.d$r9,$r3,56
> > st.d$r10,$r3,64
> > st.d$r11,$r3,72
> > st.d$r12,$r3,8    # <=
> > add.w   $r4,$r5,$r4
> > addi.d  $r3,$r3,80
> > .cfi_def_cfa_offset 0
> > jr  $r1
> > .cfi_endproc
> > 
> > AFAIK it's related to how the variable arguments are implemented in
> > general.  The problem is when we expands __builtin_va_list or
> > __builtin_va_arg, the registers containing the variable arguments
> > and
> > the pointer to the variable argument store area (r12 in this case)
> > may
> > be already clobbered, so the compiler have to store them expanding
> > the
> > prologue of the function (when the prologue is expanded we don't
> > know if
> > the following code will clobber the registers).
> > 
> > This also causes a difficulty to avoid saving the GARs for *used*
> > variable arguments as well.
> > 
> > On x86_64 we have the same issue:
> > 
> > test:
> > .LFB0:
> > .cfi_startproc
> > leaq8(%rsp), %rax
> > movq%rsi, -40(%rsp)
> > movq%rax, -64(%rsp)    # <=
> > leaq-48(%rsp), %rax
> > movq%rax, -56(%rsp)
> > movl-40(%rsp), %eax
> > movl$8, -72(%rsp)
> > addl%edi, %eax
> > ret
> > .cfi_endproc
> > 
> > I'll try to remove all of these in the GCC 14 development cycle (as
> > they
> > are causing sub-optimal code in various Glibc functions), but it's
> > not
> > easy...
> > 
> > 
> Ok, I have no more questions.
> 
Pushed r14-69.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Pushed: [GCC14 PATCH] LoongArch: Improve cpymemsi expansion [PR109465]

2023-04-19 Thread Xi Ruoyao via Gcc-patches
On Wed, 2023-04-19 at 11:03 +0800, Lulu Cheng wrote:

/* snip */

> > +loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT
> > length,
> > +  HOST_WIDE_INT delta)
> >   {
> > -  HOST_WIDE_INT offset, delta;
> > -  unsigned HOST_WIDE_INT bits;
> > +  HOST_WIDE_INT offs, delta_cur;
> >     int i;
> >     machine_mode mode;
> >     rtx *regs;
> >   
> > -  bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN
> > (dest)));
> > -
> > -  mode = int_mode_for_size (bits, 0).require ();
> > -  delta = bits / BITS_PER_UNIT;
> > +  HOST_WIDE_INT num_reg = length / delta;
> 
> I think comments need to be added here, if it is not chasing the code,
> it is not easy to understand.

Pushed r14-70 with the following addition:

+  /* Calculate how many registers we'll need for the block move.
+ We'll emit length / delta move operations with delta as the size
+ first.  Then we may still have length % delta bytes not copied.
+ We handle these remaining bytes by move operations with smaller
+ (halfed) sizes.  For example, if length = 21 and delta = 8, we'll
+ emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
+ pair.  For each load/store pair we use a dedicated register to keep
+ the pipeline as populated as possible.  */

> Otherwise LGTM!
> 
> Thanks!
> 
> > +  for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
> > +    num_reg += !!(length & delta_cur);
> >   
> >     /* Allocate a buffer for the temporary registers.  */
> > -  regs = XALLOCAVEC (rtx, length / delta);
> > +  regs = XALLOCAVEC (rtx, num_reg);
> >   
> > -  /* Load as many BITS-sized chunks as possible.  Use a normal load
> > if
> > - the source has enough alignment, otherwise use left/right
> > pairs.  */
> > -  for (offset = 0, i = 0; offset + delta <= length; offset +=
> > delta, i++)
> > +  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur
> > /= 2)
> >   {
> > -  regs[i] = gen_reg_rtx (mode);
> > -  loongarch_emit_move (regs[i], adjust_address (src, mode,
> > offset));
> > -    }
> > +  mode = int_mode_for_size (delta_cur * BITS_PER_UNIT,
> > 0).require ();
> >   
> > -  for (offset = 0, i = 0; offset + delta <= length; offset +=
> > delta, i++)
> > -    loongarch_emit_move (adjust_address (dest, mode, offset),
> > regs[i]);
> > +  for (; offs + delta_cur <= length; offs += delta_cur, i++)
> > +   {
> > + regs[i] = gen_reg_rtx (mode);
> > + loongarch_emit_move (regs[i], adjust_address (src, mode,
> > offs));
> > +   }
> > +    }
> >   
> > -  /* Mop up any left-over bytes.  */
> > -  if (offset < length)
> > +  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur
> > /= 2)
> >   {
> > -  src = adjust_address (src, BLKmode, offset);
> > -  dest = adjust_address (dest, BLKmode, offset);
> > -  move_by_pieces (dest, src, length - offset,
> > - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)),
> > - (enum memop_ret) 0);
> > +  mode = int_mode_for_size (delta_cur * BITS_PER_UNIT,
> > 0).require ();
> > +
> > +  for (; offs + delta_cur <= length; offs += delta_cur, i++)
> > +   loongarch_emit_move (adjust_address (dest, mode, offs),
> > regs[i]);
> >   }
> >   }
> >   
> > @@ -4523,10 +4520,11 @@ loongarch_adjust_block_mem (rtx mem,
> > HOST_WIDE_INT length, rtx *loop_reg,
> >   
> >   static void
> >   loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT
> > length,
> > -  HOST_WIDE_INT bytes_per_iter)
> > +  HOST_WIDE_INT align)
> >   {
> >     rtx_code_label *label;
> >     rtx src_reg, dest_reg, final_src, test;
> > +  HOST_WIDE_INT bytes_per_iter = align *
> > LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
> >     HOST_WIDE_INT leftover;
> >   
> >     leftover = length % bytes_per_iter;
> > @@ -4546,7 +4544,7 @@ loongarch_block_move_loop (rtx dest, rtx src,
> > HOST_WIDE_INT length,
> >     emit_label (label);
> >   
> >     /* Emit the loop body.  */
> > -  loongarch_block_move_straight (dest, src, bytes_per_iter);
> > +  loongarch_block_move_straight (dest, src, bytes_per_iter, align);
> >   
> >     /* Move on to the next block.  */
> >     loongarch_emit_move (src_reg,
> > @@ -4563,7 +4561,7 @@ loongarch_block_move_loop (rtx dest, rtx src,
> > HOST_WIDE_INT length,
> >   
> >     /* Mop up any left-over bytes.  */
> >     if (leftover)
> > -    loongarch_block_move_straight (dest, src, leftover);
> > +    loongarch_block_move_straight (dest, src, leftover, align);
> >     else
> >   /* Temporary fix for PR79150.  */
> >   emit_insn (gen_nop ());
> > @@ -4573,25 +4571,32 @@ loongarch_block_move_loop (rtx dest, rtx
> > src, HOST_WIDE_INT length,
> >  memory reference SRC to memory reference DEST.  */
> >   
> >   bool
> > -loongarch_expand_block_move (rtx dest, rtx src, rtx length)
> > +loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx
> > r_align)
> >

[PATCH] Avoid repeated forwarder_block_p calls in CFG cleanup

2023-04-19 Thread Richard Biener via Gcc-patches
CFG cleanup maintains BB_FORWARDER_BLOCK and uses FORWARDER_BLOCK_P
to check that apart from two places which use forwarder_block_p
in outgoing_edges_match alongside many BB_FORWARDER_BLOCK uses.

The following adjusts those.

Bootstrapped and tested on x86_64-unknown-linux-gnu, I verified
cc1files produce the same code with/without this patch.

OK?

Thanks,
Richard.

* cfgcleanup.cc (outgoing_edges_match): Use FORWARDER_BLOCK_P.
---
 gcc/cfgcleanup.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc
index 78f59e99653..5f1aa29c89d 100644
--- a/gcc/cfgcleanup.cc
+++ b/gcc/cfgcleanup.cc
@@ -1861,9 +1861,9 @@ outgoing_edges_match (int mode, basic_block bb1, 
basic_block bb2)
   /* fallthru edges must be forwarded to the same destination.  */
   if (fallthru1)
 {
-  basic_block d1 = (forwarder_block_p (fallthru1->dest)
+  basic_block d1 = (FORWARDER_BLOCK_P (fallthru1->dest)
? single_succ (fallthru1->dest): fallthru1->dest);
-  basic_block d2 = (forwarder_block_p (fallthru2->dest)
+  basic_block d2 = (FORWARDER_BLOCK_P (fallthru2->dest)
? single_succ (fallthru2->dest): fallthru2->dest);
 
   if (d1 != d2)
-- 
2.35.3


RE: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Li, Pan2 via Gcc-patches
Sure thing.

For the ChangeLog, I thought it was generated automatically. LOL.

Pan

-Original Message-
From: Kito Cheng  
Sent: Wednesday, April 19, 2023 5:46 PM
To: juzhe.zh...@rivai.ai
Cc: Li, Pan2 ; gcc-patches ; 
Kito.cheng ; Wang, Yanzhang 
Subject: Re: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut 
optimization

HI JuZhe:

Thanks for explaining!


Hi Pan:

I think it would be helpful if JuZhe's explanation could be written into
the commit log.


> gcc/ChangeLog:
>
>* config/riscv/riscv-v.cc (emit_pred_op):
>* config/riscv/riscv-vector-builtins-bases.cc:
>* config/riscv/vector.md:

And don't forget to write something in the ChangeLog... :P


[PATCH] RISC-V: Fix bug of PR109535

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

PR 109535

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (count_regno_occurrences): New function.
(pass_vsetvl::cleanup_insns): Fix bug.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr109535.C: New test.
* gcc.target/riscv/rvv/base/pr109535.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  |  14 +-
 .../g++.target/riscv/rvv/base/pr109535.C  | 144 ++
 .../gcc.target/riscv/rvv/base/pr109535.c  |  11 ++
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 1b66e3b9eeb..9c356ce5157 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1592,6 +1592,18 @@ backward_propagate_worthwhile_p (const basic_block 
cfg_bb,
   return true;
 }
 
+/* Count the number of REGNO in RINSN.  */
+static int
+count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
+{
+  int count = 0;
+  extract_insn (rinsn);
+  for (int i = 0; i < recog_data.n_operands; i++)
+if (refers_to_regno_p (regno, recog_data.operand[i]))
+  count++;
+  return count;
+}
+
 avl_info::avl_info (const avl_info &other)
 {
   m_value = other.get_value ();
@@ -3924,7 +3936,7 @@ pass_vsetvl::cleanup_insns (void) const
  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
continue;
  rtx avl = get_vl (rinsn);
- if (count_occurrences (PATTERN (rinsn), avl, 0) == 1)
+ if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
{
  /* Get the list of uses for the new instruction.  */
  auto attempt = crtl->ssa->new_change_attempt ();
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
new file mode 100644
index 000..7013cfcf4ee
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+typedef long size_t;
+typedef signed char int8_t;
+typedef  char uint8_t
+
+;
+template < typename > struct Relations{ using Unsigned = uint8_t; };
+template < typename T > using MakeUnsigned = typename Relations< T >::Unsigned;
+#pragma riscv intrinsic "vector"
+size_t ScaleByPower() {  return 0;}
+template < typename Lane, size_t , int > struct Simd {
+using T = Lane;
+
+template < typename NewT > using Rebind = Simd< NewT, 1, 0 >;
+};
+template < typename T > struct ClampNAndPow2 {
+using type = Simd< T, 65536, 0 >
+;
+};
+struct CappedTagChecker {
+using type = ClampNAndPow2< signed char >::type;
+};
+template < typename , size_t , int >
+using CappedTag = CappedTagChecker::type;
+template < class D > using TFromD = typename D::T;
+template < class T, class D > using Rebind = typename D::Rebind< T >;
+template < class D >
+using RebindToUnsigned = Rebind< MakeUnsigned<  D  >, D >;
+template < size_t N >
+size_t
+Lanes(Simd< uint8_t, N, 0 > ) {
+size_t kFull = 0;
+size_t kCap ;
+size_t actual =
+__riscv_vsetvl_e8m1(kCap);
+return actual;
+}
+template < size_t N >
+size_t
+Lanes(Simd< int8_t, N, 0 > ) {
+size_t kFull  ;
+size_t kCap ;
+size_t actual =
+__riscv_vsetvl_e8m1(kCap);
+return actual;
+}
+template < size_t N >
+vuint8m1_t
+Set(Simd< uint8_t, N, 0 > d, uint8_t arg) {
+size_t __trans_tmp_1 = Lanes(d);
+return __riscv_vmv_v_x_u8m1(arg, __trans_tmp_1);
+}
+template < size_t N >
+vint8m1_t Set(Simd< int8_t, N, 0 > , int8_t );
+template < class D > using VFromD = decltype(Set(D(), TFromD< D >()));
+template < class D >
+VFromD< D >
+Zero(D )
+;
+
+template < size_t N >
+vint8m1_t
+BitCastFromByte(Simd< int8_t, N, 0 >, vuint8m1_t v) {
+return __riscv_vreinterpret_v_u8m1_i8m1(v);
+}
+template < class D, class FromV >
+VFromD< D >
+BitCast(D d, FromV v) {
+return BitCastFromByte(d, v)
+
+;
+}
+template < size_t N >
+void
+Store(vint8m1_t v, Simd< int8_t, N, 0 > d) {
+int8_t *p ;
+__riscv_vse8_v_i8m1(p, v, Lanes(d));
+}
+template < class V, class D >
+void
+StoreU(V v, D d) {
+Store(v, d)
+;
+}
+template < class D > using Vec = decltype(Zero(D()));
+size_t Generate_count;
+template < class D, class Func>
+void Generate(D d, Func func) {
+RebindToUnsigned< D > du
+;
+size_t N = Lanes(d);
+Vec< decltype(du) > vidx ;
+for (; ; ) {
+   StoreU(func(d, vidx), d);
+   vidx = (Set(du, N));
+}
+}
+template < typename T, int , int kMinArg, class Test, int kPow2 >
+struct ForeachCappedR {
+static void Do(size_t , size_t ) {
+   CappedTag< T, kMinArg, kPow2 > d;
+   Test()(T(), d);
+}
+};
+template < class > struct ForeachCountAndMisalign;
+struct TestGenerate;
+template < int kPow2 = 1 > class ForExtendableVectors {
+public:
+
+template < typename T > void operator()(T) {
+   size_t max_lanes  ;
+   ForeachCappedR< T, 0, size_t{} ,
+  

[PATCH] RISC-V: Fix bug of PR109535

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Testcase coming from Kito.

Co-authored-by: kito-cheng 
Co-authored-by: kito-cheng 

PR 109535

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (count_regno_occurrences): New function.
(pass_vsetvl::cleanup_insns): Fix bug.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr109535.C: New test.
* gcc.target/riscv/rvv/base/pr109535.c: New test.

Signed-off-by: Ju-Zhe Zhong 
Co-authored-by: kito-cheng 
Co-authored-by: kito-cheng 

---
 gcc/config/riscv/riscv-vsetvl.cc  |  14 +-
 .../g++.target/riscv/rvv/base/pr109535.C  | 144 ++
 .../gcc.target/riscv/rvv/base/pr109535.c  |  11 ++
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 1b66e3b9eeb..9c356ce5157 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1592,6 +1592,18 @@ backward_propagate_worthwhile_p (const basic_block 
cfg_bb,
   return true;
 }
 
+/* Count the number of REGNO in RINSN.  */
+static int
+count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
+{
+  int count = 0;
+  extract_insn (rinsn);
+  for (int i = 0; i < recog_data.n_operands; i++)
+if (refers_to_regno_p (regno, recog_data.operand[i]))
+  count++;
+  return count;
+}
+
 avl_info::avl_info (const avl_info &other)
 {
   m_value = other.get_value ();
@@ -3924,7 +3936,7 @@ pass_vsetvl::cleanup_insns (void) const
  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
continue;
  rtx avl = get_vl (rinsn);
- if (count_occurrences (PATTERN (rinsn), avl, 0) == 1)
+ if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
{
  /* Get the list of uses for the new instruction.  */
  auto attempt = crtl->ssa->new_change_attempt ();
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
new file mode 100644
index 000..7013cfcf4ee
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+typedef long size_t;
+typedef signed char int8_t;
+typedef  char uint8_t
+
+;
+template < typename > struct Relations{ using Unsigned = uint8_t; };
+template < typename T > using MakeUnsigned = typename Relations< T >::Unsigned;
+#pragma riscv intrinsic "vector"
+size_t ScaleByPower() {  return 0;}
+template < typename Lane, size_t , int > struct Simd {
+using T = Lane;
+
+template < typename NewT > using Rebind = Simd< NewT, 1, 0 >;
+};
+template < typename T > struct ClampNAndPow2 {
+using type = Simd< T, 65536, 0 >
+;
+};
+struct CappedTagChecker {
+using type = ClampNAndPow2< signed char >::type;
+};
+template < typename , size_t , int >
+using CappedTag = CappedTagChecker::type;
+template < class D > using TFromD = typename D::T;
+template < class T, class D > using Rebind = typename D::Rebind< T >;
+template < class D >
+using RebindToUnsigned = Rebind< MakeUnsigned<  D  >, D >;
+template < size_t N >
+size_t
+Lanes(Simd< uint8_t, N, 0 > ) {
+size_t kFull = 0;
+size_t kCap ;
+size_t actual =
+__riscv_vsetvl_e8m1(kCap);
+return actual;
+}
+template < size_t N >
+size_t
+Lanes(Simd< int8_t, N, 0 > ) {
+size_t kFull  ;
+size_t kCap ;
+size_t actual =
+__riscv_vsetvl_e8m1(kCap);
+return actual;
+}
+template < size_t N >
+vuint8m1_t
+Set(Simd< uint8_t, N, 0 > d, uint8_t arg) {
+size_t __trans_tmp_1 = Lanes(d);
+return __riscv_vmv_v_x_u8m1(arg, __trans_tmp_1);
+}
+template < size_t N >
+vint8m1_t Set(Simd< int8_t, N, 0 > , int8_t );
+template < class D > using VFromD = decltype(Set(D(), TFromD< D >()));
+template < class D >
+VFromD< D >
+Zero(D )
+;
+
+template < size_t N >
+vint8m1_t
+BitCastFromByte(Simd< int8_t, N, 0 >, vuint8m1_t v) {
+return __riscv_vreinterpret_v_u8m1_i8m1(v);
+}
+template < class D, class FromV >
+VFromD< D >
+BitCast(D d, FromV v) {
+return BitCastFromByte(d, v)
+
+;
+}
+template < size_t N >
+void
+Store(vint8m1_t v, Simd< int8_t, N, 0 > d) {
+int8_t *p ;
+__riscv_vse8_v_i8m1(p, v, Lanes(d));
+}
+template < class V, class D >
+void
+StoreU(V v, D d) {
+Store(v, d)
+;
+}
+template < class D > using Vec = decltype(Zero(D()));
+size_t Generate_count;
+template < class D, class Func>
+void Generate(D d, Func func) {
+RebindToUnsigned< D > du
+;
+size_t N = Lanes(d);
+Vec< decltype(du) > vidx ;
+for (; ; ) {
+   StoreU(func(d, vidx), d);
+   vidx = (Set(du, N));
+}
+}
+template < typename T, int , int kMinArg, class Test, int kPow2 >
+struct ForeachCappedR {
+static void Do(size_t , size_t ) {
+   CappedTag< T, kMinArg, kPow2 > d;
+   Test()(T(), d);
+}
+};
+template < class > struct ForeachCountAndMisalign;
+struct TestGenerate;
+template < int

RE: [PATCH 2/3]middle-end match.pd: simplify debug dump checks

2023-04-19 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Richard Biener 
> Sent: Tuesday, April 18, 2023 11:48 AM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; rguent...@suse.de;
> j...@ventanamicro.com
> Subject: Re: [PATCH 2/3]middle-end match.pd: simplify debug dump checks
> 
> On Tue, Apr 18, 2023 at 12:22 PM Tamar Christina via Gcc-patches  patc...@gcc.gnu.org> wrote:
> >
> > Hi All,
> >
> > This is a small improvement in QoL codegen for match.pd to save time
> > not re-evaluating the condition for printing debug information in every
> function.
> >
> > There is a small but consistent runtime and compile time win here.
> > The runtime win comes from not having to do the condition over again,
> > and on Arm platforms we now use the new test-and-branch support for
> > booleans to only have a single instruction here.
> >
> > Compile time win is gotten from not having to do all the string
> > parsing for the printf and having less string interning to do.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> 
> Ugh, I don't like the new global very much.  Can't we compute it in the 
> toplevel
> entry and pass it down as parameter?  Like passing down the actual dump FILE
> *?

Yeah that would work too, will do.

> 
> The file output in output_line_directive was because we originally had
> match.pd #includeing multiple match-*.pd files, we'd want to keep that
> supported I think.  But since the line directives are commented and there's 
> the
> same info available below, like
> 
> /* #line 798 "/home/rguenther/src/gcc-13-branch/gcc/match.pd" */
>   tree captures[2] ATTRIBUTE_UNUSED = { _p0, _p1 };
>   if (UNLIKELY (dump_file && (dump_flags &
> TDF_FOLDING))) fprintf (dump_file, "Matching expression %s:%d, %s:%d\n",
> "match.pd", 798, __FILE__, __LINE__);
> 
> there's probably no point in emitting them anymore (originally I emitted them
> non-commented but that didn't improve debugging much).  We might want
> to emit more "proper" line directives for the natively copied parts of 
> match.pd
> when code-generating c_expr parts, but that would be something separate.
> 
> Can you split the patch into two things?  A patch removing output of the
> commented line directives at the call sites is OK.

Sure, I'll hold up respinning waiting on the 3rd patch review since this one 
will change
that one as well, so easier to handle all comments at once.

Thanks for the review,
Tamar
> 
> Richard.
> 
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > PR bootstrap/84402
> > * dumpfile.h (dump_folding_p): New.
> > * dumpfile.cc (set_dump_file): Use it.
> > * generic-match-head.cc (dump_debug): New.
> > * gimple-match-head.cc (dump_debug): New.
> > * genmatch.cc (output_line_directive):  Support outputting only line
> > because file is implied.
> > (dt_simplify::gen_1): Call debug_dump instead of printf.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h index
> >
> 7d5eca899dcc98676a9ce7a7efff8e439854ff89..e7b595ddecdcca9983d958
> 4b8b24
> > 17ae1941c7d4 100644
> > --- a/gcc/dumpfile.h
> > +++ b/gcc/dumpfile.h
> > @@ -522,6 +522,7 @@ parse_dump_option (const char *, const char **);
> > extern FILE *dump_file;  extern dump_flags_t dump_flags;  extern const
> > char *dump_file_name;
> > +extern bool dump_folding_p;
> >
> >  extern bool dumps_are_enabled;
> >
> > diff --git a/gcc/dumpfile.cc b/gcc/dumpfile.cc index
> >
> 51f68c8c6b40051ba3125c84298ee44ca52f5d17..f805aa73f3aa244d84714
> 9eec265
> > 05181ce4efe8 100644
> > --- a/gcc/dumpfile.cc
> > +++ b/gcc/dumpfile.cc
> > @@ -63,6 +63,7 @@ FILE *dump_file = NULL;  const char *dump_file_name;
> > dump_flags_t dump_flags;  bool dumps_are_enabled = false;
> > +bool dump_folding_p = false;
> >
> >
> >  /* Set global "dump_file" to NEW_DUMP_FILE, refreshing the
> "dumps_are_enabled"
> > @@ -73,6 +74,7 @@ set_dump_file (FILE *new_dump_file)  {
> >dumpfile_ensure_any_optinfo_are_flushed ();
> >dump_file = new_dump_file;
> > +  dump_folding_p = dump_file && (dump_flags & TDF_FOLDING);
> >dump_context::get ().refresh_dumps_are_enabled ();  }
> >
> > diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
> > index
> >
> f011204c5be450663231bdece0596317b37f9f9b..16b8f9f3b61d3d5651a5
> a41a8c05
> > 52f50b55cc7c 100644
> > --- a/gcc/generic-match-head.cc
> > +++ b/gcc/generic-match-head.cc
> > @@ -102,3 +102,17 @@ optimize_successive_divisions_p (tree, tree)  {
> >return false;
> >  }
> > +
> > +/* Helper method for debug printing to reducing string parsing overhead.
> Keep
> > +   in sync with version in gimple-match-head.cc.  */
> > +
> > +static
> > +void dump_debug (bool simplify, int loc, const char *file, int
> > +lineno) {
> > +  if (simplify)
> > +fprintf (dump_file, "Applying pattern %s:%d, %s:%d\n", "match.pd", loc,
> > +file, lineno);
> > +  else
> > +

Re: Re: [PATCH] RISC-V: Fix bug reported by PR109535

2023-04-19 Thread juzhe.zh...@rivai.ai
Thanks Jeff.
Address Jeff's comment and resend fix patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616170.html 

This patch also adds a testcase from Kito (Kito reduced the google/highway 
testcase from over 100,000 lines of code down to about 100 lines of code!!!).



juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2023-04-19 13:43
To: juzhe.zh...@rivai.ai; kito.cheng; Richard Biener
CC: gcc-patches; palmer
Subject: Re: [PATCH] RISC-V: Fix bug reported by PR109535
 
 
On 4/18/23 19:29, juzhe.zh...@rivai.ai wrote:
> I tried refers_to_regno_p
> It can not work for us since it just return true or false whether the 
> "rtx" has the regno.
Use refers_to_regno_p instead of the equality comparison for the REGNO. 
  So you're still going to have count_regno_occurrences, you're just 
changing the test it uses so that it works for modes which potentially
span multiple hard registers.
 
Note that you'll want to pass in AVL rather than REGNO (avl).  When you 
call refers_to_regno_p it'll look something like
 
tmp = REGNO (avl);
mode = GET_MODE (avl);
 
if (REG_P (recog_data.operand[i])
 && refers_to_regno_p (tmp, hard_regno_nregs (tmp, mode),
  recog_data.operand[i], NULL))
 
Or something like that.  I'm assuming AVL is a hard register at this 
point.  If it could be a pseudo the code will be slightly different.
 
I'm still not sure all this stuff is handling SUBREGs properly either. 
Though if it's only checked after reload, we should be OK as we should 
have simplified the subreg away.
 
 
 
Jeff
 
 
 
 


Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v

2023-04-19 Thread Richard Biener via Gcc-patches
On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni
 wrote:
>
> On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni
>  wrote:
> >
> > On Tue, 11 Apr 2023 at 14:17, Richard Biener  
> > wrote:
> > >
> > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches
> > >  wrote:
> > > >
> > > > Hi,
> > > > For the following test:
> > > >
> > > > svint32_t f(svint32_t v)
> > > > {
> > > >   return svrev_s32 (svrev_s32 (v));
> > > > }
> > > >
> > > > We generate 2 rev instructions instead of nop:
> > > > f:
> > > > rev z0.s, z0.s
> > > > rev z0.s, z0.s
> > > > ret
> > > >
> > > > The attached patch tries to fix that by trying to recognize the 
> > > > following
> > > > pattern in match.pd:
> > > > v1 = VEC_PERM_EXPR (v0, v0, mask)
> > > > v2 = VEC_PERM_EXPR (v1, v1, mask)
> > > > -->
> > > > v2 = v0
> > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... }
> > > >
> > > > Code-gen with patch:
> > > > f:
> > > > ret
> > > >
> > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in 
> > > > progress.
> > > > Does it look OK for stage-1 ?
> > >
> > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation 
> > > should
> > > handle two consecutive permutes with the is_combined_permutation_identity
> > > which might need tweaking for VLA vectors
> > Hi Richard,
> > Thanks for the suggestions. The attached patch modifies
> > is_combined_permutation_identity
> > to recognize the above pattern.
> > Does it look OK ?
> > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu.
> Hi,
> ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html

Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2
and amend the function comment accordingly, say,

  tem = VEC_PERM ;
  res = VEC_PERM ;

SAME_P specifies whether op0 and op1 compare equal.  */

+  if (def_stmt)
+gcc_checking_assert (is_gimple_assign (def_stmt)
+&& gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR);
this is then unnecessary

   mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2);
+
+  /* For VLA masks, check for the following pattern:
+ v1 = VEC_PERM_EXPR (v0, v0, mask)
+ v2 = VEC_PERM_EXPR (v1, v1, mask)
+ -->
+ v2 = v0

you are not using 'mask' so please defer fold_ternary until after your
special-case.

+  if (operand_equal_p (mask1, mask2, 0)
+  && !VECTOR_CST_NELTS (mask1).is_constant ()
+  && def_stmt
+  && operand_equal_p (gimple_assign_rhs1 (def_stmt),
+ gimple_assign_rhs2 (def_stmt), 0))
+{
+  vec_perm_builder builder;
+  if (tree_to_vec_perm_builder (&builder, mask1))
+   {
+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1));
+ vec_perm_indices sel (builder, 1, nelts);
+ if (sel.series_p (0, 1, nelts - 1, -1))
+   return 1;
+   }
+  return 0;

I'm deferring to Richard on whether this is the correct way to check for a vector
reversing mask (I wonder how constructing such mask is even possible)

Richard.

> Thanks,
> Prathamesh
> >
> > Thanks,
> > Prathamesh
> > >
> > > Richard.
> > >
> > > >
> > > > Thanks,
> > > > Prathamesh


Re: [PATCH 3/3]middle-end RFC - match.pd: automatically partition *-match.cc files.

2023-04-19 Thread Richard Biener via Gcc-patches
On Tue, Apr 18, 2023 at 12:23 PM Tamar Christina via Gcc-patches
 wrote:
>
> Hi All,
>
> Following on from Richi's RFC[1] this is another attempt to split up match.pd
> into multiple gimple-match and generic-match files.  This version is fully
> automated and requires no human intervention.
>
> First things first, some perf numbers.  The following shows the effect of the
> patch on my desktop doing parallel compilation of gimple-match:
>
> ++--++--+
> | splits | rel. improvement | splits | rel. improvement |
> ++--++--+
> |  1 | 0.00%| 33 | 91.03%   |
> |  2 | 71.77%   | 34 | 84.02%   |
> |  3 | 100.71%  | 35 | 83.42%   |
> |  4 | 143.08%  | 36 | 78.80%   |
> |  5 | 176.18%  | 37 | 74.06%   |
> |  6 | 174.40%  | 38 | 55.76%   |
> |  7 | 176.62%  | 39 | 66.90%   |
> |  8 | 168.35%  | 40 | 18.25%   |
> |  9 | 189.80%  | 41 | 16.55%   |
> | 10 | 171.77%  | 42 | 47.02%   |
> | 11 | 152.82%  | 43 | 15.29%   |
> | 12 | 112.20%  | 44 | 21.63%   |
> | 13 | 158.57%  | 45 | 41.53%   |
> | 14 | 158.57%  | 46 | 21.98%   |
> | 15 | 152.07%  | 47 | -42.74%  |
> | 16 | 151.70%  | 48 | -32.62%  |
> | 17 | 131.52%  | 49 | 11.81%   |
> | 18 | 133.11%  | 50 | 34.07%   |
> | 19 | 137.33%  | 51 | 2.71%|
> | 20 | 103.83%  | 52 | -22.23%  |
> | 21 | 132.47%  | 53 | 32.30%   |
> | 22 | 116.52%  | 54 | 21.45%   |
> | 23 | 112.73%  | 55 | 40.02%   |
> | 24 | 111.94%  | 56 | 42.83%   |
> | 25 | 112.73%  | 57 | -9.98%   |
> | 26 | 104.07%  | 58 | 18.01%   |
> | 27 | 113.27%  | 59 | -4.91%   |
> | 28 | 96.77%   | 60 | 22.94%   |
> | 29 | 93.42%   | 61 | -3.73%   |
> | 30 | 87.67%   | 62 | -27.43%  |
> | 31 | 89.54%   | 63 | -1.05%   |
> | 32 | 84.42%   | 64 | -5.44%   |
> ++--++--+
>
> As can be seen there seems to be a point of diminishing returns in doing 
> splits.
> This comes from the fact that these match files consume a sizeable amount of
> headers.  At a certain point the parsing overhead of the headers dominates and
> you start losing the gains.
>
> As such from this I've made the default 10 splits per file to allow for some
> room for growth in the future without needing changes to the split amount.
> Since 5-10 show roughly the same gains it means we can afford to double the
> file sizes before we need to up the split amount.  This can be controlled
> by the configure parameter --with-matchpd-partitions=.
>
> At 10 splits the sizes of the files are:
>
>  1.2M gimple-match-1.cc
>  490K gimple-match-2.cc
>  459K gimple-match-3.cc
>  462K gimple-match-4.cc
>  466K gimple-match-5.cc
>  690K gimple-match-6.cc
>  517K gimple-match-7.cc
>  693K gimple-match-8.cc
> 1011K gimple-match-9.cc
>  490K gimple-match-10.cc
>  210K gimple-match-auto.h
>
> The reason gimple-match-1.cc is so large is because it got allocated a very
> large function: gimple_simplify_NE_EXPR.
>
> Because of these sporadically large functions the allocation to a split 
> happens
> based on the amount of data already written to a split instead of just a 
> simple
> round robin allocation (though the patch supports that too.).   This means 
> that
> once gimple_simplify_NE_EXPR is allocated to gimple-match-1.cc nothing uses it
> again until the rest of the files catch up.
>
> To support this split a new header file *-match-auto.h is generated to allow
> the individual files to compile separately.
>
> To correctly link without giving duplicate symbol errors, all non-static
> functions in gimple-match-head.cc were moved to gimple-match-exports.cc since
> gimple-match-head.cc is included in the split file.  Doing this also shrinks
> the amount of data being compiled repeatedly and gimple-match-exports.cc is
> compiled in parallel.
>
> Lastly for the auto generated files I use pragmas to silence the unused
> predicate warnings instead of the previous Makefile way because I couldn't 
> find
> a way to set them without knowing the number of split files beforehand.
>
> Finally with this change, bootstrap time has dropped 8 minutes on AArch64.
>
> [1] https://gcc.gnu.org/legacy-ml/gcc-patches/2018-04/msg01125.html
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and n

[PATCH v2] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch tries to adjust the RISC-V Vector RTL for the
generic shortcut optimization for RVV integer compare.
It includes compare operator eq, ne, ltu, lt, leu, le,
gtu, gt, geu and ge.

Assume we have below test code.
vbool1_t test_shortcut_for_riscv_vmslt_case_0(vint8m8_t v1, size_t vl) {
  return __riscv_vmslt_vv_i8m8_b1(v1, v1, vl);
}

Before this patch:
vsetvli  zero,a2,e8,m8,ta,ma
vl8re8.v v24,0(a1)
vmslt.vv v8,v24,v24
vsetvli  a5,zero,e8,m8,ta,ma
vsm.vv8,0(a0)
ret

After this patch:
vsetvli zero,a2,e8,mf8,ta,ma
vmclr.m v24<- optimized to vmclr.m
vsetvli zero,a5,e8,mf8,ta,ma
vsm.v   v24,0(a0)
ret

We would like to make it happen in the generic way for the optimization.
The patch adds one more operand (aka policy tail) to the VMS{Compare} pattern,
to match the pred_mov (aka vmset/vmclr) pattern. We would like to
let GCC recognize (lt:(reg v) (reg v)) and lower it to
(const_vector:0), and then map into the pred_mov and VMS{Compare} pattern
for both the tail policy and avl operand.

The pred_mov may look like
...(unspec:
 [(match_operand 1 ...)
  (match_operand 4 ...)
+ (match_operand 5 ...)   <- added policy tail
  (reg:SI VL)
  (reg:SI VTYPE)] ...)
   (match_operand 3 "vector_move_operand" ...)  <---+
   (match_operand 2 "vector_undef_operand" ...) |
|
The pred_cmp may looks like |
...(unspec: |
 [(match_operand  1 ...)|
  (match_operand  6 ...)|
  (match_operand  7 ...)|
  (match_operand  8 ...)  <- existing policy tail   |
  (reg:SI VL)   |
  (reg:SI VTYPE)] ...)   lower to
   (match_operator3 ...)  + |
 [(match_operator 4 ...)  +-+
  (match_operator 5 "vector_arith_operand"])] +
   (match_operand 2 "vector_undef_operand" ...)

However, there are some cases in the test files that cannot be optimized right
now. We will file separate patches to try to make that happen.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (emit_pred_op): Add api to add the
  policy tail or policy mask separately.
* config/riscv/riscv-vector-builtins-bases.cc: Change the
  VMS{Compare} default tail policy from false to true.
* config/riscv/vector.md: Add the policy tail operand for the
  pred_mov, pred_cmp.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c: New test.

Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
---
 gcc/config/riscv/riscv-v.cc   |  15 +-
 .../riscv/riscv-vector-builtins-bases.cc  |   6 +-
 gcc/config/riscv/vector.md|  14 +-
 .../rvv/base/integer_compare_insn_shortcut.c  | 291 ++
 4 files changed, 319 insertions(+), 7 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 392f5d02e17..c3881920812 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -71,12 +71,23 @@ public:
 add_input_operand (RVV_VUNDEF (mode), mode);
   }
   void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
+  {
+add_tail_policy_operand (vta);
+add_mask_policy_operand (vma);
+  }
+
+  void add_tail_policy_operand (enum tail_policy vta)
   {
 rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
-rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
 add_input_operand (tail_policy_rtx, Pmode);
+  }
+
+  void add_mask_policy_operand (enum mask_policy vma)
+  {
+rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
 add_input_operand (mask_policy_rtx, Pmode);
   }
+
   void add_avl_type_operand (avl_type type)
   {
 add_input_operand (gen_int_mode (type, Pmode), Pmode);
@@ -206,6 +217,8 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, 
rtx len,
 
   if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
 e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy 
());
+  else
+e.add_tail_policy_operand (get_prefer_tail_policy ());
 
   if (vlmax_p)
 e.add_avl_type_operand (avl_type::VLMAX);
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 52467bbc961..7c6064a5a24 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -756,7 +756,7 @@ template
 class mask_logic : public function_base
 {
 public:
-  bool apply_tail_policy_p () const override { return false; }
+  bool apply_tail_policy_p () const override { return true; }
   bool apply_mask_policy_p () const override { retur

RE: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut optimization

2023-04-19 Thread Li, Pan2 via Gcc-patches
Updated to patch v2 with more detailed information for clarification. Please help 
continue the review.

https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616175.html

Pan

-Original Message-
From: Li, Pan2 
Sent: Wednesday, April 19, 2023 6:33 PM
To: Kito Cheng ; juzhe.zh...@rivai.ai
Cc: gcc-patches ; Kito.cheng ; 
Wang, Yanzhang 
Subject: RE: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut 
optimization

Sure thing.

For the ChangeLog, I had assumed it was generated automatically. LOL.

Pan

-Original Message-
From: Kito Cheng  
Sent: Wednesday, April 19, 2023 5:46 PM
To: juzhe.zh...@rivai.ai
Cc: Li, Pan2 ; gcc-patches ; 
Kito.cheng ; Wang, Yanzhang 
Subject: Re: Re: [PATCH] RISC-V: Allow VMS{Compare} (V1, V1) shortcut 
optimization

HI JuZhe:

Thanks for explaining!


Hi Pan:

I think it would be helpful if JuZhe's explanation could be written into 
the commit log.


> gcc/ChangeLog:
>
>* config/riscv/riscv-v.cc (emit_pred_op):
>* config/riscv/riscv-vector-builtins-bases.cc:
>* config/riscv/vector.md:

And don't forget to write something in the ChangeLog... :P


[PATCH] Remove odd code from gimple_can_merge_blocks_p

2023-04-19 Thread Richard Biener via Gcc-patches
The following removes a special case to not merge a block with
only a non-local label.  We have a restriction of non-local labels
to be the first statement (and label) in a block, but otherwise nothing,
if the last stmt of A is a non-local label then it will be still
the first statement of the combined A + B.  In particular we'd
happily merge when there's a stmt after that label.

The check originates from the tree-ssa merge.

Bootstrapped and tested on x86_64-unknown-linux-gnu with all
languages.

Will push later.

* tree-cfg.cc (gimple_can_merge_blocks_p): Remove condition
rejecting the merge when A contains only a non-local label.
---
 gcc/tree-cfg.cc | 6 --
 1 file changed, 6 deletions(-)

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index a9fcc7fd050..ae53e15158a 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -1960,12 +1960,6 @@ gimple_can_merge_blocks_p (basic_block a, basic_block b)
   if (stmt && stmt_ends_bb_p (stmt))
 return false;
 
-  /* Do not allow a block with only a non-local label to be merged.  */
-  if (stmt)
-if (glabel *label_stmt = dyn_cast  (stmt))
-  if (DECL_NONLOCAL (gimple_label_label (label_stmt)))
-   return false;
-
   /* Examine the labels at the beginning of B.  */
   for (gimple_stmt_iterator gsi = gsi_start_bb (b); !gsi_end_p (gsi);
gsi_next (&gsi))
-- 
2.35.3


[PATCH 1/4] Avoid non-unified nodes on the topological sorting for PTA solving

2023-04-19 Thread Richard Biener via Gcc-patches
Since we do not update successor edges when merging nodes we have
to deal with this in the users.  The following avoids putting those
on the topo order vector.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (topo_visit): Look at the real
destination of edges.
---
 gcc/tree-ssa-structalias.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index fa3a2e4e1f9..8976cc9c2f8 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1632,8 +1632,9 @@ topo_visit (constraint_graph_t graph, struct topo_info 
*ti,
   if (graph->succs[n])
 EXECUTE_IF_SET_IN_BITMAP (graph->succs[n], 0, j, bi)
   {
-   if (!bitmap_bit_p (ti->visited, j))
- topo_visit (graph, ti, j);
+   unsigned k = find (j);
+   if (!bitmap_bit_p (ti->visited, k))
+ topo_visit (graph, ti, k);
   }
 
   ti->topo_order.safe_push (n);
-- 
2.35.3



[PATCH 2/4] Remove senseless store in do_sd_constraint

2023-04-19 Thread Richard Biener via Gcc-patches


Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (do_sd_constraint): Do not write
to the LHS varinfo solution member.
---
 gcc/tree-ssa-structalias.cc | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 8976cc9c2f8..89027ab573d 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1724,10 +1724,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t 
c,
 done:
   /* If the LHS solution changed, mark the var as changed.  */
   if (flag)
-{
-  get_varinfo (lhs)->solution = sol;
-  bitmap_set_bit (changed, lhs);
-}
+bitmap_set_bit (changed, lhs);
 }
 
 /* Process a constraint C that represents *(x + off) = y using DELTA
-- 
2.35.3



[PATCH 3/4] Fix do_sd_constraint escape special casing

2023-04-19 Thread Richard Biener via Gcc-patches
The following fixes the escape special casing to test the proper
variable IDs.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (do_sd_constraint): Fixup escape
special casing.
---
 gcc/tree-ssa-structalias.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 89027ab573d..4f350bfbfc0 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1706,7 +1706,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t 
c,
flag |= bitmap_ior_into (sol, get_varinfo (t)->solution);
  /* Merging the solution from ESCAPED needlessly increases
 the set.  Use ESCAPED as representative instead.  */
- else if (v->id == escaped_id)
+ else if (t == find (escaped_id))
flag |= bitmap_set_bit (sol, escaped_id);
  else if (v->may_have_pointers
   && add_graph_edge (graph, lhs, t))
-- 
2.35.3



[PATCH 4/4] Remove special-cased edges when solving copies

2023-04-19 Thread Richard Biener via Gcc-patches
The following makes sure to remove the copy edges we ignore or
need to special-case only once.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (solve_graph): Remove self-copy
edges, remove edges from escaped after special-casing them.
---
 gcc/tree-ssa-structalias.cc | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 4f350bfbfc0..39c342fae41 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -2873,19 +2873,22 @@ solve_graph (constraint_graph_t graph)
}
  /* Don't try to propagate to ourselves.  */
  if (to == i)
-   continue;
-
- bitmap tmp = get_varinfo (to)->solution;
- bool flag = false;
-
- /* If we propagate from ESCAPED use ESCAPED as
-placeholder.  */
+   {
+ to_remove = j;
+ continue;
+   }
+ /* Early node unification can lead to edges from
+escaped - remove them.  */
  if (i == eff_escaped_id)
-   flag = bitmap_set_bit (tmp, escaped_id);
- else
-   flag = bitmap_ior_into (tmp, pts);
+   {
+ to_remove = j;
+ if (bitmap_set_bit (get_varinfo (to)->solution,
+ escaped_id))
+   bitmap_set_bit (changed, to);
+ continue;
+   }
 
- if (flag)
+ if (bitmap_ior_into (get_varinfo (to)->solution, pts))
bitmap_set_bit (changed, to);
}
  if (to_remove != ~0U)
-- 
2.35.3


Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v

2023-04-19 Thread Prathamesh Kulkarni via Gcc-patches
On Wed, 19 Apr 2023 at 16:17, Richard Biener  wrote:
>
> On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni
>  wrote:
> >
> > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Tue, 11 Apr 2023 at 14:17, Richard Biener  
> > > wrote:
> > > >
> > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches
> > > >  wrote:
> > > > >
> > > > > Hi,
> > > > > For the following test:
> > > > >
> > > > > svint32_t f(svint32_t v)
> > > > > {
> > > > >   return svrev_s32 (svrev_s32 (v));
> > > > > }
> > > > >
> > > > > We generate 2 rev instructions instead of nop:
> > > > > f:
> > > > > rev z0.s, z0.s
> > > > > rev z0.s, z0.s
> > > > > ret
> > > > >
> > > > > The attached patch tries to fix that by trying to recognize the 
> > > > > following
> > > > > pattern in match.pd:
> > > > > v1 = VEC_PERM_EXPR (v0, v0, mask)
> > > > > v2 = VEC_PERM_EXPR (v1, v1, mask)
> > > > > -->
> > > > > v2 = v0
> > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... }
> > > > >
> > > > > Code-gen with patch:
> > > > > f:
> > > > > ret
> > > > >
> > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in 
> > > > > progress.
> > > > > Does it look OK for stage-1 ?
> > > >
> > > > I didn't look at the patch but 
> > > > tree-ssa-forwprop.cc:simplify_permutation should
> > > > handle two consecutive permutes with the 
> > > > is_combined_permutation_identity
> > > > which might need tweaking for VLA vectors
> > > Hi Richard,
> > > Thanks for the suggestions. The attached patch modifies
> > > is_combined_permutation_identity
> > > to recognize the above pattern.
> > > Does it look OK ?
> > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu.
> > Hi,
> > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html
>
> Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2
> and amend the function comment accordingly, say,
>
>   tem = VEC_PERM ;
>   res = VEC_PERM ;
>
> SAME_P specifies whether op0 and op1 compare equal.  */
>
> +  if (def_stmt)
> +gcc_checking_assert (is_gimple_assign (def_stmt)
> +&& gimple_assign_rhs_code (def_stmt) == 
> VEC_PERM_EXPR);
> this is then unnecessary
>
>mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, 
> mask2);
> +
> +  /* For VLA masks, check for the following pattern:
> + v1 = VEC_PERM_EXPR (v0, v0, mask)
> + v2 = VEC_PERM_EXPR (v1, v1, mask)
> + -->
> + v2 = v0
>
> you are not using 'mask' so please defer fold_ternary until after your
> special-case.
>
> +  if (operand_equal_p (mask1, mask2, 0)
> +  && !VECTOR_CST_NELTS (mask1).is_constant ()
> +  && def_stmt
> +  && operand_equal_p (gimple_assign_rhs1 (def_stmt),
> + gimple_assign_rhs2 (def_stmt), 0))
> +{
> +  vec_perm_builder builder;
> +  if (tree_to_vec_perm_builder (&builder, mask1))
> +   {
> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1));
> + vec_perm_indices sel (builder, 1, nelts);
> + if (sel.series_p (0, 1, nelts - 1, -1))
> +   return 1;
> +   }
> +  return 0;
>
> I'm deferring to Richard on whether this is the correct way to check for a vector
> reversing mask (I wonder how constructing such mask is even possible)
Hi Richard,
Thanks for the suggestions, I have updated the patch accordingly.

The following hunk from svrev_impl::fold() constructs mask in reverse:
/* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
vec_perm_builder builder (nelts, 1, 3);
for (int i = 0; i < 3; ++i)
  builder.quick_push (nelts - i - 1);
return fold_permute (f, builder);

To see if mask chooses elements in reverse, I borrowed it from function comment
for series_p in vec-perm-indices.cc:
/* Return true if index OUT_BASE + I * OUT_STEP selects input
   element IN_BASE + I * IN_STEP.  For example, the call to test
   whether a permute reverses a vector of N elements would be:

 series_p (0, 1, N - 1, -1)

   which would return true for { N - 1, N - 2, N - 3, ... }.  */

Thanks,
Prathamesh
>
> Richard.
>
> > Thanks,
> > Prathamesh
> > >
> > > Thanks,
> > > Prathamesh
> > > >
> > > > Richard.
> > > >
> > > > >
> > > > > Thanks,
> > > > > Prathamesh
gcc/ChangeLog:
* tree-ssa-forwprop.cc (is_combined_permutation_identity):
New parameter same_p.
Try to simplify two successive VEC_PERM_EXPRs with single operand
and same mask, where mask chooses elements in reverse order.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/acle/general/rev-1.c: New test.

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
new file mode 100644
index 000..e57ee67d716
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
@@ -0,0 +1,12 

Re: [PATCH] RISC-V: Support chunk 128

2023-04-19 Thread Kito Cheng via Gcc-patches
Could you add more description? maybe like this:


RISC-V provides different VLEN configurations through different ISA
extensions such as `zve32x`, `zve64x` and `v`:
zve32x only guarantees that the minimal VLEN is 32 bits,
zve64x guarantees that the minimal VLEN is 64 bits,
and v guarantees that the minimal VLEN is 128 bits.

Current status (without this patch):

Zve32x: Mode for one vector register mode is VNx1SImode and VNx1DImode
is invalid mode
- one vector register could hold 1 + 1x SImode where x is 0~n, so it
might hold just one SI

Zve64x: Mode for one vector register mode is VNx1DImode or VNx2SImode
- one vector register could hold 1 + 1x DImode where x is 0~n, so it
might hold just one DI.
- one vector register could hold 2 + 2x SImode where x is 0~n, so it
might hold just two SI.

However `v` extension guarantees the minimal VLEN is 128 bits.

We introduce another type/mode mapping for this configure:

v: Mode for one vector register mode is VNx2DImode or VNx4SImode
- one vector register could hold 2 + 2x DImode where x is 0~n, so it
will hold at least two DI
- one vector register could hold 4 + 4x SImode where x is 0~n, so it
will hold at least four SI

This patch models the modes more precisely for RVV, and helps some
middle-end optimizations that assume the number of elements must be a
multiple of two.


Re: [PATCH] Transform more gmp/mpfr uses to use RAII

2023-04-19 Thread Jonathan Wakely via Gcc-patches
On Wed, 19 Apr 2023 at 10:36, Richard Biener wrote:
>
> The following picks up the coccinelle generated patch from Bernhard,
> leaving out the fortran frontend parts and fixing up the rest.
> In particular both gmp.h and mpfr.h contain macros like
>   #define mpfr_inf_p(_x)  ((_x)->_mpfr_exp == __MPFR_EXP_INF)
> for which I add operator-> overloads to the auto_* classes.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> CCed Jonathan just to make sure I'm not doing anything stupid C++ wise
> here.

Looks OK to me.



Re: [PATCH] Fix pointer sharing in Value_Range constructor.

2023-04-19 Thread Aldy Hernandez via Gcc-patches



On 4/18/23 18:40, Aldy Hernandez wrote:

I will push this when a final round of testing finishes on x86-64 Linux.

gcc/ChangeLog:

* value-range.h (Value_Range::Value_Range): Avoid pointer sharing.
---
  gcc/value-range.h | 13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index 0eeea79b322..f97596cdd14 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -583,7 +583,18 @@ Value_Range::Value_Range (tree min, tree max, 
value_range_kind kind)
  inline
  Value_Range::Value_Range (const Value_Range &r)
  {
-  m_vrange = r.m_vrange;
+  if (r.m_vrange == &r.m_irange)
+{
+  m_irange = r.m_irange;
+  m_vrange = &m_irange;
+}
+  else if (r.m_vrange == &r.m_frange)
+{
+  m_frange = r.m_frange;
+  m_vrange = &m_frange;
+}
+  else
+m_vrange = &m_unsupported;
  }
  
  // Initialize object so it is possible to store temporaries of TYPE


Upon further thought I realized operator= will do all the right things, 
and makes the code easier to read.


Re-tested and pushed.

From fc03caa0c94c9c11e0c1b1f7e7eba64233dbcfec Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Mon, 6 Mar 2023 13:53:15 +0100
Subject: [PATCH] Fix pointer sharing in Value_Range constructor.

gcc/ChangeLog:

	* value-range.h (Value_Range::Value_Range): Avoid pointer sharing.
---
 gcc/value-range.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index 0eeea79b322..33ef3b5b8d8 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -583,7 +583,7 @@ Value_Range::Value_Range (tree min, tree max, value_range_kind kind)
 inline
 Value_Range::Value_Range (const Value_Range &r)
 {
-  m_vrange = r.m_vrange;
+  *this = *r.m_vrange;
 }
 
 // Initialize object so it is possible to store temporaries of TYPE
-- 
2.40.0



[PATCH 04/10] RISC-V: Support chunk 128

2023-04-19 Thread juzhe . zhong
From: Juzhe-Zhong 

RISC-V provides different VLEN configurations through different ISA
extensions such as `zve32x`, `zve64x` and `v`:
zve32x only guarantees that the minimal VLEN is 32 bits,
zve64x guarantees that the minimal VLEN is 64 bits,
and v guarantees that the minimal VLEN is 128 bits.

Current status (without this patch):

Zve32x: Mode for one vector register mode is VNx1SImode and VNx1DImode
is invalid mode
- one vector register could hold 1 + 1x SImode where x is 0~n, so it
might hold just one SI

Zve64x: Mode for one vector register mode is VNx1DImode or VNx2SImode
- one vector register could hold 1 + 1x DImode where x is 0~n, so it
might hold just one DI.
- one vector register could hold 2 + 2x SImode where x is 0~n, so it
might hold just two SI.

However `v` extension guarantees the minimal VLEN is 128 bits.

We introduce another type/mode mapping for this configure:

v: Mode for one vector register mode is VNx2DImode or VNx4SImode
- one vector register could hold 2 + 2x DImode where x is 0~n, so it
will hold at least two DI
- one vector register could hold 4 + 4x SImode where x is 0~n, so it
will hold at least four SI

This patch models the modes more precisely for RVV, and helps some
middle-end optimizations that assume the number of elements must be a
multiple of two.

gcc/ChangeLog:

* config/riscv/riscv-modes.def (FLOAT_MODE): Add chunk 128 modes.
(VECTOR_BOOL_MODE): Ditto.
(ADJUST_NUNITS): Ditto.
(ADJUST_ALIGNMENT): Ditto.
(ADJUST_BYTESIZE): Ditto.
(ADJUST_PRECISION): Ditto.
(RVV_MODES): Ditto.
(VECTOR_MODE_WITH_PREFIX): Ditto.
* config/riscv/riscv-v.cc (ENTRY): Ditto.
(get_vlmul): Ditto.
(get_ratio): Ditto.
* config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Ditto.
* config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Ditto.
(vbool64_t): Ditto.
(vbool32_t): Ditto.
(vbool16_t): Ditto.
(vbool8_t): Ditto.
(vbool4_t): Ditto.
(vbool2_t): Ditto.
(vbool1_t): Ditto.
(vint8mf8_t): Ditto.
(vuint8mf8_t): Ditto.
(vint8mf4_t): Ditto.
(vuint8mf4_t): Ditto.
(vint8mf2_t): Ditto.
(vuint8mf2_t): Ditto.
(vint8m1_t): Ditto.
(vuint8m1_t): Ditto.
(vint8m2_t): Ditto.
(vuint8m2_t): Ditto.
(vint8m4_t): Ditto.
(vuint8m4_t): Ditto.
(vint8m8_t): Ditto.
(vuint8m8_t): Ditto.
(vint16mf4_t): Ditto.
(vuint16mf4_t): Ditto.
(vint16mf2_t): Ditto.
(vuint16mf2_t): Ditto.
(vint16m1_t): Ditto.
(vuint16m1_t): Ditto.
(vint16m2_t): Ditto.
(vuint16m2_t): Ditto.
(vint16m4_t): Ditto.
(vuint16m4_t): Ditto.
(vint16m8_t): Ditto.
(vuint16m8_t): Ditto.
(vint32mf2_t): Ditto.
(vuint32mf2_t): Ditto.
(vint32m1_t): Ditto.
(vuint32m1_t): Ditto.
(vint32m2_t): Ditto.
(vuint32m2_t): Ditto.
(vint32m4_t): Ditto.
(vuint32m4_t): Ditto.
(vint32m8_t): Ditto.
(vuint32m8_t): Ditto.
(vint64m1_t): Ditto.
(vuint64m1_t): Ditto.
(vint64m2_t): Ditto.
(vuint64m2_t): Ditto.
(vint64m4_t): Ditto.
(vuint64m4_t): Ditto.
(vint64m8_t): Ditto.
(vuint64m8_t): Ditto.
(vfloat32mf2_t): Ditto.
(vfloat32m1_t): Ditto.
(vfloat32m2_t): Ditto.
(vfloat32m4_t): Ditto.
(vfloat32m8_t): Ditto.
(vfloat64m1_t): Ditto.
(vfloat64m2_t): Ditto.
(vfloat64m4_t): Ditto.
(vfloat64m8_t): Ditto.
* config/riscv/riscv-vector-switch.def (ENTRY): Ditto.
* config/riscv/riscv.cc (riscv_legitimize_poly_move): Ditto.
(riscv_convert_vector_bits): Ditto.
* config/riscv/riscv.md: Ditto.
* config/riscv/vector-iterators.md: Ditto.
* config/riscv/vector.md 
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_reduc_): Ditto.
(@pred_widen_reduc_plus): Ditto.
(@pred_reduc_plus): Ditto.
(@pred_widen_reduc_plus): Ditto.

---
 gcc/config/riscv/riscv-modes.def  |  89 +--
 gcc/config/riscv/riscv-v.cc   |  17 +-
 gcc/config/riscv/riscv-vector-builtins.cc |  11 +-
 gcc/config/riscv/riscv-vector-builtins.def| 172 +++---
 gcc/config/riscv/riscv-vector-switch.def  | 105 ++--
 gcc/config/riscv/riscv.cc |  12 +-
 gcc/config/riscv/riscv.md |  14 +-
 gcc/config/riscv/vector-iterators.md  | 571 +++---
 gcc/config/riscv/vector.md| 233 +--
 .../gcc.target/riscv/rvv/base/pr108185-4.c|   2 +-
 .../gcc.target/riscv/rvv/base/spill-1.c   |   2 +-
 .../gcc.target/riscv/rvv/base/spill-11.c  |   2 +-
 .../gcc.target/riscv/rvv/base/spill-2

Re: [match.pd] [SVE] Add pattern to transform svrev(svrev(v)) --> v

2023-04-19 Thread Richard Biener via Gcc-patches
On Wed, Apr 19, 2023 at 2:20 PM Prathamesh Kulkarni
 wrote:
>
> On Wed, 19 Apr 2023 at 16:17, Richard Biener  
> wrote:
> >
> > On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni
> > >  wrote:
> > > >
> > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener 
> > > >  wrote:
> > > > >
> > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches
> > > > >  wrote:
> > > > > >
> > > > > > Hi,
> > > > > > For the following test:
> > > > > >
> > > > > > svint32_t f(svint32_t v)
> > > > > > {
> > > > > >   return svrev_s32 (svrev_s32 (v));
> > > > > > }
> > > > > >
> > > > > > We generate 2 rev instructions instead of nop:
> > > > > > f:
> > > > > > rev z0.s, z0.s
> > > > > > rev z0.s, z0.s
> > > > > > ret
> > > > > >
> > > > > > The attached patch tries to fix that by trying to recognize the 
> > > > > > following
> > > > > > pattern in match.pd:
> > > > > > v1 = VEC_PERM_EXPR (v0, v0, mask)
> > > > > > v2 = VEC_PERM_EXPR (v1, v1, mask)
> > > > > > -->
> > > > > > v2 = v0
> > > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... }
> > > > > >
> > > > > > Code-gen with patch:
> > > > > > f:
> > > > > > ret
> > > > > >
> > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in 
> > > > > > progress.
> > > > > > Does it look OK for stage-1 ?
> > > > >
> > > > > I didn't look at the patch but 
> > > > > tree-ssa-forwprop.cc:simplify_permutation should
> > > > > handle two consecutive permutes with the 
> > > > > is_combined_permutation_identity
> > > > > which might need tweaking for VLA vectors
> > > > Hi Richard,
> > > > Thanks for the suggestions. The attached patch modifies
> > > > is_combined_permutation_identity
> > > > to recognize the above pattern.
> > > > Does it look OK ?
> > > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu.
> > > Hi,
> > > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html
> >
> > Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2
> > and amend the function comment accordingly, say,
> >
> >   tem = VEC_PERM ;
> >   res = VEC_PERM ;
> >
> > SAME_P specifies whether op0 and op1 compare equal.  */
> >
> > +  if (def_stmt)
> > +gcc_checking_assert (is_gimple_assign (def_stmt)
> > +&& gimple_assign_rhs_code (def_stmt) == 
> > VEC_PERM_EXPR);
> > this is then unnecessary
> >
> >mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, 
> > mask2);
> > +
> > +  /* For VLA masks, check for the following pattern:
> > + v1 = VEC_PERM_EXPR (v0, v0, mask)
> > + v2 = VEC_PERM_EXPR (v1, v1, mask)
> > + -->
> > + v2 = v0
> >
> > you are not using 'mask' so please defer fold_ternary until after your
> > special-case.
> >
> > +  if (operand_equal_p (mask1, mask2, 0)
> > +  && !VECTOR_CST_NELTS (mask1).is_constant ()
> > +  && def_stmt
> > +  && operand_equal_p (gimple_assign_rhs1 (def_stmt),
> > + gimple_assign_rhs2 (def_stmt), 0))
> > +{
> > +  vec_perm_builder builder;
> > +  if (tree_to_vec_perm_builder (&builder, mask1))
> > +   {
> > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1));
> > + vec_perm_indices sel (builder, 1, nelts);
> > + if (sel.series_p (0, 1, nelts - 1, -1))
> > +   return 1;
> > +   }
> > +  return 0;
> >
> > I'm deferring to Richard whether this is the correct way to check for a 
> > vector
> > reversing mask (I wonder how constructing such a mask is even possible)
> Hi Richard,
> Thanks for the suggestions, I have updated the patch accordingly.
>
> The following hunk from svrev_impl::fold() constructs mask in reverse:
> /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
> poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
> vec_perm_builder builder (nelts, 1, 3);
> for (int i = 0; i < 3; ++i)
>   builder.quick_push (nelts - i - 1);
> return fold_permute (f, builder);
>
> To see if mask chooses elements in reverse, I borrowed it from function 
> comment
> for series_p in vec-perm-indices.cc:
> /* Return true if index OUT_BASE + I * OUT_STEP selects input
>element IN_BASE + I * IN_STEP.  For example, the call to test
>whether a permute reverses a vector of N elements would be:
>
>  series_p (0, 1, N - 1, -1)
>
>which would return true for { N - 1, N - 2, N - 3, ... }.  */

Looks good from my side now, but as said I defer to Richard for the check.

Richard.

> Thanks,
> Prathamesh
> >
> > Richard.
> >
> > > Thanks,
> > > Prathamesh
> > > >
> > > > Thanks,
> > > > Prathamesh
> > > > >
> > > > > Richard.
> > > > >
> > > > > >
> > > > > > Thanks,
> > > > > > Prathamesh


[PATCH] RISC-V: Support 128 bit vector chunk

2023-04-19 Thread juzhe . zhong
From: Juzhe-Zhong 

RISC-V provides different VLEN configurations through different ISA
extensions such as `zve32x`, `zve64x` and `v`:
zve32x only guarantees that the minimal VLEN is 32 bits,
zve64x guarantees that the minimal VLEN is 64 bits,
and v guarantees that the minimal VLEN is 128 bits.

Current status (without this patch):

Zve32x: Mode for one vector register mode is VNx1SImode and VNx1DImode
is invalid mode
 - one vector register could hold 1 + 1x SImode where x is 0~n, so it
might hold just one SI

Zve64x: Mode for one vector register mode is VNx1DImode or VNx2SImode
 - one vector register could hold 1 + 1x DImode where x is 0~n, so it
might hold just one DI.
 - one vector register could hold 2 + 2x SImode where x is 0~n, so it
might hold just two SI.

However `v` extension guarantees the minimal VLEN is 128 bits.

We introduce another type/mode mapping for this configure:

v: Mode for one vector register mode is VNx2DImode or VNx4SImode
 - one vector register could hold 2 + 2x DImode where x is 0~n, so it
will hold at least two DI
 - one vector register could hold 4 + 4x SImode where x is 0~n, so it
will hold at least four SI

This patch models the modes more precisely for RVV, and helps some
middle-end optimizations that assume the number of elements must be a
multiple of two.

gcc/ChangeLog:

* config/riscv/riscv-modes.def (FLOAT_MODE): Add chunk 128 support.
(VECTOR_BOOL_MODE): Ditto.
(ADJUST_NUNITS): Ditto.
(ADJUST_ALIGNMENT): Ditto.
(ADJUST_BYTESIZE): Ditto.
(ADJUST_PRECISION): Ditto.
(RVV_MODES): Ditto.
(VECTOR_MODE_WITH_PREFIX): Ditto.
* config/riscv/riscv-v.cc (ENTRY): Ditto.
(get_vlmul): Ditto.
(get_ratio): Ditto.
* config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Ditto.
* config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Ditto.
(vbool64_t): Ditto.
(vbool32_t): Ditto.
(vbool16_t): Ditto.
(vbool8_t): Ditto.
(vbool4_t): Ditto.
(vbool2_t): Ditto.
(vbool1_t): Ditto.
(vint8mf8_t): Ditto.
(vuint8mf8_t): Ditto.
(vint8mf4_t): Ditto.
(vuint8mf4_t): Ditto.
(vint8mf2_t): Ditto.
(vuint8mf2_t): Ditto.
(vint8m1_t): Ditto.
(vuint8m1_t): Ditto.
(vint8m2_t): Ditto.
(vuint8m2_t): Ditto.
(vint8m4_t): Ditto.
(vuint8m4_t): Ditto.
(vint8m8_t): Ditto.
(vuint8m8_t): Ditto.
(vint16mf4_t): Ditto.
(vuint16mf4_t): Ditto.
(vint16mf2_t): Ditto.
(vuint16mf2_t): Ditto.
(vint16m1_t): Ditto.
(vuint16m1_t): Ditto.
(vint16m2_t): Ditto.
(vuint16m2_t): Ditto.
(vint16m4_t): Ditto.
(vuint16m4_t): Ditto.
(vint16m8_t): Ditto.
(vuint16m8_t): Ditto.
(vint32mf2_t): Ditto.
(vuint32mf2_t): Ditto.
(vint32m1_t): Ditto.
(vuint32m1_t): Ditto.
(vint32m2_t): Ditto.
(vuint32m2_t): Ditto.
(vint32m4_t): Ditto.
(vuint32m4_t): Ditto.
(vint32m8_t): Ditto.
(vuint32m8_t): Ditto.
(vint64m1_t): Ditto.
(vuint64m1_t): Ditto.
(vint64m2_t): Ditto.
(vuint64m2_t): Ditto.
(vint64m4_t): Ditto.
(vuint64m4_t): Ditto.
(vint64m8_t): Ditto.
(vuint64m8_t): Ditto.
(vfloat32mf2_t): Ditto.
(vfloat32m1_t): Ditto.
(vfloat32m2_t): Ditto.
(vfloat32m4_t): Ditto.
(vfloat32m8_t): Ditto.
(vfloat64m1_t): Ditto.
(vfloat64m2_t): Ditto.
(vfloat64m4_t): Ditto.
(vfloat64m8_t): Ditto.
* config/riscv/riscv-vector-switch.def (ENTRY): Ditto.
* config/riscv/riscv.cc (riscv_legitimize_poly_move): Ditto.
(riscv_convert_vector_bits): Ditto.
* config/riscv/riscv.md:
* config/riscv/vector-iterators.md:
* config/riscv/vector.md 
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_indexed_store): Ditto.
(@pred_reduc_): Ditto.
(@pred_widen_reduc_plus): Ditto.
(@pred_reduc_plus): Ditto.
(@pred_widen_reduc_plus): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr108185-4.c: Adapt testcase.
* gcc.target/riscv/rvv/base/spill-1.c: Ditto.
* gcc.target/riscv/rvv/base/spill-11.c: Ditto.
* gcc.target/riscv/rvv/base/spill-2.c: Ditto.
* gcc.target/riscv/rvv/base/spill-3.c: Ditto.
* gcc.target/riscv/rvv/base/spill-5.c: Ditto.
* gcc.target/riscv/rvv/base/spill-9.c: Ditto.
---
 gcc/config/riscv/riscv-modes.def  |  89 +--
 gcc/config/riscv/riscv-v.cc   |  17 +-
 gcc/config/riscv/riscv-vector-builtins.cc |  11 +-
 gcc/config/riscv/riscv-vector-builtins.def| 172 +++---
 gcc/config/riscv/riscv-vector-switch.def  | 105 ++--
 gcc/config/riscv/riscv.cc   

Re: [PATCH v3] libgfortran: Replace mutex with rwlock

2023-04-19 Thread Bernhard Reutner-Fischer via Gcc-patches
On 19 April 2023 09:06:28 CEST, Lipeng Zhu via Fortran  
wrote:

>+#ifdef __GTHREAD_RWLOCK_INIT
>+#define RWLOCK_DEBUG_ADD(rwlock) do { \
>+aio_rwlock_debug *n;  \
>+n = malloc (sizeof(aio_rwlock_debug));\

My malloc can fail.

>+n->prev = TAIL_RWLOCK_DEBUG_QUEUE;\
>+if (n->prev)  \
>+  n->prev->next = n;  \
>+n->next = NULL;   \
>+n->line = __LINE__;   \
>+n->func = __FUNCTION__;   \
>+n->rw = rwlock;   \
>+if (!aio_rwlock_debug_head) { \
>+  aio_rwlock_debug_head = n;  \
>+} \
>+  } while (0)
>+



[PATCH] RISC-V: Add tuple type vget/vset intrinsics

2023-04-19 Thread juzhe . zhong
From: Juzhe-Zhong 

gcc/ChangeLog:

* config/riscv/genrvv-type-indexer.cc (valid_type): Adapt for tuple 
type support.
(inttype): Ditto.
(floattype): Ditto.
(main): Ditto.
* config/riscv/riscv-vector-builtins-bases.cc: Ditto.
* config/riscv/riscv-vector-builtins-functions.def (vset): Add tuple 
type vset.
(vget): Add tuple type vget.
* config/riscv/riscv-vector-builtins-types.def (DEF_RVV_TUPLE_OPS): New 
macro.
(vint8mf8x2_t): Ditto.
(vuint8mf8x2_t): Ditto.
(vint8mf8x3_t): Ditto.
(vuint8mf8x3_t): Ditto.
(vint8mf8x4_t): Ditto.
(vuint8mf8x4_t): Ditto.
(vint8mf8x5_t): Ditto.
(vuint8mf8x5_t): Ditto.
(vint8mf8x6_t): Ditto.
(vuint8mf8x6_t): Ditto.
(vint8mf8x7_t): Ditto.
(vuint8mf8x7_t): Ditto.
(vint8mf8x8_t): Ditto.
(vuint8mf8x8_t): Ditto.
(vint8mf4x2_t): Ditto.
(vuint8mf4x2_t): Ditto.
(vint8mf4x3_t): Ditto.
(vuint8mf4x3_t): Ditto.
(vint8mf4x4_t): Ditto.
(vuint8mf4x4_t): Ditto.
(vint8mf4x5_t): Ditto.
(vuint8mf4x5_t): Ditto.
(vint8mf4x6_t): Ditto.
(vuint8mf4x6_t): Ditto.
(vint8mf4x7_t): Ditto.
(vuint8mf4x7_t): Ditto.
(vint8mf4x8_t): Ditto.
(vuint8mf4x8_t): Ditto.
(vint8mf2x2_t): Ditto.
(vuint8mf2x2_t): Ditto.
(vint8mf2x3_t): Ditto.
(vuint8mf2x3_t): Ditto.
(vint8mf2x4_t): Ditto.
(vuint8mf2x4_t): Ditto.
(vint8mf2x5_t): Ditto.
(vuint8mf2x5_t): Ditto.
(vint8mf2x6_t): Ditto.
(vuint8mf2x6_t): Ditto.
(vint8mf2x7_t): Ditto.
(vuint8mf2x7_t): Ditto.
(vint8mf2x8_t): Ditto.
(vuint8mf2x8_t): Ditto.
(vint8m1x2_t): Ditto.
(vuint8m1x2_t): Ditto.
(vint8m1x3_t): Ditto.
(vuint8m1x3_t): Ditto.
(vint8m1x4_t): Ditto.
(vuint8m1x4_t): Ditto.
(vint8m1x5_t): Ditto.
(vuint8m1x5_t): Ditto.
(vint8m1x6_t): Ditto.
(vuint8m1x6_t): Ditto.
(vint8m1x7_t): Ditto.
(vuint8m1x7_t): Ditto.
(vint8m1x8_t): Ditto.
(vuint8m1x8_t): Ditto.
(vint8m2x2_t): Ditto.
(vuint8m2x2_t): Ditto.
(vint8m2x3_t): Ditto.
(vuint8m2x3_t): Ditto.
(vint8m2x4_t): Ditto.
(vuint8m2x4_t): Ditto.
(vint8m4x2_t): Ditto.
(vuint8m4x2_t): Ditto.
(vint16mf4x2_t): Ditto.
(vuint16mf4x2_t): Ditto.
(vint16mf4x3_t): Ditto.
(vuint16mf4x3_t): Ditto.
(vint16mf4x4_t): Ditto.
(vuint16mf4x4_t): Ditto.
(vint16mf4x5_t): Ditto.
(vuint16mf4x5_t): Ditto.
(vint16mf4x6_t): Ditto.
(vuint16mf4x6_t): Ditto.
(vint16mf4x7_t): Ditto.
(vuint16mf4x7_t): Ditto.
(vint16mf4x8_t): Ditto.
(vuint16mf4x8_t): Ditto.
(vint16mf2x2_t): Ditto.
(vuint16mf2x2_t): Ditto.
(vint16mf2x3_t): Ditto.
(vuint16mf2x3_t): Ditto.
(vint16mf2x4_t): Ditto.
(vuint16mf2x4_t): Ditto.
(vint16mf2x5_t): Ditto.
(vuint16mf2x5_t): Ditto.
(vint16mf2x6_t): Ditto.
(vuint16mf2x6_t): Ditto.
(vint16mf2x7_t): Ditto.
(vuint16mf2x7_t): Ditto.
(vint16mf2x8_t): Ditto.
(vuint16mf2x8_t): Ditto.
(vint16m1x2_t): Ditto.
(vuint16m1x2_t): Ditto.
(vint16m1x3_t): Ditto.
(vuint16m1x3_t): Ditto.
(vint16m1x4_t): Ditto.
(vuint16m1x4_t): Ditto.
(vint16m1x5_t): Ditto.
(vuint16m1x5_t): Ditto.
(vint16m1x6_t): Ditto.
(vuint16m1x6_t): Ditto.
(vint16m1x7_t): Ditto.
(vuint16m1x7_t): Ditto.
(vint16m1x8_t): Ditto.
(vuint16m1x8_t): Ditto.
(vint16m2x2_t): Ditto.
(vuint16m2x2_t): Ditto.
(vint16m2x3_t): Ditto.
(vuint16m2x3_t): Ditto.
(vint16m2x4_t): Ditto.
(vuint16m2x4_t): Ditto.
(vint16m4x2_t): Ditto.
(vuint16m4x2_t): Ditto.
(vint32mf2x2_t): Ditto.
(vuint32mf2x2_t): Ditto.
(vint32mf2x3_t): Ditto.
(vuint32mf2x3_t): Ditto.
(vint32mf2x4_t): Ditto.
(vuint32mf2x4_t): Ditto.
(vint32mf2x5_t): Ditto.
(vuint32mf2x5_t): Ditto.
(vint32mf2x6_t): Ditto.
(vuint32mf2x6_t): Ditto.
(vint32mf2x7_t): Ditto.
(vuint32mf2x7_t): Ditto.
(vint32mf2x8_t): Ditto.
(vuint32mf2x8_t): Ditto.
(vint32m1x2_t): Ditto.
(vuint32m1x2_t): Ditto.
(vint32m1x3_t): Ditto.
(vuint32m1x3_t): Ditto.
(vint32m1x4_t): Ditto.
(vuint32m1x4_t): Ditto.
(vint32m1x5_t): Ditto.
(vuint32m1x5_t): Ditto.
(vint32m1x6_t): Ditto.
(vuint32m1x6_t): Ditto.
(vint32m1x7_t): Ditto.
(vuint32m1x7_t): Ditto.
(vint32m1x8_t): Ditto.
(vuint32m1x8_t): Ditto.
(vint32m2x2_t): Di

[PATCH 1/2] Split out solve_add_graph_edge

2023-04-19 Thread Richard Biener via Gcc-patches
Split out a worker with all the special-casings when adding a graph
edge during solving.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (solve_add_graph_edge): New function,
split out from ...
(do_sd_constraint): ... here.
---
 gcc/tree-ssa-structalias.cc | 35 ---
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 39c342fae41..84c625d53e5 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1640,6 +1640,29 @@ topo_visit (constraint_graph_t graph, struct topo_info 
*ti,
   ti->topo_order.safe_push (n);
 }
 
+/* Add a copy edge FROM -> TO, optimizing special cases.  Returns TRUE
+   if the solution of TO changed.  */
+
+static bool
+solve_add_graph_edge (constraint_graph_t graph, unsigned int to,
+ unsigned int from)
+{
+  /* Adding edges from the special vars is pointless.
+ They don't have sets that can change.  */
+  if (get_varinfo (from)->is_special_var)
+return bitmap_ior_into (get_varinfo (to)->solution,
+   get_varinfo (from)->solution);
+  /* Merging the solution from ESCAPED needlessly increases
+ the set.  Use ESCAPED as representative instead.  */
+  else if (from == find (escaped_id))
+return bitmap_set_bit (get_varinfo (to)->solution, escaped_id);
+  else if (get_varinfo (from)->may_have_pointers
+  && add_graph_edge (graph, to, from))
+return bitmap_ior_into (get_varinfo (to)->solution,
+   get_varinfo (from)->solution);
+  return false;
+}
+
 /* Process a constraint C that represents x = *(y + off), using DELTA as the
starting solution for y.  */
 
@@ -1700,17 +1723,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t 
c,
{
  t = find (v->id);
 
- /* Adding edges from the special vars is pointless.
-They don't have sets that can change.  */
- if (get_varinfo (t)->is_special_var)
-   flag |= bitmap_ior_into (sol, get_varinfo (t)->solution);
- /* Merging the solution from ESCAPED needlessly increases
-the set.  Use ESCAPED as representative instead.  */
- else if (t == find (escaped_id))
-   flag |= bitmap_set_bit (sol, escaped_id);
- else if (v->may_have_pointers
-  && add_graph_edge (graph, lhs, t))
-   flag |= bitmap_ior_into (sol, get_varinfo (t)->solution);
+ flag |= solve_add_graph_edge (graph, lhs, t);
 
  if (v->is_full_var
  || v->next == 0)
-- 
2.35.3



[PATCH 2/2] Use solve_add_graph_edge in more places

2023-04-19 Thread Richard Biener via Gcc-patches
The following makes sure to use solve_add_graph_edge, and thereby honor
its special cases, especially edges from escaped, in the remaining places
where the solver adds edges.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-structalias.cc (do_ds_constraint): Use
solve_add_graph_edge.
---
 gcc/tree-ssa-structalias.cc | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 84c625d53e5..47808a3d813 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -1767,11 +1767,8 @@ do_ds_constraint (constraint_t c, bitmap delta, bitmap 
*expanded_delta)
   if (bitmap_bit_p (delta, anything_id))
 {
   unsigned t = find (storedanything_id);
-  if (add_graph_edge (graph, t, rhs))
-   {
- if (bitmap_ior_into (get_varinfo (t)->solution, sol))
-   bitmap_set_bit (changed, t);
-   }
+  if (solve_add_graph_edge (graph, t, rhs))
+   bitmap_set_bit (changed, t);
   return;
 }
 
@@ -1825,8 +1822,8 @@ do_ds_constraint (constraint_t c, bitmap delta, bitmap 
*expanded_delta)
break;
 
  t = find (v->id);
- if (add_graph_edge (graph, t, rhs)
- && bitmap_ior_into (get_varinfo (t)->solution, sol))
+
+ if (solve_add_graph_edge  (graph, t, rhs))
bitmap_set_bit (changed, t);
}
 
-- 
2.35.3


[PATCH] Docs, OpenMP: Small fixes to internal OMP_FOR doc

2023-04-19 Thread Frederik Harwath via Gcc-patches

Hi Sandra,
the OMP_FOR documentation says that the loop index variable
must be signed and it does not list "!=" in the allowed conditional
expressions. But there is nothing that would automatically cast an
unsigned variable to signed or that converts the "!=", as you can see
from the dump for this program:

int main ()
{
#pragma omp for
for (unsigned i = 0; i != 10; i++) {}
}

The 005t.gimple dump is:

int __GIMPLE ()
{
  int D_2064;

  {
    {
  unsigned int i;

  #pragma omp for private(i)
  for (i = 0u; i != 10u; i = i + 1u)
    }
  }
  D_2064 = 0;
  return D_2064;
}

(Strictly speaking, the OMP_FOR is represented as a gomp_for at this point,
but this does not really matter.)

Can I commit the patch?

Best regards,
Frederik
From 8af01114c295086526a67f56f6256fc945b1ccb5 Mon Sep 17 00:00:00 2001
From: Frederik Harwath 
Date: Wed, 19 Apr 2023 13:18:55 +0200
Subject: [PATCH] Docs, OpenMP: Small fixes to internal OMP_FOR doc.

gcc/ChangeLog:

	* doc/generic.texi (OpenMP): Add != to allowed
	conditions and state that vars can be unsigned.

	* tree.def (OMP_FOR): Likewise.
---
 gcc/doc/generic.texi | 4 ++--
 gcc/tree.def | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 2c14b7abce2..8b2882da4fe 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -2323,7 +2323,7 @@ Operand @code{OMP_FOR_INIT} is a vector containing iteration
 variable initializations of the form @code{VAR = N1}.
 
 Operand @code{OMP_FOR_COND} is vector containing loop
-conditional expressions of the form @code{VAR @{<,>,<=,>=@} N2}.
+conditional expressions of the form @code{VAR @{<,>,<=,>=,!=@} N2}.
 
 Operand @code{OMP_FOR_INCR} is a vector containing loop index
 increment expressions of the form @code{VAR @{+=,-=@} INCR}.
@@ -2349,7 +2349,7 @@ adjust their data-sharing attributes and diagnose errors.
 @code{OMP_FOR_ORIG_DECLS} is a vector field, with each element holding
 a list of @code{VAR_DECLS} for the corresponding collapse level.
 
-The loop index variable @code{VAR} must be a signed integer variable,
+The loop index variable @code{VAR} must be an integer variable,
 which is implicitly private to each thread.  For rectangular loops,
 the bounds @code{N1} and @code{N2} and the increment expression
 @code{INCR} are required to be loop-invariant integer expressions
diff --git a/gcc/tree.def b/gcc/tree.def
index ee02754354f..90ceeec0b51 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1159,7 +1159,7 @@ DEFTREECODE (OMP_TASK, "omp_task", tcc_statement, 2)
variable initializations of the form VAR = N1.
 
Operand 3: OMP_FOR_COND is vector containing loop
-   conditional expressions of the form VAR {<,>,<=,>=} N2.
+   conditional expressions of the form VAR {<,>,<=,>=,!=} N2.
 
Operand 4: OMP_FOR_INCR is a vector containing loop index
increment expressions of the form VAR {+=,-=} INCR.
@@ -1185,7 +1185,7 @@ DEFTREECODE (OMP_TASK, "omp_task", tcc_statement, 2)
OMP_FOR_ORIG_DECLS is a vector field, with each element holding
a list of VAR_DECLS for the corresponding collapse level.
 
-   The loop index variable VAR must be a signed integer variable,
+   The loop index variable VAR must be an integer variable,
which is implicitly private to each thread.  For rectangular loops,
the bounds N1 and N2 and the increment expression
INCR are required to be loop-invariant integer expressions
-- 
2.36.1



[wwwdocs] gcc-13: Add release note for RISC-V

2023-04-19 Thread Kito Cheng via Gcc-patches
---
 htdocs/gcc-13/changes.html | 31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index f6941534..5427f805 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -636,9 +636,32 @@ a work-in-progress.
 
 RISC-V
 
-New ISA extension support for zawrs.
+Supports vector intrinsics as specified in https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/v0.11.x";>
+   version 0.11 of the RISC-V vector intrinsic specification,
+   thanks Ju-Zhe Zhong from https://rivai-ic.com.cn/";>RiVAI
+   for contributing most of implementation.
+
 Support for the following vendor extensions has been added:
   
+   Zawrs
+   Zicbom
+   Zicboz
+   Zicbop
+   Zfh
+   Zfhmin
+   Zmmul
+   Zdinx
+   Zfinx
+   Zhinx
+   Zhinxmin
+   Zksh
+   Zksed
+   Zknd
+   Zkne
+   Zbkb
+   Zbkc
+   Zbkx
 XTheadBa
 XTheadBb
 XTheadBs
@@ -657,8 +680,14 @@ a work-in-progress.
   option (GCC identifiers in parentheses).
   
 T-Head's XuanTie C906 (thead-c906).
+Ventana's VT1 (ventana-vt1).
   
 
+Improves the multi-lib selection mechanism for the bare-metal toolchain
+   (riscv*-elf*). GCC will now automatically select the best-fit multi-lib
+   candidate instead of requiring all possible reuse rules to be listed at
+   build time.
+
 
 
 
-- 
2.39.2



Re: [wwwdocs] gcc-13: Add release note for RISC-V

2023-04-19 Thread 钟居哲
LGTM。



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-04-19 21:53
To: gcc-patches; kito.cheng; palmer; juzhe.zhong; jeffreyalaw
CC: Kito Cheng
Subject: [wwwdocs] gcc-13: Add release note for RISC-V
---
htdocs/gcc-13/changes.html | 31 ++-
1 file changed, 30 insertions(+), 1 deletion(-)
 
diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index f6941534..5427f805 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -636,9 +636,32 @@ a work-in-progress.
RISC-V

-New ISA extension support for zawrs.
+Supports vector intrinsics as specified in https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/v0.11.x";>
+ version 0.11 of the RISC-V vector intrinsic specification,
+ thanks Ju-Zhe Zhong from https://rivai-ic.com.cn/";>RiVAI
+ for contributing most of implementation.
+
 Support for the following vendor extensions has been added:
   
+ Zawrs
+ Zicbom
+ Zicboz
+ Zicbop
+ Zfh
+ Zfhmin
+ Zmmul
+ Zdinx
+ Zfinx
+ Zhinx
+ Zhinxmin
+ Zksh
+ Zksed
+ Zknd
+ Zkne
+ Zbkb
+ Zbkc
+ Zbkx
 XTheadBa
 XTheadBb
 XTheadBs
@@ -657,8 +680,14 @@ a work-in-progress.
   option (GCC identifiers in parentheses).
   
 T-Head's XuanTie C906 (thead-c906).
+Ventana's VT1 (ventana-vt1).
   
 
+Improves the multi-lib selection mechanism for the bare-metal toolchain
+ (riscv*-elf*). GCC will now automatically select the best-fit multi-lib
+ candidate instead of requiring all possible reuse rules to be listed at
+ build time.
+


-- 
2.39.2
 
 


Re: [wwwdocs] gcc-13: Add release note for RISC-V

2023-04-19 Thread Palmer Dabbelt

On Wed, 19 Apr 2023 06:53:51 PDT (-0700), kito.ch...@sifive.com wrote:

---
 htdocs/gcc-13/changes.html | 31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index f6941534..5427f805 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -636,9 +636,32 @@ a work-in-progress.

 RISC-V
 
-New ISA extension support for zawrs.
+Supports vector intrinsics as specified in 

Maybe "Support for vector intrinsics"?  That matches how the other ones 
are written.



+   href="https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/v0.11.x";>
+   version 0.11 of the RISC-V vector intrinsic specification,
+   thanks Ju-Zhe Zhong from https://rivai-ic.com.cn/";>RiVAI
+   for contributing most of implementation.
+
 Support for the following vendor extensions has been added:
   
+   Zawrs


These aren't vendor extensions.  We should probably have another header.


+   Zicbom
+   Zicboz
+   Zicbop
+   Zfh
+   Zfhmin
+   Zmmul
+   Zdinx
+   Zfinx
+   Zhinx
+   Zhinxmin
+   Zksh
+   Zksed
+   Zknd
+   Zkne
+   Zbkb
+   Zbkc
+   Zbkx
 XTheadBa
 XTheadBb
 XTheadBs
@@ -657,8 +680,14 @@ a work-in-progress.
   option (GCC identifiers in parentheses).
   
 T-Head's XuanTie C906 (thead-c906).
+Ventana's VT1 (ventana-vt1).


IIRC we didn't actually merge the VT1 stuff, it was deferred for gcc-14.


   
 
+Improves the multi-lib selection mechanism for the bare-metal toolchain
+   (riscv*-elf*). GCC will now automatically select the best-fit multi-lib
+   candidate instead of requiring all possible reuse rules to be listed at
+   build time.
+
 

 


[PATCH][committed] aarch64: Factorise widening add/sub high-half expanders with iterators

2023-04-19 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

I noticed these define_expand are almost identical modulo some string 
substitutions.
This patch compresses them together with a couple of code iterators.
No functional change intended.
Bootstrapped and tested on aarch64-none-linux-gnu.

Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_saddw2): Delete.
(aarch64_uaddw2): Delete.
(aarch64_ssubw2): Delete.
(aarch64_usubw2): Delete.
(aarch64_w2): New define_expand.


factor.patch
Description: factor.patch


Re: [PATCH v3] libgfortran: Replace mutex with rwlock

2023-04-19 Thread Bernhard Reutner-Fischer via Gcc-patches
On Wed, 19 Apr 2023 at 14:51, Bernhard Reutner-Fischer
 wrote:
>
> On 19 April 2023 09:06:28 CEST, Lipeng Zhu via Fortran  
> wrote:
>
> >+#ifdef __GTHREAD_RWLOCK_INIT
> >+#define RWLOCK_DEBUG_ADD(rwlock) do { \
> >+aio_rwlock_debug *n;  \
> >+n = malloc (sizeof(aio_rwlock_debug));\
>
> My malloc can fail.

Sorry, i hit send too early.
Please use xmalloc as defined in libgfortran/runtime/memory.c

PS: IIRC we have likely() / unlikely() macros in libgfortran, so you
may want to check if you want to annotate some conditions accordingly
if predict gets it wrong.
thanks,
>
> >+n->prev = TAIL_RWLOCK_DEBUG_QUEUE;\
> >+if (n->prev)  \
> >+  n->prev->next = n;  \
> >+n->next = NULL;   \
> >+n->line = __LINE__;   \
> >+n->func = __FUNCTION__;   \
> >+n->rw = rwlock;   \
> >+if (!aio_rwlock_debug_head) { \
> >+  aio_rwlock_debug_head = n;  \
> >+} \
> >+  } while (0)
> >+
>


[PATCH] i386: Emit compares between high registers and memory

2023-04-19 Thread Uros Bizjak via Gcc-patches
Following code:

typedef __SIZE_TYPE__ size_t;

struct S1s
{
  char pad1;
  char val;
  short pad2;
};

extern char ts[256];

_Bool foo (struct S1s a, size_t i)
{
  return (ts[i] > a.val);
}

compiles with -O2 to:

movl%edi, %eax
movsbl  %ah, %edi
cmpb%dil, ts(%rsi)
setg%al
ret

the compare could use high register %ah instead of %dil:

movl%edi, %eax
cmpbts(%rsi), %ah
setl%al
ret

Use any_extract code iterator to handle signed and unsigned extracts
from high register and introduce peephole2 patterns to propagate
norex memory operand into the compare insn.

gcc/ChangeLog:

PR target/78904
PR target/78952
* config/i386/i386.md (*cmpqi_ext_1_mem_rex64): New insn pattern.
(*cmpqi_ext_1): Use nonimmediate_operand predicate
for operand 0. Use any_extract code iterator.
(*cmpqi_ext_1 peephole2): New peephole2 pattern.
(*cmpqi_ext_2): Use any_extract code iterator.
(*cmpqi_ext_3_mem_rex64): New insn pattern.
(*cmpqi_ext_3): Use general_operand predicate
for operand 1. Use any_extract code iterator.
(*cmpqi_ext_3 peephole2): New peephole2 pattern.
(*cmpqi_ext_4): Use any_extract code iterator.

gcc/testsuite/ChangeLog:

PR target/78904
PR target/78952
* gcc.target/i386/pr78952-3.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1419ea4cff3..0f95d8e8918 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1005,6 +1005,9 @@ (define_code_attr absneg_mnemonic [(abs "fabs") (neg 
"fchs")])
 ;; Mapping of extend operators
 (define_code_iterator any_extend [sign_extend zero_extend])
 
+;; Mapping of extract operators
+(define_code_iterator any_extract [sign_extract zero_extract])
+
 ;; Mapping of highpart multiply operators
 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
 
@@ -1454,12 +1457,27 @@ (define_insn "*cmp_minus_1"
   [(set_attr "type" "icmp")
(set_attr "mode" "")])
 
+(define_insn "*cmpqi_ext_1_mem_rex64"
+  [(set (reg FLAGS_REG)
+   (compare
+ (match_operand:QI 0 "norex_memory_operand" "Bn")
+ (subreg:QI
+   (any_extract:SWI248
+ (match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+  "TARGET_64BIT && reload_completed
+   && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
 (define_insn "*cmpqi_ext_1"
   [(set (reg FLAGS_REG)
(compare
- (match_operand:QI 0 "nonimm_x64constmem_operand" "QBc,m")
+ (match_operand:QI 0 "nonimmediate_operand" "QBc,m")
  (subreg:QI
-   (zero_extract:SWI248
+   (any_extract:SWI248
  (match_operand 1 "int248_register_operand" "Q,Q")
  (const_int 8)
  (const_int 8)) 0)))]
@@ -1469,11 +1487,33 @@ (define_insn "*cmpqi_ext_1"
(set_attr "type" "icmp")
(set_attr "mode" "QI")])
 
+(define_peephole2
+  [(set (match_operand:QI 0 "register_operand")
+   (match_operand:QI 1 "norex_memory_operand"))
+   (set (match_operand 3 "flags_reg_operand")
+   (match_operator 4 "compare_operator"
+ [(match_dup 0)
+  (subreg:QI
+(any_extract:SWI248
+  (match_operand 2 "int248_register_operand")
+  (const_int 8)
+  (const_int 8)) 0)]))]
+  "TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 3)
+   (match_op_dup 4
+ [(match_dup 1)
+  (subreg:QI
+(any_extract:SWI248
+  (match_dup 2)
+  (const_int 8)
+  (const_int 8)) 0)]))])
+
 (define_insn "*cmpqi_ext_2"
   [(set (reg FLAGS_REG)
(compare
  (subreg:QI
-   (zero_extract:SWI248
+   (any_extract:SWI248
  (match_operand 0 "int248_register_operand" "Q")
  (const_int 8)
  (const_int 8)) 0)
@@ -1494,31 +1534,68 @@ (define_expand "cmpqi_ext_3"
  (const_int 8)) 0)
  (match_operand:QI 1 "const_int_operand")))])
 
+(define_insn "*cmpqi_ext_3_mem_rex64"
+  [(set (reg FLAGS_REG)
+   (compare
+ (subreg:QI
+   (any_extract:SWI248
+ (match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "norex_memory_operand" "Bn")))]
+  "TARGET_64BIT && reload_completed
+   && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
 (define_insn "*cmpqi_ext_3"
   [(set (reg FLAGS_REG)
(compare
  (subreg:QI
-   (zero_extract:SWI248
+   (any_extract:SWI248
  (match_operand 0 "int248_register_operand" "Q,Q")
  (const_int 8)
   

Re: [wwwdocs] gcc-13: Add release note for RISC-V

2023-04-19 Thread Kito Cheng via Gcc-patches
> On Wed, 19 Apr 2023 06:53:51 PDT (-0700), kito.ch...@sifive.com wrote:
> > ---
> >  htdocs/gcc-13/changes.html | 31 ++-
> >  1 file changed, 30 insertions(+), 1 deletion(-)
> >
> > diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
> > index f6941534..5427f805 100644
> > --- a/htdocs/gcc-13/changes.html
> > +++ b/htdocs/gcc-13/changes.html
> > @@ -636,9 +636,32 @@ a work-in-progress.
> >
> >  RISC-V
> >  
> > -New ISA extension support for zawrs.
> > +Supports vector intrinsics as specified in 
> Maybe "Support for vector intrinsics"?  That matches how the other ones
> are written.


Thanks, seems like I can’t rely on chatgpt to proofread too much :p


>
> > + href="
> https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/v0.11.x";>
> > + version 0.11 of the RISC-V vector intrinsic specification,
> > + thanks Ju-Zhe Zhong from https://rivai-ic.com.cn/
> ">RiVAI
> > + for contributing most of implementation.
> > +
> >  Support for the following vendor extensions has been added:
> >
> > + Zawrs
>
> These aren't vendor extensions.  We should probably have another header.



Oh damm, good catch.


>
> > + Zicbom
> > + Zicboz
> > + Zicbop
> > + Zfh
> > + Zfhmin
> > + Zmmul
> > + Zdinx
> > + Zfinx
> > + Zhinx
> > + Zhinxmin
> > + Zksh
> > + Zksed
> > + Zknd
> > + Zkne
> > + Zbkb
> > + Zbkc
> > + Zbkx
> >  XTheadBa
> >  XTheadBb
> >  XTheadBs
> > @@ -657,8 +680,14 @@ a work-in-progress.
> >option (GCC identifiers in parentheses).
> >
> >  T-Head's XuanTie C906 (thead-c906).
> > +Ventana's VT1 (ventana-vt1).
>
> IIRC we didn't actually merge the VT1 stuff, it was deferred for gcc-14.


Ok, I guess it's because I just grabbed that with tig --grep ^RISC and missed the
revert commit.


> >
> >  
> > +Improves the multi-lib selection mechanism for the bare-metal
> toolchain
> > + (riscv*-elf*). GCC will now automatically select the best-fit
> multi-lib
> > + candidate instead of requiring all possible reuse rules to be
> listed at
> > + build time.
> > +
> >  
> >
> >  
>


Re: [PATCH] testsuite: fix scan-tree-dump patterns [PR83904, PR100297]

2023-04-19 Thread Bernhard Reutner-Fischer via Gcc-patches
On Wed, 19 Apr 2023 at 03:03, Jerry D via Fortran  wrote:
>
> On 4/18/23 12:39 PM, Harald Anlauf via Fortran wrote:
> > Dear all,
> >
> > the attached patch adjusts the scan-tree-dump patterns of the
> > reported testcases which likely were run in a location such
> > that a path in an error message showing in the tree-dump might
> > have accidentally matched "free" or "data", respectively.
> >
> > For the testcase gfortran.dg/reshape_8.f90 I checked with a
> > failing gfortran-11 that the pattern is appropriate.
> >
> > OK for mainline?
> >
> > Thanks,
> > Harald
> >
> Yes, OK

I'm certainly not opposed to this specific incarnation of such a fix.
These failures are really unpleasant :)
As proposed in 
https://inbox.sourceware.org/gcc-patches/20220426010029.2b476337@nbbrfq/
we could add a -fno-file to suppress the assembler .file output
(whatever the prefix looks like depends on the assembler dialect). Or
we could nuke the .file directives by a sed(1), but that would
probably be cumbersome for remote targets. I don't have a better idea
than -fno-file or -ffile=foo.c .
Fixing them case-by-case does not scale all that well IMHO.

Thoughts?


Re: [PATCH] RISC-V: Support 128 bit vector chunk

2023-04-19 Thread Kito Cheng via Gcc-patches
Committed to trunk, thanks!

On Wed, Apr 19, 2023 at 8:34 PM  wrote:
>
> From: Juzhe-Zhong 
>
> RISC-V provides different VLEN configurations through different ISA
> extensions such as `zve32x`, `zve64x` and `v`:
> zve32x only guarantees that the minimal VLEN is 32 bits,
> zve64x guarantees that the minimal VLEN is 64 bits,
> and v guarantees that the minimal VLEN is 128 bits.
>
> Current status (without this patch):
>
> Zve32x: Mode for one vector register mode is VNx1SImode and VNx1DImode
> is invalid mode
>  - one vector register could hold 1 + 1x SImode where x is 0~n, so it
> might hold just one SI
>
> Zve64x: Mode for one vector register mode is VNx1DImode or VNx2SImode
>  - one vector register could hold 1 + 1x DImode where x is 0~n, so it
> might hold just one DI.
>  - one vector register could hold 2 + 2x SImode where x is 0~n, so it
> might hold just two SI.
>
> However `v` extension guarantees the minimal VLEN is 128 bits.
>
> We introduce another type/mode mapping for this configure:
>
> v: Mode for one vector register mode is VNx2DImode or VNx4SImode
>  - one vector register could hold 2 + 2x DImode where x is 0~n, so it
> will hold at least two DI
>  - one vector register could hold 4 + 4x SImode where x is 0~n, so it
> will hold at least four SI
>
> This patch models the modes more precisely for RVV, and helps some
> middle-end optimizations that assume the number of elements must be a
> multiple of two.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-modes.def (FLOAT_MODE): Add chunk 128 support.
> (VECTOR_BOOL_MODE): Ditto.
> (ADJUST_NUNITS): Ditto.
> (ADJUST_ALIGNMENT): Ditto.
> (ADJUST_BYTESIZE): Ditto.
> (ADJUST_PRECISION): Ditto.
> (RVV_MODES): Ditto.
> (VECTOR_MODE_WITH_PREFIX): Ditto.
> * config/riscv/riscv-v.cc (ENTRY): Ditto.
> (get_vlmul): Ditto.
> (get_ratio): Ditto.
> * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Ditto.
> * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Ditto.
> (vbool64_t): Ditto.
> (vbool32_t): Ditto.
> (vbool16_t): Ditto.
> (vbool8_t): Ditto.
> (vbool4_t): Ditto.
> (vbool2_t): Ditto.
> (vbool1_t): Ditto.
> (vint8mf8_t): Ditto.
> (vuint8mf8_t): Ditto.
> (vint8mf4_t): Ditto.
> (vuint8mf4_t): Ditto.
> (vint8mf2_t): Ditto.
> (vuint8mf2_t): Ditto.
> (vint8m1_t): Ditto.
> (vuint8m1_t): Ditto.
> (vint8m2_t): Ditto.
> (vuint8m2_t): Ditto.
> (vint8m4_t): Ditto.
> (vuint8m4_t): Ditto.
> (vint8m8_t): Ditto.
> (vuint8m8_t): Ditto.
> (vint16mf4_t): Ditto.
> (vuint16mf4_t): Ditto.
> (vint16mf2_t): Ditto.
> (vuint16mf2_t): Ditto.
> (vint16m1_t): Ditto.
> (vuint16m1_t): Ditto.
> (vint16m2_t): Ditto.
> (vuint16m2_t): Ditto.
> (vint16m4_t): Ditto.
> (vuint16m4_t): Ditto.
> (vint16m8_t): Ditto.
> (vuint16m8_t): Ditto.
> (vint32mf2_t): Ditto.
> (vuint32mf2_t): Ditto.
> (vint32m1_t): Ditto.
> (vuint32m1_t): Ditto.
> (vint32m2_t): Ditto.
> (vuint32m2_t): Ditto.
> (vint32m4_t): Ditto.
> (vuint32m4_t): Ditto.
> (vint32m8_t): Ditto.
> (vuint32m8_t): Ditto.
> (vint64m1_t): Ditto.
> (vuint64m1_t): Ditto.
> (vint64m2_t): Ditto.
> (vuint64m2_t): Ditto.
> (vint64m4_t): Ditto.
> (vuint64m4_t): Ditto.
> (vint64m8_t): Ditto.
> (vuint64m8_t): Ditto.
> (vfloat32mf2_t): Ditto.
> (vfloat32m1_t): Ditto.
> (vfloat32m2_t): Ditto.
> (vfloat32m4_t): Ditto.
> (vfloat32m8_t): Ditto.
> (vfloat64m1_t): Ditto.
> (vfloat64m2_t): Ditto.
> (vfloat64m4_t): Ditto.
> (vfloat64m8_t): Ditto.
> * config/riscv/riscv-vector-switch.def (ENTRY): Ditto.
> * config/riscv/riscv.cc (riscv_legitimize_poly_move): Ditto.
> (riscv_convert_vector_bits): Ditto.
> * config/riscv/riscv.md:
> * config/riscv/vector-iterators.md:
> * config/riscv/vector.md 
> (@pred_indexed_store): Ditto.
> (@pred_indexed_store): Ditto.
> (@pred_indexed_store): Ditto.
> (@pred_indexed_store): Ditto.
> (@pred_indexed_store): Ditto.
> (@pred_reduc_): Ditto.
> (@pred_widen_reduc_plus): Ditto.
> (@pred_reduc_plus): Ditto.
> (@pred_widen_reduc_plus): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/pr108185-4.c: Adapt testcase.
> * gcc.target/riscv/rvv/base/spill-1.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-11.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-2.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-3.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-5.c: Ditto.
> * gcc.target/riscv/rvv/base/spill-9.c: Ditto.
> -

Re: [PATCH v3] RISC-V: Align IOR optimization MODE_CLASS condition to AND.

2023-04-19 Thread Kito Cheng via Gcc-patches
Committed, thanks :)

On Wed, Apr 19, 2023 at 5:19 PM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch aligns the MODE_CLASS condition of the IOR to the AND. Then
> more MODE_CLASSes besides SCALAR_INT are able to perform the optimization
> A | (~A) -> -1, similar to the AND operator. See the sample code below.
>
> vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t vl)
> {
>   return __riscv_vmorn_mm_b32(v1, v1, vl);
> }
>
> Before this patch:
> vsetvli  a5,zero,e8,mf4,ta,ma
> vlm.vv24,0(a1)
> vsetvli  zero,a2,e8,mf4,ta,ma
> vmorn.mm v24,v24,v24
> vsetvli  a5,zero,e8,mf4,ta,ma
> vsm.vv24,0(a0)
> ret
>
> After this patch:
> vsetvli zero,a2,e8,mf4,ta,ma
> vmset.m v24
> vsetvli a5,zero,e8,mf4,ta,ma
> vsm.v   v24,0(a0)
> ret
>
> Or in RTL's perspective,
> from:
> (ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 [ v1 ])))
> to:
> (const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ])
>
> A similar optimization for VMANDN has already been enabled. There should
> be no difference except the operator when comparing VMORN and VMANDN
> for this kind of optimization. The patch aligns the IOR MODE_CLASS condition
> of the simplification to the AND operator.
>
> gcc/ChangeLog:
>
> * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
>   Align IOR (A | (~A) -> -1) optimization MODE_CLASS condition to AND.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: Update check
>   condition.
> * gcc.target/riscv/simplify_ior_optimization.c: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/simplify-rtx.cc   |  4 +-
>  .../riscv/rvv/base/mask_insn_shortcut.c   |  3 +-
>  .../riscv/simplify_ior_optimization.c | 50 +++
>  3 files changed, 53 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
>
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index c57ff3320ee..d4aeebc7a5f 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -3370,8 +3370,8 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
> code,
>if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1))
>|| (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0)))
>   && ! side_effects_p (op0)
> - && SCALAR_INT_MODE_P (mode))
> -   return constm1_rtx;
> + && GET_MODE_CLASS (mode) != MODE_CC)
> +   return CONSTM1_RTX (mode);
>
>/* (ior A C) is C if all bits of A that might be nonzero are on in C.  
> */
>if (CONST_INT_P (op1)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> index 83cc4a1b5a5..57d0241675a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c
> @@ -233,9 +233,8 @@ vbool64_t test_shortcut_for_riscv_vmxnor_case_6(vbool64_t 
> v1, size_t vl) {
>  /* { dg-final { scan-assembler-not {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
>  /* { dg-final { scan-assembler-not {vmor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
>  /* { dg-final { scan-assembler-not {vmnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
> -/* { dg-final { scan-assembler-times 
> {vmorn\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 7 } } */
>  /* { dg-final { scan-assembler-not {vmxnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */
>  /* { dg-final { scan-assembler-times {vmclr\.m\s+v[0-9]+} 14 } } */
> -/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 7 } } */
> +/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 14 } } */
>  /* { dg-final { scan-assembler-times {vmmv\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } */
>  /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } 
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c 
> b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> new file mode 100644
> index 000..ec3bd0baf03
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */
> +
> +#include 
> +
> +uint8_t test_simplify_ior_scalar_case_0 (uint8_t a)
> +{
> +  return a | ~a;
> +}
> +
> +uint16_t test_simplify_ior_scalar_case_1 (uint16_t a)
> +{
> +  return a | ~a;
> +}
> +
> +uint32_t test_simplify_ior_scalar_case_2 (uint32_t a)
> +{
> +  return a | ~a;
> +}
> +
> +uint64_t test_simplify_ior_scalar_case_3 (uint64_t a)
> +{
> +  return a | ~a;
> +}
> +
> +int8_t test_simplify_ior_scalar_case_4 (int8_t a)
> +{
> +  return a | ~a;
> +}
> +
> +int16_t test_simplify_ior_scalar_case_5 (int16_t a)
> +{
> +  return a | ~a;
> +}
> +
> +int32_t test_simplify_ior_scalar_case_6 (int32_t a)
> +{
> +  return a | ~a;
> +}
> +
> +int64_t test_simplify_ior_scalar_case_7 (int64_t a)
> +{
> +  return a | ~a;
> +}
> +
> +/* { dg-final {

[13 PATCH RFA] c++: fix 'unsigned __int128_t' semantics [PR108099]

2023-04-19 Thread Jason Merrill via Gcc-patches
When I was backporting the earlier 108099 patch I finally saw your comments on
the PR about the meaning of this pattern with the patch being wrong (and a
regression from 11).  This fixes that regression; fixing the broader issues can
wait.

Tested x86_64-pc-linux-gnu, OK for 13.1 or wait for 13.2?

-- 8< --
My earlier patch for 108099 made us accept this non-standard pattern but
messed up the semantics, so that e.g. unsigned __int128_t was not a 128-bit
type.

PR c++/108099

gcc/cp/ChangeLog:

* decl.cc (grokdeclarator): Keep typedef_decl for __int128_t.

gcc/testsuite/ChangeLog:

* g++.dg/ext/int128-8.C: New test.
---
 gcc/cp/decl.cc  |  6 --
 gcc/testsuite/g++.dg/ext/int128-8.C | 24 
 2 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/int128-8.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 772c059dc2c..ab5cb69b2ae 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -12482,12 +12482,14 @@ grokdeclarator (const cp_declarator *declarator,
   key, typedef_decl);
  ok = !flag_pedantic_errors;
  if (is_typedef_decl (typedef_decl))
-   type = DECL_ORIGINAL_TYPE (typedef_decl);
+   {
+ type = DECL_ORIGINAL_TYPE (typedef_decl);
+ typedef_decl = NULL_TREE;
+   }
  else
/* PR108099: __int128_t comes from c_common_nodes_and_builtins,
   and is not built as a typedef.  */
type = TREE_TYPE (typedef_decl);
- typedef_decl = NULL_TREE;
}
  else if (declspecs->decltype_p)
error_at (loc, "%qs specified with %", key);
diff --git a/gcc/testsuite/g++.dg/ext/int128-8.C 
b/gcc/testsuite/g++.dg/ext/int128-8.C
new file mode 100644
index 000..14bbc49f5c3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/int128-8.C
@@ -0,0 +1,24 @@
+// PR c++/108099
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+using u128 = unsigned __int128_t;
+using s128 = signed __int128_t;
+template  struct integral_constant {
+  static constexpr T value = v;
+};
+typedef integral_constant  false_type;
+typedef integral_constant  true_type;
+template 
+struct is_same : false_type {};
+template 
+struct is_same  : true_type {};
+static_assert (is_same <__int128, s128>::value, "");
+static_assert (is_same ::value, "");
+static_assert (is_same <__int128_t, s128>::value, "");
+static_assert (is_same ::value, ""); // { dg-bogus "" 
"" { xfail *-*-* } }
+static_assert (is_same <__uint128_t, u128>::value, "");  // { 
dg-bogus "" "" { xfail *-*-* } }
+static_assert (sizeof (s128) == sizeof (__int128), "");
+static_assert (sizeof (u128) == sizeof (unsigned __int128), "");
+static_assert (s128(-1) < 0, "");
+static_assert (u128(-1) > 0, "");

base-commit: 57aecdbc118d4c1900d651cb3ada2c9632a67ad0
-- 
2.31.1



Re: [13 PATCH RFA] c++: fix 'unsigned __int128_t' semantics [PR108099]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
On Wed, Apr 19, 2023 at 11:20:09AM -0400, Jason Merrill wrote:
> When I was backporting the earlier 108099 patch I finally saw your comments on
> the PR about the meaning of this pattern with the patch being wrong (and a
> regression from 11).  This fixes that regression; fixing the broader issues 
> can
> wait.
> 
> Tested x86_64-pc-linux-gnu, OK for 13.1 or wait for 13.2?

I'd wait for 13.2.  We've been burned with trying to rush stuff out at the
last minute once this week already ;)

> -- 8< --
> My earlier patch for 108099 made us accept this non-standard pattern but
> messed up the semantics, so that e.g. unsigned __int128_t was not a 128-bit
> type.
> 
>   PR c++/108099
> 
> gcc/cp/ChangeLog:
> 
>   * decl.cc (grokdeclarator): Keep typedef_decl for __int128_t.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/ext/int128-8.C: New test.

Jakub



[PATCH] c++: bad ggc_free in try_class_unification [PR109556]

2023-04-19 Thread Patrick Palka via Gcc-patches
Aside from correcting how try_class_unification copies multi-dimensional
'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
optimization.  But this is potentially wrong since the call to unify
within might've captured the args in persistent memory such as the
satisfaction cache (during constrained auto deduction).

Bootstrapped and regtested on x86_64-pc-linux, does this look OK for
trunk/13?  No testcase yet since the reduction is still in progress.
The plan would be to push this with a reduced testcase, but I figured
I'd send the actual fix for review now.  Would this be OK for 13.1 or
shall it wait until 13.2?

gcc/cp/ChangeLog:

* pt.cc (try_class_unification): Don't ggc_free the copy of
'targs'.
---
 gcc/cp/pt.cc | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index e065ace5c55..68a056acf8b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23895,11 +23895,6 @@ try_class_unification (tree tparms, tree targs, tree 
parm, tree arg,
 err = unify (tparms, targs, CLASSTYPE_TI_ARGS (parm),
 CLASSTYPE_TI_ARGS (arg), UNIFY_ALLOW_NONE, explain_p);
 
-  if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (targs))
-for (tree level : tree_vec_range (targs))
-  ggc_free (level);
-  ggc_free (targs);
-
   return err ? NULL_TREE : arg;
 }
 
-- 
2.40.0.352.g667fcf4e15



[PATCH] recog.cc: Correct comments referring to parameter match_len

2023-04-19 Thread Hans-Peter Nilsson via Gcc-patches
I'll commit this as obvious, so it doesn't trick anyone else
anymore.
-- >8 --
* recog.cc (peep2_attempt, peep2_update_life): Correct
head-comment description of parameter match_len.
---
 gcc/recog.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/recog.cc b/gcc/recog.cc
index 3ddeab59d924..fd09145d45e5 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -3850,7 +3850,7 @@ copy_frame_info_to_split_insn (rtx_insn *old_insn, 
rtx_insn *new_insn)
   maybe_copy_prologue_epilogue_insn (old_insn, new_insn);
 }
 
-/* While scanning basic block BB, we found a match of length MATCH_LEN,
+/* While scanning basic block BB, we found a match of length MATCH_LEN + 1,
starting at INSN.  Perform the replacement, removing the old insns and
replacing them with ATTEMPT.  Returns the last insn emitted, or NULL
if the replacement is rejected.  */
@@ -4036,7 +4036,7 @@ peep2_attempt (basic_block bb, rtx_insn *insn, int 
match_len, rtx_insn *attempt)
 /* After performing a replacement in basic block BB, fix up the life
information in our buffer.  LAST is the last of the insns that we
emitted as a replacement.  PREV is the insn before the start of
-   the replacement.  MATCH_LEN is the number of instructions that were
+   the replacement.  MATCH_LEN + 1 is the number of instructions that were
matched, and which now need to be replaced in the buffer.  */
 
 static void
-- 
2.30.2



Re: [PATCH] Docs, OpenMP: Small fixes to internal OMP_FOR doc

2023-04-19 Thread Sandra Loosemore

On 4/19/23 07:51, Frederik Harwath wrote:

Hi Sandra,
the OMP_FOR documentation says that the loop index variable
must be signed and it does not list "!=" in the allowed conditional
expressions. But there is nothing that would automatically cast an
unsigned variable to signed or that converts the "!=", as you can see
from the dump for this program:

int main ()
{
#pragma omp for
for (unsigned i = 0; i != 10; i++) {}
}

The 005t.gimple dump is:

int __GIMPLE ()
{
   int D_2064;

   {
     {
   unsigned int i;

   #pragma omp for private(i)
   for (i = 0u; i != 10u; i = i + 1u)
     }
   }
   D_2064 = 0;
   return D_2064;
}

(Strictly speaking, the OMP_FOR is represented as a gomp_for at this point,
but this does not really matter.)

Can I commit the patch?


The patch is fine for the trunk, but GCC 13 is frozen for the release 
right now and even doc fixes require RM approval.  I'd have no objection 
to putting it on the release branch too when it is unfrozen, but as this 
isn't user-facing documentation there's no particularly urgent argument 
for doing that.


-Sandra


Re: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]

2023-04-19 Thread Patrick Palka via Gcc-patches
On Wed, 19 Apr 2023, Patrick Palka wrote:

> Aside from correcting how try_class_unification copies multi-dimensional
> 'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
> optimization.  But this is potentially wrong since the call to unify
> within might've captured the args in persistent memory such as the
> satisfaction cache (during constrained auto deduction).
> 
> Bootstrapped and regtested on x86_64-pc-linux, does this look OK for
> trunk/13?  No testcase yet since the reduction is still in progress.
> The plan would be to push this with a reduced testcase, but I figured
> I'd send the actual fix for review now.  Would this be OK for 13.1 or
> shall it wait until 13.2?

Now with a reduced testcase:

-- >8 --

Subject: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]

Aside from correcting how try_class_unification copies multi-dimensional
'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
optimization.  But this is potentially wrong since the call to unify
within might've captured the args in persistent memory such as the
satisfaction cache (during constrained auto deduction).

gcc/cp/ChangeLog:

* pt.cc (try_class_unification): Don't ggc_free the copy of
'targs'.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-placeholder13.C: New test.
---
 gcc/cp/pt.cc |  5 -
 .../g++.dg/cpp2a/concepts-placeholder13.C| 16 
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index e065ace5c55..68a056acf8b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23895,11 +23895,6 @@ try_class_unification (tree tparms, tree targs, tree 
parm, tree arg,
 err = unify (tparms, targs, CLASSTYPE_TI_ARGS (parm),
 CLASSTYPE_TI_ARGS (arg), UNIFY_ALLOW_NONE, explain_p);
 
-  if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (targs))
-for (tree level : tree_vec_range (targs))
-  ggc_free (level);
-  ggc_free (targs);
-
   return err ? NULL_TREE : arg;
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
new file mode 100644
index 000..fd4a05c05e1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
@@ -0,0 +1,16 @@
+// PR c++/109556
+// { dg-do compile { target c++20 } }
+
+template
+concept C = (N != 0);
+
+template
+struct A { };
+
+template auto M>
+void f(A);
+
+int main() {
+  f(A<1, 42>{});
+  f(A<2, 42>{});
+}
-- 
2.40.0.352.g667fcf4e15



[PATCH v2] Leveraging the use of STP instruction for vec_duplicate

2023-04-19 Thread Victor L. Do Nascimento via Gcc-patches
The backend pattern for storing a pair of identical values in 32 and
64-bit modes with the machine instruction STP was missing, and
multiple instructions were needed to reproduce this behavior as a
result of failed RTL pattern match in the combine pass.

For the test case:

typedef long long v2di __attribute__((vector_size (16)));
typedef int v2si __attribute__((vector_size (8)));

void
foo (v2di *x, long long a)
{
  v2di tmp = {a, a};
  *x = tmp;
}

void
foo2 (v2si *x, int a)
{
  v2si tmp = {a, a};
  *x = tmp;
}

at -O2 on aarch64 gives:

foo:
  stp x1, x1, [x0]
  ret
foo2:
  stp w1, w1, [x0]
  ret

instead of:

foo:
  dup v0.2d, x1
  str q0, [x0]
  ret
foo2:
  dup v0.2s, w1
  str d0, [x0]
  ret

Bootstrapped and regtested on aarch64-none-linux-gnu.  Ok to install?

gcc/
* config/aarch64/aarch64-simd.md(aarch64_simd_stp): New.
* config/aarch64/constraints.md: Make "Umn" relaxed memory
constraint.
* config/aarch64/iterators.md(ldpstp_vel_sz): New.

gcc/testsuite/
* gcc.target/aarch64/stp_vec_dup_32_64-1.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md| 10 
 gcc/config/aarch64/constraints.md |  2 +-
 gcc/config/aarch64/iterators.md   |  3 +
 .../gcc.target/aarch64/stp_vec_dup_32_64-1.c  | 57 +++
 4 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index de2b7383749..8b5e67bd100 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -229,6 +229,16 @@
   [(set_attr "type" "neon_stp")]
 )
 
+(define_insn "aarch64_simd_stp"
+  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
+   (vec_duplicate:VP_2E (match_operand: 1 "register_operand" "w,r")))]
+  "TARGET_SIMD"
+  "@
+   stp\\t%1, %1, %y0
+   stp\\t%1, %1, %y0"
+  [(set_attr "type" "neon_stp, store_")]
+)
+
 (define_insn "load_pair"
   [(set (match_operand:VQ 0 "register_operand" "=w")
(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 5b20abc27e5..6df1dbec2a8 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -287,7 +287,7 @@
 ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP
 ;; as a vector-concat.  The address mode uses the same constraints as if it
 ;; were for a single value.
-(define_memory_constraint "Umn"
+(define_relaxed_memory_constraint "Umn"
   "@internal
   A memory address suitable for a load/store pair operation."
   (and (match_code "mem")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 6cbc97cc82c..980dacb8025 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1017,6 +1017,9 @@
 ;; Likewise for load/store pair.
 (define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
 
+;; Size of element access for STP/LDP-generated vectors.
+(define_mode_attr ldpstp_vel_sz [(V2SI "8") (V2SF "8") (V2DI "16") (V2DF 
"16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c 
b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
new file mode 100644
index 000..fc2c1ea39e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__((vector_size (16)));
+typedef int v2si __attribute__((vector_size (8)));
+
+#define TESTV2DI(lab, idx) \
+  void \
+  stpv2di_##lab (v2di *x, long long a) \
+  {\
+v2di tmp = {a, a}; \
+x[idx] = tmp;  \
+  }
+
+
+#define TESTV2SI(lab, idx) \
+  void \
+  stpv2si_##lab (v2si *x, int a)   \
+  {\
+v2si tmp = {a, a}; \
+x[idx] = tmp;  \
+  }\
+
+/* Core test, no imm assembler offset:  */
+
+TESTV2SI(0, 0)
+TESTV2DI(0, 0)
+/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+\]} } } */
+
+/* Lower offset bounds:  */
+
+/* Vaid offsets:  */
+TESTV2SI(1, -32)
+TESTV2DI(1, -32)
+/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -256\]} } } 
*/
+/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -512\]} } } 
*/
+/* Invalid offsets:  */
+TESTV2SI(2, -33)
+TESTV2DI(2, -33)
+/* { dg-final { s

Re: [PATCH] testsuite: fix scan-tree-dump patterns [PR83904, PR100297]

2023-04-19 Thread Harald Anlauf via Gcc-patches

On 4/19/23 17:14, Bernhard Reutner-Fischer via Gcc-patches wrote:

On Wed, 19 Apr 2023 at 03:03, Jerry D via Fortran  wrote:


On 4/18/23 12:39 PM, Harald Anlauf via Fortran wrote:

Dear all,

the attached patch adjusts the scan-tree-dump patterns of the
reported testcases which likely were run in a location such
that a path in an error message showing in the tree-dump might
have accidentally matched "free" or "data", respectively.

For the testcase gfortran.dg/reshape_8.f90 I checked with a
failing gfortran-11 that the pattern is appropriate.

OK for mainline?

Thanks,
Harald


Yes, OK


I'm certainly not opposed to this specific incarnation of such a fix.
These failures are really unpleasant :)
As proposed in 
https://inbox.sourceware.org/gcc-patches/20220426010029.2b476337@nbbrfq/
we could add a -fno-file to suppress the assembler .file output
(whatever the prefix looks like depends on the assembler dialect). Or
we could nuke the .file directives by a sed(1), but that would
probably be cumbersome for remote targets. I don't have a better idea
than -fno-file or -ffile=foo.c .
Fixing them case-by-case does not scale all that well IMHO.

Thoughts?



?

It wasn't the tree-dumps being at fault, it was the scan patterns.



[PATCH 0/3] RISC-V: Basic enable RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

PATCH 1: Add compile option for RVV auto-vectorization.
PATCH 2: Enable basic RVV auto-vectorization.
PATCH 3: Add sanity testcases.

*** BLURB HERE ***

Ju-Zhe Zhong (3):
  RISC-V: Add auto-vectorization compile option for RVV
  RISC-V: Enable basic auto-vectorization for RVV
  RISC-V: Add sanity testcases for RVV auto-vectorization

 gcc/config/riscv/autovec.md   |  49 
 gcc/config/riscv/riscv-opts.h |  15 +++
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-v.cc   |  53 +
 gcc/config/riscv/riscv.cc |  24 +++-
 gcc/config/riscv/riscv.opt|  37 ++
 gcc/config/riscv/vector.md|   4 +-
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 39 files changed, 532 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl

[PATCH 1/3] RISC-V: Add auto-vectorization compile option for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds 2 compile options for RVV auto-vectorization.
1. -param=riscv-autovec-preference=
   This option specifies the auto-vectorization approach for RVV.
   Currently, we only support scalable and fixed-vlmax.

- scalable means VLA auto-vectorization. The vector-length is unknown to the
  compiler at compile time and is a run-time invariant. This approach allows
  us to compile code that can run on an RVV CPU of any vector-length.

- fixed-vlmax means the compiler knows the RVV CPU vector-length and compiles
  in fixed-length VLS auto-vectorization mode. For example, if we specify
  vector-length=512, the executable can only run on an RVV CPU with
  vector-length = 512.

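- TODO: we may need to support min-length VLS auto-vectorization, which means
  the executable can also run on an RVV CPU with a larger vector-length.

2. -param=riscv-autovec-lmul=
   This option specifies the RVV LMUL used for auto-vectorization
   (m1, m2, m4 or m8; the default is m1).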

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum riscv_autovec_preference_enum): Add 
enum for auto-vectorization preference.
(enum riscv_autovec_lmul_enum): Add enum for choosing LMUL of RVV 
auto-vectorization.
* config/riscv/riscv.opt: Add compile option for RVV auto-vectorization.

---
 gcc/config/riscv/riscv-opts.h | 15 ++
 gcc/config/riscv/riscv.opt| 37 +++
 2 files changed, 52 insertions(+)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index cf0cd669be4..4207db240ea 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,21 @@ enum stack_protector_guard {
   SSP_GLOBAL   /* global canary */
 };
 
+/* RISC-V auto-vectorization preference.  */
+enum riscv_autovec_preference_enum {
+  NO_AUTOVEC,
+  RVV_SCALABLE,
+  RVV_FIXED_VLMAX
+};
+
+/* RISC-V auto-vectorization RVV LMUL.  */
+enum riscv_autovec_lmul_enum {
+  RVV_M1 = 1,
+  RVV_M2 = 2,
+  RVV_M4 = 4,
+  RVV_M8 = 8
+};
+
 #define MASK_ZICSR(1 << 0)
 #define MASK_ZIFENCEI (1 << 1)
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index ff1dd4ddd4f..ef1bdfcfe28 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -254,3 +254,40 @@ Enum(isa_spec_class) String(20191213) 
Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) 
Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+Enum
+Name(riscv_autovec_preference) Type(enum riscv_autovec_preference_enum)
+The RISC-V auto-vectorization preference:
+
+EnumValue
+Enum(riscv_autovec_preference) String(none) Value(NO_AUTOVEC)
+
+EnumValue
+Enum(riscv_autovec_preference) String(scalable) Value(RVV_SCALABLE)
+
+EnumValue
+Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX)
+
+-param=riscv-autovec-preference=
+Target RejectNegative Joined Enum(riscv_autovec_preference) 
Var(riscv_autovec_preference) Init(NO_AUTOVEC)
+-param=riscv-autovec-preference=   Set the preference of 
auto-vectorization in the RISC-V port.
+
+Enum
+Name(riscv_autovec_lmul) Type(enum riscv_autovec_lmul_enum)
+The RVV possible LMUL:
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m1) Value(RVV_M1)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m2) Value(RVV_M2)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m4) Value(RVV_M4)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m8) Value(RVV_M8)
+
+-param=riscv-autovec-lmul=
+Target RejectNegative Joined Enum(riscv_autovec_lmul) Var(riscv_autovec_lmul) 
Init(RVV_M1)
+-param=riscv-autovec-lmul= Set the RVV LMUL of auto-vectorization 
in the RISC-V port.
-- 
2.36.3



[PATCH 2/3] RISC-V: Enable basic auto-vectorization for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch enables auto-vectorization accurately according to '-march'
and adds the len_load/len_store patterns.

For example, for -march=rv32gc_zve32x, we should not allow SEW = 64 RVV
auto-vectorization.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (preferred_simd_mode): Enable basic 
auto-vectorization support.
* config/riscv/riscv-v.cc (autovec_use_vlmax_p): New function.
(preferred_simd_mode): Ditto.
* config/riscv/riscv.cc (riscv_convert_vector_bits): Enable basic 
auto-vectorization support.
(riscv_preferred_simd_mode): New function.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): New target hook.
* config/riscv/vector.md: include autovec.md
* config/riscv/autovec.md: New file.

---
 gcc/config/riscv/autovec.md | 49 ++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc | 53 +
 gcc/config/riscv/riscv.cc   | 24 ++-
 gcc/config/riscv/vector.md  |  4 ++-
 5 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
new file mode 100644
index 000..b5d46ff57ab
--- /dev/null
+++ b/gcc/config/riscv/autovec.md
@@ -0,0 +1,49 @@
+;; Machine description for auto-vectorization using RVV for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+;; =
+;; == Loads/Stores
+;; =
+
+;; len_load/len_store is a sub-optimal pattern for RVV auto-vectorization 
support.
+;; We will replace them when len_maskload/len_maskstore is supported in loop 
vectorizer.
+(define_expand "len_load_"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:V 1 "memory_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
+
+(define_expand "len_store_"
+  [(match_operand:V 0 "memory_operand")
+   (match_operand:V 1 "register_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5244e8dcbf0..2de9d40be46 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -207,6 +207,7 @@ enum vlen_enum
 bool slide1_sew64_helper (int, machine_mode, machine_mode,
  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
+machine_mode preferred_simd_mode (scalar_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 99c414cc910..5e69427ac54 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -43,6 +43,7 @@
 #include "optabs.h"
 #include "tm-constrs.h"
 #include "rtx-vector-builder.h"
+#include "targhooks.h"
 
 using namespace riscv_vector;
 
@@ -742,4 +743,56 @@ gen_avl_for_scalar_move (rtx avl)
 }
 }
 
+/* SCALABLE means that the vector-length is agnostic (run-time invariant and
+   compile-time unknown). FIXED meands that the vector-length is specific
+   (compile-time known). Both RVV_SCALABLE and RVV_FIXED_VLMAX are doing
+   auto-vectorization using VLMAX vsetvl configuration.  */
+static bool
+autovec_use_vlmax_p (void)
+{
+  return riscv_autovec_preference == RVV_SCALABLE
+|| riscv_autovec_preference == RVV_FIXED_VLMAX;
+}
+
+/* Return the vectorization machine mode for RVV according to LMUL.  */
+machine_mode
+preferred_simd_mode (scalar_mode mode)
+{
+  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128
+ which is -march=rv64gcv. Since GCC loop vectorizer report ICE
+ when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*.
+ in the 'can_dupl

[PATCH 0/3] RISC-V: Basic enable RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

PATCH 1: Add compile option for RVV auto-vectorization.
PATCH 2: Enable basic RVV auto-vectorization.
PATCH 3: Add sanity testcases.

*** BLURB HERE ***

Ju-Zhe Zhong (3):
  RISC-V: Add auto-vectorization compile option for RVV
  RISC-V: Enable basic auto-vectorization for RVV
  RISC-V: Add sanity testcases for RVV auto-vectorization

 gcc/config/riscv/autovec.md   |  49 
 gcc/config/riscv/riscv-opts.h |  15 +++
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-v.cc   |  53 +
 gcc/config/riscv/riscv.cc |  24 +++-
 gcc/config/riscv/riscv.opt|  37 ++
 gcc/config/riscv/vector.md|   4 +-
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 39 files changed, 532 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl

[PATCH 2/3] RISC-V: Enable basic auto-vectorization for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch enables auto-vectorization accurately according to '-march'
and adds the len_load/len_store patterns.

For example, for -march=rv32gc_zve32x, we should not allow SEW = 64 RVV
auto-vectorization.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (preferred_simd_mode): Enable basic 
auto-vectorization support.
* config/riscv/riscv-v.cc (autovec_use_vlmax_p): New function.
(preferred_simd_mode): Ditto.
* config/riscv/riscv.cc (riscv_convert_vector_bits): Enable basic 
auto-vectorization support.
(riscv_preferred_simd_mode): New function.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): New target hook.
* config/riscv/vector.md: include autovec.md
* config/riscv/autovec.md: New file.

---
 gcc/config/riscv/autovec.md | 49 ++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc | 53 +
 gcc/config/riscv/riscv.cc   | 24 ++-
 gcc/config/riscv/vector.md  |  4 ++-
 5 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
new file mode 100644
index 000..b5d46ff57ab
--- /dev/null
+++ b/gcc/config/riscv/autovec.md
@@ -0,0 +1,49 @@
+;; Machine description for auto-vectorization using RVV for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+;; =
+;; == Loads/Stores
+;; =
+
+;; len_load/len_store is a sub-optimal pattern for RVV auto-vectorization 
support.
+;; We will replace them when len_maskload/len_maskstore is supported in loop 
vectorizer.
+(define_expand "len_load_"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:V 1 "memory_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
+
+(define_expand "len_store_"
+  [(match_operand:V 0 "memory_operand")
+   (match_operand:V 1 "register_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5244e8dcbf0..2de9d40be46 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -207,6 +207,7 @@ enum vlen_enum
 bool slide1_sew64_helper (int, machine_mode, machine_mode,
  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
+machine_mode preferred_simd_mode (scalar_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 99c414cc910..5e69427ac54 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -43,6 +43,7 @@
 #include "optabs.h"
 #include "tm-constrs.h"
 #include "rtx-vector-builder.h"
+#include "targhooks.h"
 
 using namespace riscv_vector;
 
@@ -742,4 +743,56 @@ gen_avl_for_scalar_move (rtx avl)
 }
 }
 
+/* SCALABLE means that the vector-length is agnostic (run-time invariant and
+   compile-time unknown). FIXED meands that the vector-length is specific
+   (compile-time known). Both RVV_SCALABLE and RVV_FIXED_VLMAX are doing
+   auto-vectorization using VLMAX vsetvl configuration.  */
+static bool
+autovec_use_vlmax_p (void)
+{
+  return riscv_autovec_preference == RVV_SCALABLE
+|| riscv_autovec_preference == RVV_FIXED_VLMAX;
+}
+
+/* Return the vectorization machine mode for RVV according to LMUL.  */
+machine_mode
+preferred_simd_mode (scalar_mode mode)
+{
+  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128
+ which is -march=rv64gcv. Since GCC loop vectorizer report ICE
+ when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*.
+ in the 'can_dupl

[PATCH 1/3] RISC-V: Add auto-vectorization compile option for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds 2 compile options for RVV auto-vectorization.
1. -param=riscv-autovec-preference=
   This option specifies the auto-vectorization approach for RVV.
   Currently, we only support scalable and fixed-vlmax.

- scalable means VLA auto-vectorization. The vector-length is unknown to the
  compiler at compile time and is a run-time invariant. This approach allows
  us to compile code that can run on an RVV CPU of any vector-length.

- fixed-vlmax means the compiler knows the RVV CPU vector-length and compiles
  in fixed-length VLS auto-vectorization mode. For example, if we specify
  vector-length=512, the executable can only run on an RVV CPU with
  vector-length = 512.

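- TODO: we may need to support min-length VLS auto-vectorization, which means
  the executable can also run on an RVV CPU with a larger vector-length.

2. -param=riscv-autovec-lmul=
   This option specifies the RVV LMUL used for auto-vectorization
   (m1, m2, m4 or m8; the default is m1).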

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum riscv_autovec_preference_enum): Add 
enum for auto-vectorization preference.
(enum riscv_autovec_lmul_enum): Add enum for choosing LMUL of RVV 
auto-vectorization.
* config/riscv/riscv.opt: Add compile option for RVV auto-vectorization.

---
 gcc/config/riscv/riscv-opts.h | 15 ++
 gcc/config/riscv/riscv.opt| 37 +++
 2 files changed, 52 insertions(+)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index cf0cd669be4..4207db240ea 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,21 @@ enum stack_protector_guard {
   SSP_GLOBAL   /* global canary */
 };
 
+/* RISC-V auto-vectorization preference.  */
+enum riscv_autovec_preference_enum {
+  NO_AUTOVEC,
+  RVV_SCALABLE,
+  RVV_FIXED_VLMAX
+};
+
+/* RISC-V auto-vectorization RVV LMUL.  */
+enum riscv_autovec_lmul_enum {
+  RVV_M1 = 1,
+  RVV_M2 = 2,
+  RVV_M4 = 4,
+  RVV_M8 = 8
+};
+
 #define MASK_ZICSR(1 << 0)
 #define MASK_ZIFENCEI (1 << 1)
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index ff1dd4ddd4f..ef1bdfcfe28 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -254,3 +254,40 @@ Enum(isa_spec_class) String(20191213) 
Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) 
Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+Enum
+Name(riscv_autovec_preference) Type(enum riscv_autovec_preference_enum)
+The RISC-V auto-vectorization preference:
+
+EnumValue
+Enum(riscv_autovec_preference) String(none) Value(NO_AUTOVEC)
+
+EnumValue
+Enum(riscv_autovec_preference) String(scalable) Value(RVV_SCALABLE)
+
+EnumValue
+Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX)
+
+-param=riscv-autovec-preference=
+Target RejectNegative Joined Enum(riscv_autovec_preference) 
Var(riscv_autovec_preference) Init(NO_AUTOVEC)
+-param=riscv-autovec-preference=   Set the preference of 
auto-vectorization in the RISC-V port.
+
+Enum
+Name(riscv_autovec_lmul) Type(enum riscv_autovec_lmul_enum)
+The RVV possible LMUL:
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m1) Value(RVV_M1)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m2) Value(RVV_M2)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m4) Value(RVV_M4)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m8) Value(RVV_M8)
+
+-param=riscv-autovec-lmul=
+Target RejectNegative Joined Enum(riscv_autovec_lmul) Var(riscv_autovec_lmul) 
Init(RVV_M1)
+-param=riscv-autovec-lmul= Set the RVV LMUL of auto-vectorization 
in the RISC-V port.
-- 
2.36.3



[PATCH 3/3] RISC-V: Add sanity testcases for RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds sanity tests for basic enabling auto-vectorization.
We should make sure the compiler enables auto-vectorization strictly according
to '-march'.

For example, '-march=rv32gc_zve32x' cannot allow INT64 auto-vectorization,
since SEW = 64 RVV instructions are illegal instructions in this situation.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add auto-vectorization tests.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/template-1.h: New test.
* gcc.target/riscv/rvv/autovec/v-1.c: New test.
* gcc.target/riscv/rvv/autovec/v-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-2.c: New test.


---
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 32 files changed, 351 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
 create mode 100

[PATCH 3/3] RISC-V: Add sanity testcases for RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds sanity tests for basic enabling auto-vectorization.
We should make sure the compiler enables auto-vectorization strictly according
to '-march'.

For example, '-march=rv32gc_zve32x' cannot allow INT64 auto-vectorization,
since SEW = 64 RVV instructions are illegal instructions in this situation.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add auto-vectorization tests.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/template-1.h: New test.
* gcc.target/riscv/rvv/autovec/v-1.c: New test.
* gcc.target/riscv/rvv/autovec/v-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-2.c: New test.


---
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 32 files changed, 351 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
 create mode 100

[C PATCH - backport 12] Fix ICE related to implicit access attributes for VLA arguments [PR105660]

2023-04-19 Thread Martin Uecker via Gcc-patches


Ok to cherrypick for 12?   It is in GCC 13 and fixes an
annoying ICE.

Martin



Here is a fix for PR105660.

Bootstrapped and regression tested on x86-64.


Fix ICE related to implicit access attributes for VLA arguments [PR105660]

When constructing the specifier string when merging an access attribute
that encodes information about VLA arguments, the string was constructed
in random order by iterating through a hash table. Fix this by iterating
through the list of arguments.

PR c/105660

gcc/Changelog
* c-family/c-attribs.cc (append_access_attr): Use order
of arguments when constructing string.
(append_access_attr_idxs): Rename and make static.
* c-family/c-warn.cc (warn_parm_array_mismatch): Add assertion.

gcc/testsuite/ChangeLog:
* gcc.dg/pr105660-1.c: New test.
* gcc.dg/pr105660-2.c: New test.


diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 4667f6de311..072cfb69147 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -4728,22 +4728,27 @@ append_access_attr (tree node[3], tree attrs, const 
char *attrstr,
   rdwr_map cur_idxs;
   init_attr_rdwr_indices (&cur_idxs, attrs);
 
+  tree args = TYPE_ARG_TYPES (node[0]);
+  int argpos = 0;
   std::string spec;
-  for (auto it = new_idxs.begin (); it != new_idxs.end (); ++it)
+  for (tree arg = args; arg; arg = TREE_CHAIN (arg), argpos++)
 {
-  const auto &newaxsref = *it;
+  const attr_access* const newa = new_idxs.get (argpos);
+
+  if (!newa)
+   continue;
 
   /* The map has two equal entries for each pointer argument that
 has an associated size argument.  Process just the entry for
 the former.  */
-  if ((unsigned)newaxsref.first != newaxsref.second.ptrarg)
+  if ((unsigned)argpos != newa->ptrarg)
continue;
 
-  const attr_access* const cura = cur_idxs.get (newaxsref.first);
+  const attr_access* const cura = cur_idxs.get (argpos);
   if (!cura)
{
  /* The new attribute needs to be added.  */
- tree str = newaxsref.second.to_internal_string ();
+ tree str = newa->to_internal_string ();
  spec += TREE_STRING_POINTER (str);
  continue;
}
@@ -4751,7 +4756,6 @@ append_access_attr (tree node[3], tree attrs, const char 
*attrstr,
   /* The new access spec refers to an array/pointer argument for
 which an access spec already exists.  Check and diagnose any
 conflicts.  If no conflicts are found, merge the two.  */
-  const attr_access* const newa = &newaxsref.second;
 
   if (!attrstr)
{
@@ -4886,7 +4890,7 @@ append_access_attr (tree node[3], tree attrs, const char 
*attrstr,
continue;
 
   /* Merge the CURA and NEWA.  */
-  attr_access merged = newaxsref.second;
+  attr_access merged = *newa;
 
   /* VLA seen in a declaration takes precedence.  */
   if (cura->minsize == HOST_WIDE_INT_M1U)
@@ -4912,9 +4916,9 @@ append_access_attr (tree node[3], tree attrs, const char 
*attrstr,
 
 /* Convenience wrapper for the above.  */
 
-tree
-append_access_attr (tree node[3], tree attrs, const char *attrstr,
-   char code, HOST_WIDE_INT idxs[2])
+static tree
+append_access_attr_idxs (tree node[3], tree attrs, const char *attrstr,
+char code, HOST_WIDE_INT idxs[2])
 {
   char attrspec[80];
   int n = sprintf (attrspec, "%c%u", code, (unsigned) idxs[0] - 1);
@@ -5204,7 +5208,7 @@ handle_access_attribute (tree node[3], tree name, tree 
args, int flags,
  attributes specified on previous declarations of the same type
  and if not, concatenate the two.  */
   const char code = attr_access::mode_chars[mode];
-  tree new_attrs = append_access_attr (node, attrs, attrstr, code, idxs);
+  tree new_attrs = append_access_attr_idxs (node, attrs, attrstr, code, idxs);
   if (!new_attrs)
 return NULL_TREE;
 
@@ -5217,7 +5221,7 @@ handle_access_attribute (tree node[3], tree name, tree 
args, int flags,
 {
   /* Repeat for the previously declared type.  */
   attrs = TYPE_ATTRIBUTES (TREE_TYPE (node[1]));
-  new_attrs = append_access_attr (node, attrs, attrstr, code, idxs);
+  new_attrs = append_access_attr_idxs (node, attrs, attrstr, code, idxs);
   if (!new_attrs)
return NULL_TREE;
 
diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc
index 29efce3f2c0..a6fb95b1e80 100644
--- a/gcc/c-family/c-warn.cc
+++ b/gcc/c-family/c-warn.cc
@@ -3617,6 +3617,8 @@ warn_parm_array_mismatch (location_t origloc, tree 
fndecl, tree newparms)
   for (tree newvbl = newa->size, curvbl = cura->size; newvbl;
   newvbl = TREE_CHAIN (newvbl), curvbl = TREE_CHAIN (curvbl))
{
+ gcc_assert (curvbl);
+
  tree newpos = TREE_PURPOSE (newvbl);
  tree curpos = TREE_PURPOSE (curvbl);
 
diff --git a/gcc/testsuite/gcc.dg/pr105660-1.

[PATCH 0/3 V2] RISC-V: Basic enable RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

PATCH 1: Add compile option for RVV auto-vectorization.
PATCH 2: Enable basic RVV auto-vectorization.
PATCH 3: Add sanity testcases.

*** BLURB HERE ***

Ju-Zhe Zhong (3):
  RISC-V: Add auto-vectorization compile option for RVV
  RISC-V: Enable basic auto-vectorization for RVV
  RISC-V: Add sanity testcases for RVV auto-vectorization

 gcc/config/riscv/autovec.md   |  49 
 gcc/config/riscv/riscv-opts.h |  15 +++
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-v.cc   |  53 +
 gcc/config/riscv/riscv.cc |  24 +++-
 gcc/config/riscv/riscv.opt|  37 ++
 gcc/config/riscv/vector.md|   4 +-
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 39 files changed, 532 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl

[PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch enables auto-vectorization accurately according to '-march'
and adds the len_load/len_store patterns.

For example, for -march=rv32gc_zve32x, we should not allow SEW = 64 RVV
auto-vectorization.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (preferred_simd_mode): Enable basic 
auto-vectorization support.
* config/riscv/riscv-v.cc (autovec_use_vlmax_p): New function.
(preferred_simd_mode): Ditto.
* config/riscv/riscv.cc (riscv_convert_vector_bits): Enable basic 
auto-vectorization support.
(riscv_preferred_simd_mode): New function.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): New target hook.
* config/riscv/vector.md: include autovec.md
* config/riscv/autovec.md: New file.

---
 gcc/config/riscv/autovec.md | 49 ++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc | 53 +
 gcc/config/riscv/riscv.cc   | 24 ++-
 gcc/config/riscv/vector.md  |  4 ++-
 5 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
new file mode 100644
index 000..b5d46ff57ab
--- /dev/null
+++ b/gcc/config/riscv/autovec.md
@@ -0,0 +1,49 @@
+;; Machine description for auto-vectorization using RVV for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+;; =
+;; == Loads/Stores
+;; =
+
+;; len_load/len_store is a sub-optimal pattern for RVV auto-vectorization 
support.
+;; We will replace them when len_maskload/len_maskstore is supported in loop 
vectorizer.
+(define_expand "len_load_"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:V 1 "memory_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
+
+(define_expand "len_store_"
+  [(match_operand:V 0 "memory_operand")
+   (match_operand:V 1 "register_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5244e8dcbf0..2de9d40be46 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -207,6 +207,7 @@ enum vlen_enum
 bool slide1_sew64_helper (int, machine_mode, machine_mode,
  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
+machine_mode preferred_simd_mode (scalar_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 99c414cc910..5e69427ac54 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -43,6 +43,7 @@
 #include "optabs.h"
 #include "tm-constrs.h"
 #include "rtx-vector-builder.h"
+#include "targhooks.h"
 
 using namespace riscv_vector;
 
@@ -742,4 +743,56 @@ gen_avl_for_scalar_move (rtx avl)
 }
 }
 
+/* SCALABLE means that the vector-length is agnostic (run-time invariant and
+   compile-time unknown). FIXED means that the vector-length is specific
+   (compile-time known). Both RVV_SCALABLE and RVV_FIXED_VLMAX are doing
+   auto-vectorization using VLMAX vsetvl configuration.  */
+static bool
+autovec_use_vlmax_p (void)
+{
+  return riscv_autovec_preference == RVV_SCALABLE
+|| riscv_autovec_preference == RVV_FIXED_VLMAX;
+}
+
+/* Return the vectorization machine mode for RVV according to LMUL.  */
+machine_mode
+preferred_simd_mode (scalar_mode mode)
+{
+  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128
+ which is -march=rv64gcv. Since GCC loop vectorizer report ICE
+ when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*.
+ in the 'can_dupl

[PATCH 1/3 V2] RISC-V: Add auto-vectorization compile option for RVV

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds 2 compile options for RVV auto-vectorization.
1. -param=riscv-autovec-preference=
   This option is to specify the auto-vectorization approach for RVV.
   Currently, we only support scalable and fixed-vlmax.

- scalable means VLA auto-vectorization. The vector-length is unknown to the
  compiler and is a runtime invariant. This approach allows us to compile code
  that can run on an RVV CPU of any vector-length.

- fixed-vlmax means the compiler knows the RVV CPU vector-length and compiles
  in fixed-length VLS auto-vectorization. This means that if we specify
  vector-length=512, the executable can only run on an RVV CPU with
  vector-length = 512.

- TODO: we may need to support min-length VLS auto-vectorization, which means
  the executable can also run on an RVV CPU with a larger vector-length.
2. -param=riscv-autovec-lmul=
   This option specifies the LMUL to use for RVV auto-vectorization.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum riscv_autovec_preference_enum): Add 
enum for auto-vectorization preference.
(enum riscv_autovec_lmul_enum): Add enum for choosing LMUL of RVV 
auto-vectorization.
* config/riscv/riscv.opt: Add compile option for RVV auto-vectorization.

---
 gcc/config/riscv/riscv-opts.h | 15 ++
 gcc/config/riscv/riscv.opt| 37 +++
 2 files changed, 52 insertions(+)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index cf0cd669be4..4207db240ea 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,21 @@ enum stack_protector_guard {
   SSP_GLOBAL   /* global canary */
 };
 
+/* RISC-V auto-vectorization preference.  */
+enum riscv_autovec_preference_enum {
+  NO_AUTOVEC,
+  RVV_SCALABLE,
+  RVV_FIXED_VLMAX
+};
+
+/* RISC-V auto-vectorization RVV LMUL.  */
+enum riscv_autovec_lmul_enum {
+  RVV_M1 = 1,
+  RVV_M2 = 2,
+  RVV_M4 = 4,
+  RVV_M8 = 8
+};
+
 #define MASK_ZICSR(1 << 0)
 #define MASK_ZIFENCEI (1 << 1)
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index ff1dd4ddd4f..ef1bdfcfe28 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -254,3 +254,40 @@ Enum(isa_spec_class) String(20191213) 
Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) 
Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+Enum
+Name(riscv_autovec_preference) Type(enum riscv_autovec_preference_enum)
+The RISC-V auto-vectorization preference:
+
+EnumValue
+Enum(riscv_autovec_preference) String(none) Value(NO_AUTOVEC)
+
+EnumValue
+Enum(riscv_autovec_preference) String(scalable) Value(RVV_SCALABLE)
+
+EnumValue
+Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX)
+
+-param=riscv-autovec-preference=
+Target RejectNegative Joined Enum(riscv_autovec_preference) 
Var(riscv_autovec_preference) Init(NO_AUTOVEC)
+-param=riscv-autovec-preference=   Set the preference of 
auto-vectorization in the RISC-V port.
+
+Enum
+Name(riscv_autovec_lmul) Type(enum riscv_autovec_lmul_enum)
+The RVV possible LMUL:
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m1) Value(RVV_M1)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m2) Value(RVV_M2)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m4) Value(RVV_M4)
+
+EnumValue
+Enum(riscv_autovec_lmul) String(m8) Value(RVV_M8)
+
+-param=riscv-autovec-lmul=
+Target RejectNegative Joined Enum(riscv_autovec_lmul) Var(riscv_autovec_lmul) 
Init(RVV_M1)
+-param=riscv-autovec-lmul= Set the RVV LMUL of auto-vectorization 
in the RISC-V port.
-- 
2.36.3



[PATCH 3/3 V2] RISC-V: Add sanity testcases for RVV auto-vectorization

2023-04-19 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch adds sanity tests for the basic enabling of auto-vectorization.
We should make sure the compiler enables auto-vectorization strictly according
to '-march'.

For example, '-march=rv32gc_zve32x' must not allow INT64 auto-vectorization,
since SEW = 64 RVV instructions are illegal instructions in this situation.

Also, test auto-vectorization for all combinations of LMUL = 1/2/4/8.
gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add auto-vectorization tests.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h: New test.
* gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/template-1.h: New test.
* gcc.target/riscv/rvv/autovec/v-1.c: New test.
* gcc.target/riscv/rvv/autovec/v-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-2.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x-3.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: New test.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-2.c: New test.


---
 .../rvv/autovec/partial/single_rgroup-1.c |   8 ++
 .../rvv/autovec/partial/single_rgroup-1.h | 106 ++
 .../rvv/autovec/partial/single_rgroup_run-1.c |  19 
 .../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
 .../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
 .../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
 .../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
 .../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
 .../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
 .../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
 .../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
 .../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
 .../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
 32 files changed, 351 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
 create mode 100644

Re: [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.

2023-04-19 Thread Mike Stump via Gcc-patches
LLM, machine learning and AI like coding with data types that are weird, 
float16, bf16, 8 bit float and 4 bit floats. Longer term, would be nice to 
natively support these everywhere. Would be nice to trial run them in the 
compiler, sort it all out, so that the implementation experience can drive 
language adoption. A little speculative and a little narrow focus for the 
field, but, AI isn't going away in the next 20 years I don't think. Anyway, I 
like the direction.

On Apr 19, 2023, at 12:15 AM, liuhongt via Gcc-patches 
 wrote:
> That said, these fundamental types whose presence/absence depends on ISA flags
> are quite problematic IMHO, as they are incompatible with the target
> attribute/pragmas. Whether they are available or not available depends on
> whether in this case SSE2 is enabled during compiler initialization (aka after
> parsing command line options) and then they are available or unavailable to
> everything else based on that.
> -comments end--
> 
> Enable _Float16 and __bf16 all the time but issue errors when the


Re: [PATCH 0/3] RISC-V: Basic enable RVV auto-vectorizaiton

2023-04-19 Thread 钟居哲
Sorry for sending messy patches.
Please ignore those messy patches; the following are the real patches:
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616222.html 
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616225.html 
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616223.html 
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616224.html 

Thanks.


juzhe.zh...@rivai.ai
 
From: juzhe.zhong
Date: 2023-04-20 00:36
To: gcc-patches
CC: kito.cheng; palmer; jeffreyalaw; Ju-Zhe Zhong
Subject: [PATCH 0/3] RISC-V: Basic enable RVV auto-vectorizaiton
From: Ju-Zhe Zhong 
 
PATCH 1: Add compile option for RVV auto-vectorization.
PATCH 2: Enable basic RVV auto-vectorization.
PATCH 3: Add sanity testcases.
 
*** BLURB HERE ***
 
Ju-Zhe Zhong (3):
  RISC-V: Add auto-vectorization compile option for RVV
  RISC-V: Enable basic auto-vectorization for RVV
  RISC-V: Add sanity testcases for RVV auto-vectorization
 
gcc/config/riscv/autovec.md   |  49 
gcc/config/riscv/riscv-opts.h |  15 +++
gcc/config/riscv/riscv-protos.h   |   1 +
gcc/config/riscv/riscv-v.cc   |  53 +
gcc/config/riscv/riscv.cc |  24 +++-
gcc/config/riscv/riscv.opt|  37 ++
gcc/config/riscv/vector.md|   4 +-
.../rvv/autovec/partial/single_rgroup-1.c |   8 ++
.../rvv/autovec/partial/single_rgroup-1.h | 106 ++
.../rvv/autovec/partial/single_rgroup_run-1.c |  19 
.../gcc.target/riscv/rvv/autovec/template-1.h |  68 +++
.../gcc.target/riscv/rvv/autovec/v-1.c|   4 +
.../gcc.target/riscv/rvv/autovec/v-2.c|   6 +
.../gcc.target/riscv/rvv/autovec/zve32f-1.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve32f-2.c   |   5 +
.../gcc.target/riscv/rvv/autovec/zve32f-3.c   |   6 +
.../riscv/rvv/autovec/zve32f_zvl128b-1.c  |   4 +
.../riscv/rvv/autovec/zve32f_zvl128b-2.c  |   6 +
.../gcc.target/riscv/rvv/autovec/zve32x-1.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve32x-2.c   |   6 +
.../gcc.target/riscv/rvv/autovec/zve32x-3.c   |   6 +
.../riscv/rvv/autovec/zve32x_zvl128b-1.c  |   5 +
.../riscv/rvv/autovec/zve32x_zvl128b-2.c  |   6 +
.../gcc.target/riscv/rvv/autovec/zve64d-1.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64d-2.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64d-3.c   |   6 +
.../riscv/rvv/autovec/zve64d_zvl128b-1.c  |   4 +
.../riscv/rvv/autovec/zve64d_zvl128b-2.c  |   6 +
.../gcc.target/riscv/rvv/autovec/zve64f-1.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64f-2.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64f-3.c   |   6 +
.../riscv/rvv/autovec/zve64f_zvl128b-1.c  |   4 +
.../riscv/rvv/autovec/zve64f_zvl128b-2.c  |   6 +
.../gcc.target/riscv/rvv/autovec/zve64x-1.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64x-2.c   |   4 +
.../gcc.target/riscv/rvv/autovec/zve64x-3.c   |   6 +
.../riscv/rvv/autovec/zve64x_zvl128b-1.c  |   4 +
.../riscv/rvv/autovec/zve64x_zvl128b-2.c  |   6 +
gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  16 +++
39 files changed, 532 insertions(+), 2 deletions(-)
create mode 100644 gcc/config/riscv/autovec.md
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup-1.h
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/single_rgroup_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/template-1.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/v-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-2.c
create mode 100644 gcc

Re: [13 PATCH RFA] c++: fix 'unsigned __int128_t' semantics [PR108099]

2023-04-19 Thread Jason Merrill via Gcc-patches

On 4/19/23 11:26, Jakub Jelinek wrote:

On Wed, Apr 19, 2023 at 11:20:09AM -0400, Jason Merrill wrote:

When I was backporting the earlier 108099 patch I finally saw your comments on
the PR about the meaning of this pattern with the patch being wrong (and a
regression from 11).  This fixes that regression; fixing the broader issues can
wait.

Tested x86_64-pc-linux-gnu, OK for 13.1 or wait for 13.2?


I'd wait for 13.2.  We've been burned with trying to rush stuff out at the
last minute once this week already ;)


Fair, though this is much more straightforward than that issue.

I might revert the previous patch in that case, though; a wrong-code 
regression seems worse than an ICE.



-- 8< --
My earlier patch for 108099 made us accept this non-standard pattern but
messed up the semantics, so that e.g. unsigned __int128_t was not a 128-bit
type.

PR c++/108099

gcc/cp/ChangeLog:

* decl.cc (grokdeclarator): Keep typedef_decl for __int128_t.

gcc/testsuite/ChangeLog:

* g++.dg/ext/int128-8.C: New test.


Jakub





Re: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]

2023-04-19 Thread Jason Merrill via Gcc-patches

On 4/19/23 12:05, Patrick Palka wrote:

On Wed, 19 Apr 2023, Patrick Palka wrote:


Aside from correcting how try_class_unification copies multi-dimensional
'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
optimization.  But this is potentially wrong since the call to unify
within might've captured the args in persistent memory such as the
satisfaction cache (during constrained auto deduction).

Bootstrapped and regtested on x86_64-pc-linux, does this look OK for
trunk/13?


OK.


No testcase yet since the reduction is still in progress.
The plan would be to push this with a reduced testcase, but I figured
I'd send the actual fix for review now.  Would this be OK for 13.1 or
shall it wait until 13.2?


Jakub's call, but this regression seems like a blocker to me.


Now with a reduced testcase:

-- >8 --

Subject: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]

Aside from correcting how try_class_unification copies multi-dimensional
'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
optimization.  But this is potentially wrong since the call to unify
within might've captured the args in persistent memory such as the
satisfaction cache (during constrained auto deduction).

gcc/cp/ChangeLog:

* pt.cc (try_class_unification): Don't ggc_free the copy of
'targs'.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-placeholder13.C: New test.
---
  gcc/cp/pt.cc |  5 -
  .../g++.dg/cpp2a/concepts-placeholder13.C| 16 
  2 files changed, 16 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index e065ace5c55..68a056acf8b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23895,11 +23895,6 @@ try_class_unification (tree tparms, tree targs, tree 
parm, tree arg,
  err = unify (tparms, targs, CLASSTYPE_TI_ARGS (parm),
 CLASSTYPE_TI_ARGS (arg), UNIFY_ALLOW_NONE, explain_p);
  
-  if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (targs))

-for (tree level : tree_vec_range (targs))
-  ggc_free (level);
-  ggc_free (targs);
-
return err ? NULL_TREE : arg;
  }
  
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C

new file mode 100644
index 000..fd4a05c05e1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
@@ -0,0 +1,16 @@
+// PR c++/109556
+// { dg-do compile { target c++20 } }
+
+template
+concept C = (N != 0);
+
+template
+struct A { };
+
+template auto M>
+void f(A);
+
+int main() {
+  f(A<1, 42>{});
+  f(A<2, 42>{});
+}




Re: [13 PATCH RFA] c++: fix 'unsigned __int128_t' semantics [PR108099]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
On Wed, Apr 19, 2023 at 12:48:42PM -0400, Jason Merrill wrote:
> On 4/19/23 11:26, Jakub Jelinek wrote:
> > On Wed, Apr 19, 2023 at 11:20:09AM -0400, Jason Merrill wrote:
> > > When I was backporting the earlier 108099 patch I finally saw your 
> > > comments on
> > > the PR about the meaning of this pattern with the patch being wrong (and a
> > > regression from 11).  This fixes that regression; fixing the broader 
> > > issues can
> > > wait.
> > > 
> > > Tested x86_64-pc-linux-gnu, OK for 13.1 or wait for 13.2?
> > 
> > I'd wait for 13.2.  We've been burned with trying to rush stuff out at the
> > last minute once this week already ;)
> 
> Fair, though this is much more straightforward than that issue.
> 
> I might revert the previous patch in that case, though; a wrong-code
> regression seems worse than an ICE.

It is wrong code on invalid source that we happen to tolerate, we don't
even know if it is from some real-world code or just some bad reduction.
And I believe in that area other cases just do something that user wouldn't
expect, so I wouldn't worry much about this particular PR for 13.1.

Jakub



Re: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]

2023-04-19 Thread Jakub Jelinek via Gcc-patches
On Wed, Apr 19, 2023 at 12:52:50PM -0400, Jason Merrill wrote:
> On 4/19/23 12:05, Patrick Palka wrote:
> > On Wed, 19 Apr 2023, Patrick Palka wrote:
> > 
> > > Aside from correcting how try_class_unification copies multi-dimensional
> > > 'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
> > > optimization.  But this is potentially wrong since the call to unify
> > > within might've captured the args in persistent memory such as the
> > > satisfaction cache (during constrained auto deduction).
> > > 
> > > Bootstrapped and regtested on x86_64-pc-linux, does this look OK for
> > > trunk/13?
> 
> OK.
> 
> > > No testcase yet since the reduction is still in progress.
> > > The plan would be to push this with a reduced testcase, but I figured
> > > I'd send the actual fix for review now.  Would this be OK for 13.1 or
> > > shall it wait until 13.2?
> 
> Jakub's call, but this regression seems like a blocker to me.

Not doing ggc_free shouldn't really break stuff except increase memory
consumption, so I think this is ok for 13.1.

> > Now with a reduced testcase:
> > 
> > -- >8 --
> > 
> > Subject: [PATCH] c++: bad ggc_free in try_class_unification [PR109556]
> > 
> > Aside from correcting how try_class_unification copies multi-dimensional
> > 'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an
> > optimization.  But this is potentially wrong since the call to unify
> > within might've captured the args in persistent memory such as the
> > satisfaction cache (during constrained auto deduction).
> > 
> > gcc/cp/ChangeLog:
> > 
> > * pt.cc (try_class_unification): Don't ggc_free the copy of
> > 'targs'.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp2a/concepts-placeholder13.C: New test.
> > ---
> >   gcc/cp/pt.cc |  5 -
> >   .../g++.dg/cpp2a/concepts-placeholder13.C| 16 
> >   2 files changed, 16 insertions(+), 5 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
> > 
> > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > index e065ace5c55..68a056acf8b 100644
> > --- a/gcc/cp/pt.cc
> > +++ b/gcc/cp/pt.cc
> > @@ -23895,11 +23895,6 @@ try_class_unification (tree tparms, tree targs, 
> > tree parm, tree arg,
> >   err = unify (tparms, targs, CLASSTYPE_TI_ARGS (parm),
> >  CLASSTYPE_TI_ARGS (arg), UNIFY_ALLOW_NONE, explain_p);
> > -  if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (targs))
> > -for (tree level : tree_vec_range (targs))
> > -  ggc_free (level);
> > -  ggc_free (targs);
> > -
> > return err ? NULL_TREE : arg;
> >   }
> > diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C 
> > b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
> > new file mode 100644
> > index 000..fd4a05c05e1
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C
> > @@ -0,0 +1,16 @@
> > +// PR c++/109556
> > +// { dg-do compile { target c++20 } }
> > +
> > +template
> > +concept C = (N != 0);
> > +
> > +template
> > +struct A { };
> > +
> > +template auto M>
> > +void f(A);
> > +
> > +int main() {
> > +  f(A<1, 42>{});
> > +  f(A<2, 42>{});
> > +}

Jakub



Re: [PATCH v3] doc: Document order of define_peephole2 scanning

2023-04-19 Thread Hans-Peter Nilsson via Gcc-patches
> From: Hans-Peter Nilsson 
> Date: Wed, 19 Apr 2023 06:06:27 +0200
> 
> Patch retracted, at least temporarily.  My "understanding"
> may be clouded by looking at an actual bug.  Sigh.

Mea culpa.  I was looking at the result of one
define_peephole2 and thinking it was due to another, and
also tricked by incorrect code comments (patch posted, will
commit).

TL;DR: Matching indeed does resume with attempting to match
the *first* define_peephole2 replacement insn.  But the
match-and-replacement order is largely undocumented.

Anyway, the missing-context problem I ran into remains: if
you have an insn sequence {foo bar} and a define_peephole2
matching and replacing {bar} into {baz}, the resulting {foo
baz} *will not be matched* against a define_peephole2
looking for {foo baz}.  But, I'm not trying to document this
caveat specifically, though at least it'll now be implied by
the documentation.

This could be fixed by always backing up MAX_INSNS_PER_PEEP2
- 1 insns after a successful replacement.  I'm somewhat
worried that this would also mean lots of futile re-match
attempts.  Thoughts?

(I could also just restart at the BB start, but I see all
this support for backing-up live info by single insns being
used.  Taking notes about a partial match for the first insn
of a failed attempt, as the maximum need to back-up to,
doesn't look like it'd fly, judging from the nonspecific
looking (set dest src) patterns being the first in i386
define_peephole2's match sequences.)

So again: Approvers: pdf output reviewed.  Ok to commit?
-- >8 --
I was a bit surprised when my newly-added define_peephole2 didn't
match, but it was because it was expected to partially match the
generated output of a previous define_peephole2, which matched and
modified the last insn of a sequence to be matched.  I had assumed
that the algorithm backed-up the size of the match-buffer, thereby
exposing newly created opportunities *with sufficient context* to all
define_peephole2's.  While things can change in that direction, let's
start with documenting the current state.

* doc/md.texi (define_peephole2): Document order of scanning.
---
 gcc/doc/md.texi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb..300d104d58ab 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -9362,6 +9362,15 @@ If the preparation falls through (invokes neither 
@code{DONE} nor
 @code{FAIL}), then the @code{define_peephole2} uses the replacement
 template.
 
+Insns are scanned in forward order from beginning to end for each basic
+block.  Matches are attempted in order of @code{define_peephole2}
+appearance in the @file{md} file.  After a successful replacement,
+scanning for further opportunities for @code{define_peephole2}, resumes
+with the first generated replacement insn as the first insn to be
+matched against all @code{define_peephole2}.  For the example above,
+after its successful replacement, the first insn that can be matched by
+a @code{define_peephole2} is @code{(set (match_dup 4) (match_dup 1))}.
+
 @end ifset
 @ifset INTERNALS
 @node Insn Attributes
-- 
2.30.2



[PATCH v3 1/4] ree: Default ree pass for O2 and above for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is the patch-1 for improving ree pass for rs6000 target.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

Add ree pass as a default pass for rs6000 target.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
---
 gcc/common/config/rs6000/rs6000-common.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
-- 
2.31.1



[PATCH v3 2/4] ree : Code movement to avoid adding prototype to improve ree pass for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is the patch-2 to improve ree pass for rs6000 target.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions; this work improves the ree pass to eliminate
such redundant zero and sign extensions.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (get_defs): Move function to avoid prototype.
---
 gcc/ree.cc | 94 +++---
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..93d300a9e96 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,53 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Get all the reaching definitions of an instruction.  The definitions are
+   desired for REG used in INSN.  Return the definition list or NULL if a
+   definition is missing.  If DEST is non-NULL, additionally push the INSN
+   of the definitions onto DEST.  */
+
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg, vec *dest)
+{
+  df_ref use;
+  struct df_link *ref_chain, *ref_link;
+
+  FOR_EACH_INSN_USE (use, insn)
+{
+  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+   return NULL;
+  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+   break;
+}
+
+  if (use == NULL)
+return NULL;
+
+  ref_chain = DF_REF_CHAIN (use);
+
+  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+{
+  /* Problem getting some definition for this instruction.  */
+  if (ref_link->ref == NULL)
+   return NULL;
+  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+   return NULL;
+  /* As global regs are assumed to be defined at each function call
+dataflow can report a call_insn as being a definition of REG.
+But we can't do anything with that in this pass so proceed only
+if the instruction really sets REG in a way that can be deduced
+from the RTL structure.  */
+  if (global_regs[REGNO (reg)]
+ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
+   return NULL;
+}
+
+  if (dest)
+for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+  dest->safe_push (DF_REF_INSN (ref_link->ref));
+
+  return ref_chain;
+}
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -454,53 +501,6 @@ transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
   return false;
 }
 
-/* Get all the reaching definitions of an instruction.  The definitions are
-   desired for REG used in INSN.  Return the definition list or NULL if a
-   definition is missing.  If DEST is non-NULL, additionally push the INSN
-   of the definitions onto DEST.  */
-
-static struct df_link *
-get_defs (rtx_insn *insn, rtx reg, vec *dest)
-{
-  df_ref use;
-  struct df_link *ref_chain, *ref_link;
-
-  FOR_EACH_INSN_USE (use, insn)
-{
-  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
-return NULL;
-  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
-   break;
-}
-
-  gcc_assert (use != NULL);
-
-  ref_chain = DF_REF_CHAIN (use);
-
-  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
-{
-  /* Problem getting some definition for this instruction.  */
-  if (ref_link->ref == NULL)
-return NULL;
-  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
-return NULL;
-  /* As global regs are assumed to be defined at each function call
-dataflow can report a call_insn as being a definition of REG.
-But we can't do anything with that in this pass so proceed only
-if the instruction really sets REG in a way that can be deduced
-from the RTL structure.  */
-  if (global_regs[REGNO (reg)]
- && !set_of (reg, DF_REF_INSN (ref_link->ref)))
-   return NULL;
-}
-
-  if (dest)
-for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
-  dest->safe_push (DF_REF_INSN (ref_link->ref));
-
-  return ref_chain;
-}
-
 /* Get all the reaching uses of an instruction.  The uses are desired for REG
set in INSN.  Return use list or NULL if a use is missing or irregular.  */
 
-- 
2.31.1



[PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is patch-3 to improve ree pass for rs6000 target.
Main functionality routines to improve ree pass.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions; this work improves the ree pass to eliminate
such redundant zero and sign extensions. It adds support for
zero_extend/sign_extend/AND.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 451 --
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 5 files changed, 482 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..053db2e8ff3 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,71 @@ struct ext_cand
 
 static int max_insn_uid;
 
+bool
+reg_used_set_between_p (rtx set, rtx_insn *def_insn, rtx_insn *insn)
+{
+  if (reg_used_between_p (set, def_insn, insn)
+  || reg_set_between_p (set, def_insn, insn))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static unsigned int
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ if (XEXP (insn, 1) == const1_rtx)
+   return 1;
+   }
+  else
+   return 0;
+}
+
+  return 0;
+}
+
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static unsigned int
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+if (XEXP (SET_SRC (body), 1) == const1_rtx)
+  return 1;
+   }
+ else
+  return 0;
+   }
+
+   return 0;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +384,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +424,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = NULL_RTX;
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_AND (cand->mode, XEXP (orig_src, 0), XEXP (orig_src, 1));
+  else
+   temp_extension
+= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
   rtx simplified_temp_extension = simplify_rtx (temp_extension);
   if (simplified_temp_extension)
 temp_extension = simplified_temp_extension;
+
   new_set = gen_rtx_SET (new_reg, temp_extension);
 }
   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
 {
   /* On

[PATCH v3 4/4] ree: Using ABI interfaces to improve ree pass for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is patch 4/4 to improve the ree pass for the rs6000 target.
It adds support for using the ABI interfaces.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions.  Improve the ree pass to eliminate such
redundant zero and sign extensions by adding support
for the ABI interfaces.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Add zero_extend and sign_extend.
Add FUNCTION_ARG_REGNO_P abi interfaces calls and
FUNCTION_VALUE_REGNO_P support.
(add_removable_extension): Add FUNCTION_ARG_REGNO_P abi
interface calls.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C: New test.
---
 gcc/ree.cc| 127 +-
 .../g++.target/powerpc/zext-elim-3.C  |  16 +++
 2 files changed, 113 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..33c803f16ce 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -473,7 +473,8 @@ get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
break;
 }
 
-  gcc_assert (use != NULL);
+  if (use == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (use);
 
@@ -514,7 +515,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -771,6 +773,58 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
 
+  if (cand->code == ZERO_EXTEND)
+{
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+  rtx set = single_set (cand->insn);
+
+  if (!set)
+   return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+
+  if (!get_defs (cand->insn, orig_src, NULL))
+   {
+  bool copy_needed
+= (REGNO (SET_DEST (cand->expr)) != REGNO (XEXP (SET_SRC 
(cand->expr), 0)));
+
+ if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+ && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+ && !FUNCTION_VALUE_REGNO_P (REGNO (orig_src)))
+{
+   if (side_effects_p (PATTERN (cand->insn)))
+ return false;
+
+   struct df_link *uses
+ = get_uses (cand->insn, SET_DEST (PATTERN (cand->insn)));
+
+   if (!uses) return false;
+
+   for (df_link *use = uses; use; use = use->next)
+ {
+   if (!use->ref)
+ return false;
+
+   if (BLOCK_FOR_INSN (cand->insn)
+   != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+ return false;
+
+   rtx_insn *insn = DF_REF_INSN (use->ref);
+
+   if (GET_CODE (PATTERN (insn)) == SET)
+ {
+   rtx_code code = GET_CODE (SET_SRC (PATTERN (insn)));
+   if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+   || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+   || GET_RTX_CLASS (code) == RTX_UNARY)
+ return false;
+  }
+   }
+return true;
+}
+}
+}
+
   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
 
   if (!outcome)
@@ -1112,26 +1166,35 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
   rtx reg = XEXP (src, 0);
   struct df_link *defs, *def;
   ext_cand *cand;
+  defs = get_defs (insn, reg, NULL);
 
   /* Zero-extension of an undefined value is partly defined (it's
 completely undefined for sign-extension, though).  So if there exists
 a path from the entry to this zero-extension that leaves this register
 uninitialized, removing the extension could change the behavior of
 correct programs.  So first, check it is not the case.  */
-  if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
+  if (!defs && code == ZERO_EXTEND && FUNCTION_ARG_REGNO_P (REGNO (reg)))
{
- if (dump_file)
-   {
- fprintf (dump_file, "Cannot eliminate extension:\n");
- print_rtl_single (dump_file, insn);
- fprintf (dump_file, " because it can operate on uninitialized"
- " data\n");
-   }
+ ext_cand e = {expr, code, mode, insn};
+ insn_list->safe_push (e);
  return;
}
 
+
+   if ((code == ZERO_EXTEND
+   && !bitmap_bit_p (init_regs, REGNO (reg
+ {
+   if (dump_file)
+ {
+   fprintf (dump_file, "Cannot eliminate extension:\n")

[COMMITTED 1/2] gcc: xtensa: add data alignment properties to dynconfig

2023-04-19 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New
function.

include/
* xtensa-dynconfig.h (xtensa_config_v4): New struct.
(XCHAL_DATA_WIDTH, XCHAL_UNALIGNED_LOAD_EXCEPTION)
(XCHAL_UNALIGNED_STORE_EXCEPTION, XCHAL_UNALIGNED_LOAD_HW)
(XCHAL_UNALIGNED_STORE_HW, XTENSA_CONFIG_V4_ENTRY_LIST): New
definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v4 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V4_ENTRY_LIST.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 18 
 include/xtensa-dynconfig.h| 59 ++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc 
b/gcc/config/xtensa/xtensa-dynconfig.cc
index 9aea9f253c25..12dce4d1b2aa 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -182,6 +182,24 @@ const struct xtensa_config_v3 *xtensa_get_config_v3 (void)
   return config;
 }
 
+const struct xtensa_config_v4 *xtensa_get_config_v4 (void)
+{
+  static const struct xtensa_config_v4 *config;
+  static const struct xtensa_config_v4 def = {
+  16, /* xchal_data_width */
+  1,  /* xchal_unaligned_load_exception */
+  1,  /* xchal_unaligned_store_exception */
+  0,  /* xchal_unaligned_load_hw */
+  0,  /* xchal_unaligned_store_hw */
+  };
+
+  if (!config)
+config = (const struct xtensa_config_v4 *) xtensa_load_config 
("xtensa_config_v4",
+  
&xtensa_config_v4,
+  &def);
+  return config;
+}
+
 const char * const *xtensa_get_config_strings (void)
 {
   static const char * const *config_strings;
diff --git a/include/xtensa-dynconfig.h b/include/xtensa-dynconfig.h
index 2cc15cc99112..48877ebb6b61 100644
--- a/include/xtensa-dynconfig.h
+++ b/include/xtensa-dynconfig.h
@@ -112,6 +112,15 @@ struct xtensa_config_v3
   int xchal_have_xea3;
 };
 
+struct xtensa_config_v4
+{
+  int xchal_data_width;
+  int xchal_unaligned_load_exception;
+  int xchal_unaligned_store_exception;
+  int xchal_unaligned_load_hw;
+  int xchal_unaligned_store_hw;
+};
+
 typedef struct xtensa_isa_internal_struct xtensa_isa_internal;
 
 extern const void *xtensa_load_config (const char *name,
@@ -120,6 +129,7 @@ extern const void *xtensa_load_config (const char *name,
 extern const struct xtensa_config_v1 *xtensa_get_config_v1 (void);
 extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
+extern const struct xtensa_config_v4 *xtensa_get_config_v4 (void);
 
 #ifdef XTENSA_CONFIG_DEFINITION
 
@@ -207,6 +217,26 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 
(void);
 #define XCHAL_HAVE_XEA3 0
 #endif
 
+#ifndef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH 16
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_EXCEPTION
+#define XCHAL_UNALIGNED_STORE_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_HW
+#define XCHAL_UNALIGNED_LOAD_HW 0
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_HW
+#define XCHAL_UNALIGNED_STORE_HW 0
+#endif
+
 #define XTENSA_CONFIG_ENTRY(a) a
 
 #define XTENSA_CONFIG_V1_ENTRY_LIST \
@@ -276,6 +306,13 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 
(void);
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_EXCLUSIVE), \
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_XEA3)
 
+#define XTENSA_CONFIG_V4_ENTRY_LIST \
+XTENSA_CONFIG_ENTRY(XCHAL_DATA_WIDTH), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_HW), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_HW)
+
 #define XTENSA_CONFIG_INSTANCE_LIST \
 const struct xtensa_config_v1 xtensa_config_v1 = { \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
@@ -285,12 +322,16 @@ const struct xtensa_config_v2 xtensa_config_v2 = { \
 }; \
 const struct xtensa_config_v3 xtensa_config_v3 = { \
 XTENSA_CONFIG_V3_ENTRY_LIST, \
+}; \
+const struct xtensa_config_v4 xtensa_config_v4 = { \
+XTENSA_CONFIG_V4_ENTRY_LIST, \
 }
 
 #define XTENSA_CONFIG_ENTRY_LIST \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
 XTENSA_CONFIG_V2_ENTRY_LIST, \
-XTENSA_CONFIG_V3_ENTRY_LIST
+XTENSA_CONFIG_V3_ENTRY_LIST, \
+XTENSA_CONFIG_V4_ENTRY_LIST
 
 #else /* XTENSA_CONFIG_DEFINITION */
 
@@ -482,6 +523,22 @@ const struct xtensa_config_v3 xtensa_config_v3 = { \
 #undef XCHAL_HAVE_XEA3
 #define XCHAL_HAVE_XEA3(xtensa_get_config_v3 
()->xchal_have_xea3)
 
+
+#undef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH   (xtensa_get_config_v4 
()->xchal_data_width)
+
+#undef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION (xtensa_get_config_v4 
()->xchal_unaligned_load_exception)
+
+#undef XCHAL_UNALIGNED_STORE_EXCE

[COMMITTED 2/2] gcc: xtensa: add -m[no-]strict-align option

2023-04-19 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-opts.h: New header.
* config/xtensa/xtensa.h (STRICT_ALIGNMENT): Redefine as
xtensa_strict_alignment.
* config/xtensa/xtensa.cc (xtensa_option_override): When
-m[no-]strict-align is not specified in the command line, set
xtensa_strict_alignment to 0 if the hardware supports both unaligned
loads and stores or to 1 otherwise.
* config/xtensa/xtensa.opt (mstrict-align): New option.
* doc/invoke.texi (Xtensa Options): Document -m[no-]strict-align.
---
 gcc/config/xtensa/xtensa-opts.h | 28 
 gcc/config/xtensa/xtensa.cc |  4 
 gcc/config/xtensa/xtensa.h  |  2 +-
 gcc/config/xtensa/xtensa.opt|  7 +++
 gcc/doc/invoke.texi | 14 +-
 5 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/xtensa/xtensa-opts.h

diff --git a/gcc/config/xtensa/xtensa-opts.h b/gcc/config/xtensa/xtensa-opts.h
new file mode 100644
index ..f0b8f5b3bfe7
--- /dev/null
+++ b/gcc/config/xtensa/xtensa-opts.h
@@ -0,0 +1,28 @@
+/* Definitions for option handling for Xtensa.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef XTENSA_OPTS_H
+#define XTENSA_OPTS_H
+
+/* Undefined state for the -m[no-]strict-align option.  */
+enum xtensa_strict_alignment_setting {
+  XTENSA_STRICT_ALIGNMENT_UNDEFINED = -1,
+};
+
+#endif
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 7287aa7a258a..9e5d314e143e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2792,6 +2792,10 @@ xtensa_option_override (void)
   if (xtensa_windowed_abi == -1)
 xtensa_windowed_abi = TARGET_WINDOWED_ABI_DEFAULT;
 
+  if (xtensa_strict_alignment == XTENSA_STRICT_ALIGNMENT_UNDEFINED)
+xtensa_strict_alignment = !XCHAL_UNALIGNED_LOAD_HW
+  || !XCHAL_UNALIGNED_STORE_HW;
+
   if (! TARGET_THREADPTR)
 targetm.have_tls = false;
 
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 8ebf37cab33a..34e06afcff48 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -143,7 +143,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Set this nonzero if move instructions will actually fail to work
when given unaligned data.  */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (xtensa_strict_alignment)
 
 /* Promote integer modes smaller than a word to SImode.  Set UNSIGNEDP
for QImode, because there is no 8-bit load from memory with sign
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
index 3a129a4c0393..f16b53bf409f 100644
--- a/gcc/config/xtensa/xtensa.opt
+++ b/gcc/config/xtensa/xtensa.opt
@@ -18,6 +18,9 @@
 ; along with GCC; see the file COPYING3.  If not see
 ; <http://www.gnu.org/licenses/>.
 
+HeaderInclude
+config/xtensa/xtensa-opts.h
+
 mconst16
 Target Mask(CONST16)
 Use CONST16 instruction to load constants.
@@ -64,3 +67,7 @@ Use call0 ABI.
 mabi=windowed
 Target RejectNegative Var(xtensa_windowed_abi, 1)
 Use windowed registers ABI.
+
+mstrict-align
+Target Var(xtensa_strict_alignment) Init(XTENSA_STRICT_ALIGNMENT_UNDEFINED)
+Do not use unaligned memory references.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 57fb170ca4cc..54dcccbc148c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1474,7 +1474,8 @@ See RS/6000 and PowerPC Options.
 -mtarget-align  -mno-target-align
 -mlongcalls  -mno-longcalls
 -mabi=@var{abi-type}
--mextra-l32r-costs=@var{cycles}}
+-mextra-l32r-costs=@var{cycles}
+-mstrict-align  -mno-strict-align}
 
 @emph{zSeries Options}
 See S/390 and zSeries Options.
@@ -34401,6 +34402,17 @@ instructions, in clock cycles.  This affects, when 
optimizing for speed,
 whether loading a constant from literal pool using @code{L32R} or
 synthesizing the constant from a small one with a couple of arithmetic
 instructions.  The default value is 0.
+
+@opindex mstrict-align
+@opindex mno-strict-align
+@item -mstrict-align
+@itemx -mno-strict-align
+Avoid or allow generating memory accesses that may not be aligned on a natural
+object boundary as described in the architecture specification.
+The default is @option{-mno-strict-align} for cores that support both
+unaligned loads and stores in hardware and @option{-mstrict-align} for all
+other cores.
+
 @end table
 
 @n

  1   2   >