[PATCH] Improve DSE to handle stores before __builtin_unreachable ()

2023-06-20 Thread Richard Biener via Gcc-patches
DSE isn't good at identifying program points that end lifetime
of variables that are not associated with virtual operands.  But
at least for those that end basic-blocks we can handle the simple
case where this ending is in the same basic-block as the definition
we want to elide.  That should catch quite some common cases already.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

As you can see from the testcase I had to adjust, this can possibly
lead to more severe issues when one forgets a return (the C++ frontend
places __builtin_unreachable () there).  I'm still planning to push
this improvement unless I hear objections.

Thanks,
Richard.

* tree-ssa-dse.cc (dse_classify_store): When we found
no defs and the basic-block with the original definition
ends in __builtin_unreachable[_trap] the store is dead.

* gcc.dg/tree-ssa/ssa-dse-47.c: New testcase.
* c-c++-common/asan/pr106558.c: Avoid undefined behavior
due to missing return.
---
 gcc/testsuite/c-c++-common/asan/pr106558.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-47.c | 17 +
 gcc/tree-ssa-dse.cc| 21 -
 3 files changed, 38 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-47.c

diff --git a/gcc/testsuite/c-c++-common/asan/pr106558.c 
b/gcc/testsuite/c-c++-common/asan/pr106558.c
index d82b2dc7a83..c8cefdf09ff 100644
--- a/gcc/testsuite/c-c++-common/asan/pr106558.c
+++ b/gcc/testsuite/c-c++-common/asan/pr106558.c
@@ -8,7 +8,7 @@ int **c = &b;
 int d[1];
 int *e = &d[1];
 
-static int f(int *g) {
+static void f(int *g) {
   *b = e;
   *c = e;
   *b = 2;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-47.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-47.c
new file mode 100644
index 000..659f1d0d415
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-47.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-dse1-details" } */
+
+int a;
+int b[3];
+void test()
+{
+  if (a > 0)
+{
+  b[0] = 0;
+  b[1] = 1;
+  b[2] = 2;
+  __builtin_unreachable ();
+}
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead store" 3 "dse1" } } */
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index eabe8ba4522..3c7a2e9992d 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -1118,7 +1118,26 @@ dse_classify_store (ao_ref *ref, gimple *stmt,
   if (defs.is_empty ())
{
  if (ref_may_alias_global_p (ref, false))
-   return DSE_STORE_LIVE;
+   {
+ basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (defvar));
+ /* Assume that BUILT_IN_UNREACHABLE and BUILT_IN_UNREACHABLE_TRAP
+do not need to keep (global) memory side-effects live.
+We do not have virtual operands on BUILT_IN_UNREACHABLE
+but we can do poor mans reachability when the last
+definition we want to elide is in the block that ends
+in such a call.  */
+ if (EDGE_COUNT (def_bb->succs) == 0)
+   if (gcall *last = dyn_cast <gcall *> (*gsi_last_bb (def_bb)))
+ if (gimple_call_builtin_p (last, BUILT_IN_UNREACHABLE)
+ || gimple_call_builtin_p (last,
+   BUILT_IN_UNREACHABLE_TRAP))
+   {
+ if (by_clobber_p)
+   *by_clobber_p = false;
+ return DSE_STORE_DEAD;
+   }
+ return DSE_STORE_LIVE;
+   }
 
  if (by_clobber_p)
*by_clobber_p = false;
-- 
2.35.3


Re: [PATCH] debug/110295 - mixed up early/late debug for member DIEs

2023-06-20 Thread Richard Biener via Gcc-patches
On Mon, 19 Jun 2023, Jason Merrill wrote:

> On 6/19/23 06:15, Richard Biener wrote:
> > When we process a scope typedef during early debug creation and
> > we have already created a DIE for the type when the decl is
> > TYPE_DECL_IS_STUB and this DIE is still in limbo we end up
> > just re-parenting that type DIE instead of properly creating
> > a DIE for the decl, eventually picking up the now completed
> > type and creating DIEs for the members.  Instead this is currently
> > deferred to the second time we come here, when we annotate the
> > DIEs with locations late where now the type DIE is no longer
> > in limbo and we fall through doing the job for the decl.
> > 
> > The following makes sure we perform the necessary early tasks
> > for this by continuing with the decl DIE creation after setting
> > a parent for the limbo type DIE.
> > 
> > [LTO] Bootstrapped on x86_64-unknown-linux-gnu.
> > 
> > OK for trunk?
> > 
> > Thanks,
> > Richard.
> > 
> >  PR debug/110295
> >  * dwarf2out.cc (process_scope_var): Continue processing
> >  the decl after setting a parent in case the existing DIE
> >  was in limbo.
> > 
> > * g++.dg/debug/pr110295.C: New testcase.
> > ---
> >   gcc/dwarf2out.cc  |  3 ++-
> >   gcc/testsuite/g++.dg/debug/pr110295.C | 19 +++
> >   2 files changed, 21 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/debug/pr110295.C
> > 
> > diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
> > index d89ffa66847..e70c47cec8d 100644
> > --- a/gcc/dwarf2out.cc
> > +++ b/gcc/dwarf2out.cc
> > @@ -26533,7 +26533,8 @@ process_scope_var (tree stmt, tree decl, tree
> > origin, dw_die_ref context_die)
> >   
> > if (die != NULL && die->die_parent == NULL)
> >   add_child_die (context_die, die);
> 
> I wonder about reorganizing the function a bit to unify this parent setting
> with the one a bit below, which already falls through to gen_decl_die:
> 
> >   if (decl && DECL_P (decl))
> > {
> >   die = lookup_decl_die (decl);
> > 
> >   /* Early created DIEs do not have a parent as the decls refer
> >   to the function as DECL_CONTEXT rather than the BLOCK.  */
> >   if (die && die->die_parent == NULL)
> > {
> >   gcc_assert (in_lto_p);
> >   add_child_die (context_die, die);
> > }
> > }

Unfortunately that one can pick a DIE for the decl while the former
also handles the stub case in which case it is a different DIE, the
one for the type.
 
> OK either way.

Pushed.

Thanks,
Richard.


[PATCH v3] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F

2023-06-20 Thread Jan Beulich via Gcc-patches
There's no reason to constrain this to AVX512VL, unless instructed so by
-mprefer-vector-width=, as the wider operation is unusable for more
narrow operands only when the possible memory source is a non-broadcast
one. This way even the scalar copysign<mode>3 can benefit from the
operation being a single-insn one (leaving aside moves which the
compiler decides to insert for unclear reasons, and leaving aside the
fact that bcst_mem_operand() is too restrictive for broadcast to be
embedded right into VPTERNLOG*).

While there also bring *<avx512>_vternlog<mode>_all's in sync with that
of the three splitters.

Along with this also request value duplication in
ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating
excess space allocation in .rodata.*, filled with zeros which are never
read.

gcc/

* config/i386/i386-expand.cc (ix86_expand_copysign): Request
value duplication by ix86_build_signbit_mask() when AVX512F and
not HFmode.
* config/i386/sse.md (*<avx512>_vternlog<mode>_all): Convert to
2-alternative form. Adjust "mode" attribute. Add "enabled"
attribute.
(*<avx512>_vpternlog<mode>_1): Also permit when TARGET_AVX512F
&& !TARGET_PREFER_AVX256.
(*<avx512>_vpternlog<mode>_2): Likewise.
(*<avx512>_vpternlog<mode>_3): Likewise.

gcc/testsuite/
* gcc.target/i386/avx512f-copysign.c: New test.
---
I haven't been able to find documentation on the dejagnu(?) regex syntax
(?:...). With ordinary (...) failing (producing twice as many matches),
I could only derive this from other scan-assembler patterns.

I guess the underlying pattern, going along the lines of what
<mask_codefor>one_cmpl<mode>2<mask_name> uses, can be applied elsewhere
as well.

HFmode could use embedded broadcast too for copysign and alike, but that
would need to be V2HF -> V8HF (for which I don't think there are any
existing patterns).
---
v3: Adjust insn conditional as well. Add testcase.
v2: Respect -mprefer-vector-width=.

--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2266,7 +2266,7 @@ ix86_expand_copysign (rtx operands[])
   else
 dest = NULL_RTX;
   op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
-  mask = ix86_build_signbit_mask (vmode, 0, 0);
+  mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0);
 
   if (CONST_DOUBLE_P (operands[1]))
 {
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12399,22 +12399,35 @@
(set_attr "mode" "")])
 
 (define_insn "*_vternlog_all"
-  [(set (match_operand:V 0 "register_operand" "=v")
+  [(set (match_operand:V 0 "register_operand" "=v,v")
(unspec:V
- [(match_operand:V 1 "register_operand" "0")
-  (match_operand:V 2 "register_operand" "v")
-  (match_operand:V 3 "bcst_vector_operand" "vmBr")
+ [(match_operand:V 1 "register_operand" "0,0")
+  (match_operand:V 2 "register_operand" "v,v")
+  (match_operand:V 3 "bcst_vector_operand" "vBr,m")
   (match_operand:SI 4 "const_0_to_255_operand")]
  UNSPEC_VTERNLOG))]
-  "TARGET_AVX512F
+  "( == 64 || TARGET_AVX512VL
+|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
 /* Disallow embeded broadcast for vector HFmode since
it's not real AVX512FP16 instruction.  */
   && (GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
  || GET_CODE (operands[3]) != VEC_DUPLICATE)"
-  "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+{
+  if (TARGET_AVX512VL)
+return "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+  else
+return "vpternlog\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}";
+}
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set (attr "mode")
+(if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "")
+ (const_string "XI")))
+   (set (attr "enabled")
+   (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref " == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
 
 ;; There must be lots of other combinations like
 ;;
@@ -12443,7 +12456,8 @@
  (any_logic2:V
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"]
-  "( == 64 || TARGET_AVX512VL)
+  "( == 64 || TARGET_AVX512VL
+|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12527,7 +12541,8 @@
  (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
  (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
-  "( == 64 || TARGET_AVX512VL)
+  "( == 64 || TARGET_AVX512VL
+|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12610,7 +12625,8 @@
(match_operand:V 1 "regmem_or_bitnot_regmem_o

Re: [PATCH] tree-optimization/110243 - kill off IVOPTs split_offset

2023-06-20 Thread Richard Biener via Gcc-patches
On Mon, 19 Jun 2023, Richard Sandiford wrote:

> Jeff Law  writes:
> > On 6/16/23 06:34, Richard Biener via Gcc-patches wrote:
> >> IVOPTs has strip_offset which suffers from the same issues regarding
> >> integer overflow that split_constant_offset did but the latter was
> >> fixed quite some time ago.  The following implements strip_offset
> >> in terms of split_constant_offset, removing the redundant and
> >> incorrect implementation.
> >> 
> >> The implementations are not exactly the same, strip_offset relies
> >> on ptrdiff_tree_p to fend off too large offsets while split_constant_offset
> >> simply assumes those do not happen and truncates them.  By
> >> the same means strip_offset also handles POLY_INT_CSTs but
> >> split_constant_offset does not.  Massaging the latter to
> >> behave like strip_offset in those cases might be the way to go?
> >> 
> >> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >> 
> >> Comments?
> >> 
> >> Thanks,
> >> Richard.
> >> 
> >>PR tree-optimization/110243
> >>* tree-ssa-loop-ivopts.cc (strip_offset_1): Remove.
> >>(strip_offset): Make it a wrapper around split_constant_offset.
> >> 
> >>* gcc.dg/torture/pr110243.c: New testcase.
> > Your call -- IMHO you know this code far better than I.
> 
> +1, but LGTM FWIW.  I couldn't see anything obvious (and valid)
> that strip_offset_1 handles and split_constant_offset doesn't.

I think it's only the INTEGER_CST vs. ptrdiff_tree_p where the
latter (used in strip_offset_1) handles POLY_INT_CSTs.  strip_offset
also computes the offset in poly_int64 and checks it fits
(to some extent) while split_constant_offset simply converts all
INTEGER_CSTs to ssizetype because it knows it starts from addresses
only.

An alternative fix would have been to rewrite signed arithmetic
to unsigned in strip_offset_1.

I wonder if we want to change split_constant_offset to record the
offset in a poly_int64 and have a wrapper converting it back to
a tree for data-ref analysis.  Then we can at least put
cst_and_fits_in_hwi checks in the code?  The code also tracks
a range so it doesn't look like handling POLY_INT_CSTs is easy
there - do you remember whether that was important for IVOPTs?

Thanks,
Richard.


[COMMITTED] ada: Fix edge case in Ada.Calendar.Formatting.Time_Of

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Before this patch, Ada.Calendar.Formatting.Time_Of executed extra code
when passed a number of seconds equal to the number of seconds in a day.
This caused the result to be off, perhaps because a statement resetting
the number of seconds to zero was missing.

Instead of adding such a statement, this patch removes the special
handling of the problematic case, which gives the intended result.

gcc/ada/

* libgnat/a-calfor.adb (Time_Of): Fix handling of special case.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/a-calfor.adb | 31 +++
 1 file changed, 3 insertions(+), 28 deletions(-)

diff --git a/gcc/ada/libgnat/a-calfor.adb b/gcc/ada/libgnat/a-calfor.adb
index 3325e562746..18f4e7388df 100644
--- a/gcc/ada/libgnat/a-calfor.adb
+++ b/gcc/ada/libgnat/a-calfor.adb
@@ -590,10 +590,6 @@ package body Ada.Calendar.Formatting is
   Leap_Second : Boolean := False;
   Time_Zone   : Time_Zones.Time_Offset := 0) return Time
is
-  Adj_Year  : Year_Number  := Year;
-  Adj_Month : Month_Number := Month;
-  Adj_Day   : Day_Number   := Day;
-
   H  : constant Integer := 1;
   M  : constant Integer := 1;
   Se : constant Integer := 1;
@@ -612,32 +608,11 @@ package body Ada.Calendar.Formatting is
  raise Constraint_Error;
   end if;
 
-  --  A Seconds value of 86_400 denotes a new day. This case requires an
-  --  adjustment to the input values.
-
-  if Seconds = 86_400.0 then
- if Day < Days_In_Month (Month)
-   or else (Is_Leap (Year)
-  and then Month = 2)
- then
-Adj_Day := Day + 1;
- else
-Adj_Day := 1;
-
-if Month < 12 then
-   Adj_Month := Month + 1;
-else
-   Adj_Month := 1;
-   Adj_Year  := Year + 1;
-end if;
- end if;
-  end if;
-
   return
 Formatting_Operations.Time_Of
-  (Year => Adj_Year,
-   Month=> Adj_Month,
-   Day  => Adj_Day,
+  (Year => Year,
+   Month=> Month,
+   Day  => Day,
Day_Secs => Seconds,
Hour => H,
Minute   => M,
-- 
2.40.0



[COMMITTED] ada: Spurious error on package instantiation

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Javier Miranda 

The compiler reports spurious errors processing the instantiation
of a generic package when the instantiation is performed in the
body of a package that has a private type T, a dispatching
primitive of T has the same name as a component of T, and
an extension of T is used as the actual parameter for a
formal derived type of T in the instantiation.

gcc/ada/

* sem_ch4.adb
(Try_Selected_Component_In_Instance): New subprogram; factorizes
existing code.
(Find_Component_In_Instance): Moved inside the new subprogram.
(Analyze_Selected_Component): Invoke the new subprogram before
trying the Object.Operation notation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch4.adb | 247 ++--
 1 file changed, 146 insertions(+), 101 deletions(-)

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index b4b158a3ff4..fafb7e63110 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -4913,16 +4913,6 @@ package body Sem_Ch4 is
   --  the discriminant values for a discriminant constraint)
   --  are unprefixed discriminant names.
 
-  procedure Find_Component_In_Instance (Rec : Entity_Id);
-  --  In an instance, a component of a private extension may not be visible
-  --  while it was visible in the generic. Search candidate scope for a
-  --  component with the proper identifier. This is only done if all other
-  --  searches have failed. If a match is found, the Etype of both N and
-  --  Sel are set from this component, and the entity of Sel is set to
-  --  reference this component. If no match is found, Entity (Sel) remains
-  --  unset. For a derived type that is an actual of the instance, the
-  --  desired component may be found in any ancestor.
-
   function Has_Mode_Conformant_Spec (Comp : Entity_Id) return Boolean;
   --  It is known that the parent of N denotes a subprogram call. Comp
   --  is an overloadable component of the concurrent type of the prefix.
@@ -4941,6 +4931,14 @@ package body Sem_Ch4 is
   --  _Procedure, and collect all its interpretations (since it may be an
   --  overloaded interface primitive); otherwise return False.
 
+  function Try_Selected_Component_In_Instance
+(Typ : Entity_Id) return Boolean;
+  --  If Typ is the actual for a formal derived type, or a derived type
+  --  thereof, the component inherited from the generic parent may not
+  --  be visible in the actual, but the selected component is legal. Climb
+  --  up the derivation chain of the generic parent type and return True if
+  --  we find the proper ancestor type; otherwise return False.
+
   --
   -- Constraint_Has_Unprefixed_Discriminant_Reference --
   --
@@ -4990,49 +4988,6 @@ package body Sem_Ch4 is
  return False;
   end Constraint_Has_Unprefixed_Discriminant_Reference;
 
-  
-  -- Find_Component_In_Instance --
-  
-
-  procedure Find_Component_In_Instance (Rec : Entity_Id) is
- Comp : Entity_Id;
- Typ  : Entity_Id;
-
-  begin
- Typ := Rec;
- while Present (Typ) loop
-Comp := First_Component (Typ);
-while Present (Comp) loop
-   if Chars (Comp) = Chars (Sel) then
-  Set_Entity_With_Checks (Sel, Comp);
-  Set_Etype (Sel, Etype (Comp));
-  Set_Etype (N,   Etype (Comp));
-  return;
-   end if;
-
-   Next_Component (Comp);
-end loop;
-
---  If not found, the component may be declared in the parent
---  type or its full view, if any.
-
-if Is_Derived_Type (Typ) then
-   Typ := Etype (Typ);
-
-   if Is_Private_Type (Typ) then
-  Typ := Full_View (Typ);
-   end if;
-
-else
-   return;
-end if;
- end loop;
-
- --  If we fall through, no match, so no changes made
-
- return;
-  end Find_Component_In_Instance;
-
   --
   -- Has_Mode_Conformant_Spec --
   --
@@ -5170,6 +5125,122 @@ package body Sem_Ch4 is
  return Present (Candidate);
   end Try_By_Protected_Procedure_Prefixed_View;
 
+  
+  -- Try_Selected_Component_In_Instance --
+  
+
+  function Try_Selected_Component_In_Instance
+(Typ : Entity_Id) return Boolean
+  is
+ procedure Find_Component_In_Instance (Rec : Entity_Id);
+ --  In an instance, a component of a private extension may not be
+ --  visible wh

[COMMITTED] ada: Fix internal error on aggregate within container aggregate

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This just applies the same fix to Expand_Array_Aggregate as the one that was
recently applied to Convert_To_Assignments.

gcc/ada/

* exp_aggr.adb (Convert_To_Assignments): Tweak comment.
(Expand_Array_Aggregate): Do not delay the expansion if the parent
node is a container aggregate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index fb5f404922f..5e22fefbc1d 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -4244,9 +4244,9 @@ package body Exp_Aggr is
   --  done top down from above.
 
   if
- --  Internal aggregate (transformed when expanding the parent)
- --  excluding the Container aggregate as these are transformed to
- --  procedure call later.
+ --  Internal aggregates (transformed when expanding the parent),
+ --  excluding container aggregates as these are transformed into
+ --  subprogram calls later.
 
  (Parent_Kind in
 N_Component_Association | N_Aggregate | N_Extension_Aggregate
@@ -6108,7 +6108,8 @@ package body Exp_Aggr is
   --  STEP 3
 
   --  Delay expansion for nested aggregates: it will be taken care of when
-  --  the parent aggregate is expanded.
+  --  the parent aggregate is expanded, excluding container aggregates as
+  --  these are transformed into subprogram calls later.
 
   Parent_Node := Parent (N);
   Parent_Kind := Nkind (Parent_Node);
@@ -6118,9 +6119,10 @@ package body Exp_Aggr is
  Parent_Kind := Nkind (Parent_Node);
   end if;
 
-  if Parent_Kind = N_Aggregate
-or else Parent_Kind = N_Extension_Aggregate
-or else Parent_Kind = N_Component_Association
+  if ((Parent_Kind = N_Component_Association
+or else Parent_Kind = N_Aggregate
+or else Parent_Kind = N_Extension_Aggregate)
+   and then not Is_Container_Aggregate (Parent_Node))
 or else (Parent_Kind = N_Object_Declaration
   and then (Needs_Finalization (Typ)
  or else Is_Special_Return_Object
-- 
2.40.0



[COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Daniel King 

The package Interfaces.CHERI provides intrinsic bindings and
helper functions to allow software to query, create, and
manipulate CHERI capabilities.

gcc/ada/

* libgnat/i-cheri.ads: Add CHERI intrinsics and helper functions.
* libgnat/i-cheri.adb: Likewise

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/i-cheri.adb |  75 ++
 gcc/ada/libgnat/i-cheri.ads | 470 
 2 files changed, 545 insertions(+)
 create mode 100644 gcc/ada/libgnat/i-cheri.adb
 create mode 100644 gcc/ada/libgnat/i-cheri.ads

diff --git a/gcc/ada/libgnat/i-cheri.adb b/gcc/ada/libgnat/i-cheri.adb
new file mode 100644
index 000..174fdcc5b47
--- /dev/null
+++ b/gcc/ada/libgnat/i-cheri.adb
@@ -0,0 +1,75 @@
+--
+--  --
+-- GNAT COMPILER COMPONENTS --
+--  --
+--   I N T E R F A C E S . C H E R I--
+--  --
+--  S p e c --
+--  --
+--Copyright (C) 2023, AdaCore   --
+--  --
+-- GNAT is free software;  you can  redistribute it  and/or modify it under --
+-- terms of the  GNU General Public License as published  by the Free Soft- --
+-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
+-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
+-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
+-- or FITNESS FOR A PARTICULAR PURPOSE. --
+--  --
+-- As a special exception under Section 7 of GPL version 3, you are granted --
+-- additional permissions described in the GCC Runtime Library Exception,   --
+-- version 3.1, as published by the Free Software Foundation.   --
+--  --
+-- You should have received a copy of the GNU General Public License and--
+-- a copy of the GCC Runtime Library Exception along with this program; --
+-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see--
+-- .  --
+--  --
+-- GNAT was originally developed  by the GNAT team at  New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc.  --
+--  --
+--
+
+package body Interfaces.CHERI is
+
+   
+   -- Set_Address_And_Bounds --
+   
+
+   procedure Set_Address_And_Bounds
+ (Cap : in out Capability;
+  Address :System.Storage_Elements.Integer_Address;
+  Length  :Bounds_Length)
+   is
+   begin
+  Cap := Capability_With_Address_And_Bounds (Cap, Address, Length);
+   end Set_Address_And_Bounds;
+
+   --
+   -- Set_Address_And_Exact_Bounds --
+   --
+
+   procedure Set_Address_And_Exact_Bounds
+ (Cap : in out Capability;
+  Address :System.Storage_Elements.Integer_Address;
+  Length  :Bounds_Length)
+   is
+   begin
+  Cap := Capability_With_Address_And_Exact_Bounds (Cap, Address, Length);
+   end Set_Address_And_Exact_Bounds;
+
+   --
+   -- Align_Address_Up --
+   --
+
+   function Align_Address_Up
+ (Address : System.Storage_Elements.Integer_Address;
+  Length  : Bounds_Length)
+  return System.Storage_Elements.Integer_Address
+   is
+  Mask : constant System.Storage_Elements.Integer_Address :=
+Representable_Alignment_Mask (Length);
+   begin
+  return (Address + (not Mask)) and Mask;
+   end Align_Address_Up;
+
+end Interfaces.CHERI;
diff --git a/gcc/ada/libgnat/i-cheri.ads b/gcc/ada/libgnat/i-cheri.ads
new file mode 100644
index 000..547b033dbaf
--- /dev/null
+++ b/gcc/ada/libgnat/i-cheri.ads
@@ -0,0 +1,470 @@
+--
+--  --
+-- GNAT COMPILER COMPONENTS --
+--

[COMMITTED] ada: Fix type derivation of subtype of derived type

2023-06-20 Thread Marc Poulhiès via Gcc-patches
Deriving from a subtype of a derived type of a private type, whose full
view is itself a derived type of a discriminated record with a known
discriminant was failing with the error message:

  invalid constraint: type has no discriminant

The compiler needs to use the full view to be able to constrain the
type.

Also fix minor typo in comments.

gcc/ada/

* sem_ch3.adb (Build_Derived_Record_Type): Use full view as
Parent_Base if needed.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch3.adb | 36 +---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index a0677114288..b9302aae2a9 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -5540,7 +5540,7 @@ package body Sem_Ch3 is
   --  avoided here, when the created subtype declaration is analyzed. (See
   --  Build_Derived_Types)
 
-  --  This also happens when the full view of a private type is derived
+  --  This also happens when the full view of a private type is a derived
   --  type with constraints. In this case the entity has been introduced
   --  in the private declaration.
 
@@ -8669,7 +8669,7 @@ package body Sem_Ch3 is
 
--  5. FIRST TRANSFORMATION FOR DERIVED RECORDS
--
-   --  Regardless of whether we dealing with a tagged or untagged type
+   --  Regardless of whether we are dealing with a tagged or untagged type
--  we will transform all derived type declarations of the form
--
--   type T is new R (...) [with ...];
@@ -9056,6 +9056,36 @@ package body Sem_Ch3 is
  Parent_Base := Base_Type (Parent_Base);
   end if;
 
+  --  If the parent base is a private type and only its full view has
+  --  discriminants, use the full view's base type.
+
+  --  This can happen when we are deriving from a subtype of a derived type
+  --  of a private type derived from a discriminated type with known
+  --  discriminant:
+  --
+  --  package Pkg;
+  -- type Root_Type(I: Positive) is record
+  --   ...
+  -- end record;
+  -- type Bounded_Root_Type is private;
+  --  private
+  -- type Bounded_Root_Type is new Root_Type(10);
+  --  end Pkg;
+  --
+  --  package Pkg2 is
+  -- type Constrained_Root_Type is new Pkg.Bounded_Root_Type;
+  --  end Pkg2;
+  --  subtype Sub_Base is Pkg2.Constrained_Root_Type;
+  --  type New_Der_Type is new Sub_Base;
+
+  if Is_Private_Type (Parent_Base)
+and then Present (Full_View (Parent_Base))
+and then not Has_Discriminants (Parent_Base)
+and then Has_Discriminants (Full_View (Parent_Base))
+  then
+ Parent_Base := Base_Type (Full_View (Parent_Base));
+  end if;
+
   --  AI05-0115: if this is a derivation from a private type in some
   --  other scope that may lead to invisible components for the derived
   --  type, mark it accordingly.
@@ -9287,7 +9317,7 @@ package body Sem_Ch3 is
 Is_Completion => False, Derive_Subps => False);
 
  --  ??? This needs re-examination to determine whether the
- --  above call can simply be replaced by a call to Analyze.
+ --  following call can simply be replaced by a call to Analyze.
 
  Set_Analyzed (New_Decl);
 
-- 
2.40.0



[COMMITTED] ada: Pass Error_Node to calls to Error_Msg in lib-load.adb

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Ghjuvan Lacambre 

When not passing Error_Node, Error_Msg will treat Current_Node as the
node attached to the message. When this happens in lib-load.adb due to a
file that cannot be loaded, Current_Node might reference a node that
doesn't actually exist. This is a problem when using -gnatdJ and
-fdiagnostics-format, as in this case GNAT will attempt to retrieve
information from the node attached to the message and thus crash when
said node is invalid.

gcc/ada/

* lib-load.adb (Load_Unit): Pass Error_Node to calls to Error_Msg.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/lib-load.adb | 31 +++
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/lib-load.adb b/gcc/ada/lib-load.adb
index d79ee438243..72196b437f5 100644
--- a/gcc/ada/lib-load.adb
+++ b/gcc/ada/lib-load.adb
@@ -645,11 +645,16 @@ package body Lib.Load is
   if Is_Predefined_File_Name (Fname) then
  Error_Msg_Unit_1 := Uname_Actual;
  Error_Msg
-   ("$$ is not a language defined unit", Load_Msg_Sloc);
+   ("$$ is not a language defined unit",
+Load_Msg_Sloc,
+Error_Node);
   else
  Error_Msg_File_1 := Fname;
  Error_Msg_Unit_1 := Uname_Actual;
- Error_Msg ("file{ does not contain unit$", Load_Msg_Sloc);
+ Error_Msg
+   ("file{ does not contain unit$",
+Load_Msg_Sloc,
+Error_Node);
   end if;
 
   Write_Dependency_Chain;
@@ -697,7 +702,8 @@ package body Lib.Load is
 end if;
 
 if Present (Error_Node) then
-   Error_Msg ("circular unit dependency", Load_Msg_Sloc);
+   Error_Msg
+ ("circular unit dependency", Load_Msg_Sloc, Error_Node);
Write_Dependency_Chain;
 else
Load_Stack.Decrement_Last;
@@ -798,11 +804,14 @@ package body Lib.Load is
 then
Error_Msg_File_1 := Unit_File_Name (Corr_Body);
Error_Msg
- ("cannot compile subprogram in file {!", Load_Msg_Sloc);
+ ("cannot compile subprogram in file {!",
+  Load_Msg_Sloc,
+  Error_Node);
Error_Msg_File_1 := Unit_File_Name (Unum);
Error_Msg
  ("\incorrect spec in file { must be removed first!",
-  Load_Msg_Sloc);
+  Load_Msg_Sloc,
+  Error_Node);
Unum := No_Unit;
goto Done;
 end if;
@@ -879,15 +888,21 @@ package body Lib.Load is
 
   Error_Msg_Unit_1 := Uname_Actual;
   Error_Msg -- CODEFIX
-("$$ is not a predefined library unit", Load_Msg_Sloc);
+("$$ is not a predefined library unit",
+ Load_Msg_Sloc,
+ Error_Node);
 
else
   Error_Msg_File_1 := Fname;
 
   if Src_Ind = No_Access_To_Source_File then
- Error_Msg ("no read access to file{", Load_Msg_Sloc);
+ Error_Msg
+   ("no read access to file{",
+Load_Msg_Sloc,
+Error_Node
+);
   else
- Error_Msg ("file{ not found", Load_Msg_Sloc);
+ Error_Msg ("file{ not found", Load_Msg_Sloc, Error_Node);
   end if;
end if;
 
-- 
2.40.0



[COMMITTED] ada: Small fixes to handling of private views in instances

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The main change is the removal of the special bypass for private views in
Resolve_Implicit_Dereference, which in exchange requires additional work
in Check_Generic_Actuals and a couple more calls to Set_Global_Type in
Save_References_In_Identifier.  This also removes an unused parameter in
Convert_View and adds a missing comment in Build_Derived_Record_Type.

gcc/ada/

* exp_ch7.adb (Convert_View): Remove Ind parameter and adjust.
* sem_ch12.adb (Check_Generic_Actuals): Check the type of both in
and in out actual objects, as well as the type of formal parameters
of actual subprograms.  Extend the condition under which the views
are swapped to nested generic constructs.
(Save_References_In_Identifier): Call Set_Global_Type on a global
identifier rewritten as an explicit dereference, either directly
or after having first been rewritten as a function call.
(Save_References_In_Operator): Set N2 unconditionally and reuse it.
* sem_ch3.adb (Build_Derived_Record_Type): Add missing comment.
* sem_res.adb (Resolve_Implicit_Dereference): Remove special bypass
for private views in instances.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb  |  24 ++--
 gcc/ada/sem_ch12.adb | 139 ---
 gcc/ada/sem_ch3.adb  |   9 ++-
 gcc/ada/sem_res.adb  |  11 
 4 files changed, 92 insertions(+), 91 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index 42b41e5cf6b..f82301c0acd 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -394,13 +394,9 @@ package body Exp_Ch7 is
--  Check recursively whether a loop or block contains a subprogram that
--  may need an activation record.
 
-   function Convert_View
- (Proc : Entity_Id;
-  Arg  : Node_Id;
-  Ind  : Pos := 1) return Node_Id;
+   function Convert_View (Proc : Entity_Id; Arg  : Node_Id) return Node_Id;
--  Proc is one of the Initialize/Adjust/Finalize operations, and Arg is the
-   --  argument being passed to it. Ind indicates which formal of procedure
-   --  Proc we are trying to match. This function will, if necessary, generate
+   --  argument being passed to it. This function will, if necessary, generate
--  a conversion between the partial and full view of Arg to match the type
--  of the formal of Proc, or force a conversion to the class-wide type in
--  the case where the operation is abstract.
@@ -4402,22 +4398,12 @@ package body Exp_Ch7 is
-- Convert_View --
--
 
-   function Convert_View
- (Proc : Entity_Id;
-  Arg  : Node_Id;
-  Ind  : Pos := 1) return Node_Id
-   is
-  Fent : Entity_Id := First_Entity (Proc);
-  Ftyp : Entity_Id;
+   function Convert_View (Proc : Entity_Id; Arg  : Node_Id) return Node_Id is
+  Ftyp : constant Entity_Id := Etype (First_Formal (Proc));
+
   Atyp : Entity_Id;
 
begin
-  for J in 2 .. Ind loop
- Next_Entity (Fent);
-  end loop;
-
-  Ftyp := Etype (Fent);
-
   if Nkind (Arg) in N_Type_Conversion | N_Unchecked_Type_Conversion then
  Atyp := Entity (Subtype_Mark (Arg));
   else
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index f584a9f3fb5..a65bd0fdfb5 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -6964,8 +6964,61 @@ package body Sem_Ch12 is
  (Instance  : Entity_Id;
   Is_Formal_Box : Boolean)
is
-  E  : Entity_Id;
+  Gen_Id : constant Entity_Id
+:= (if Is_Generic_Unit (Instance) then
+  Instance
+elsif Is_Wrapper_Package (Instance) then
+  Generic_Parent
+(Specification
+  (Unit_Declaration_Node (Related_Instance (Instance
+else
+  Generic_Parent (Package_Specification (Instance)));
+  --  The generic unit
+
+  Parent_Scope : constant Entity_Id := Scope (Gen_Id);
+  --  The enclosing scope of the generic unit
+
+  procedure Check_Actual_Type (Typ : Entity_Id);
+  --  If the type of the actual is a private type declared in the
+  --  enclosing scope of the generic unit, the body of the generic
+  --  sees the full view of the type (because it has to appear in
+  --  the corresponding package body). If the type is private now,
+  --  exchange views to restore the proper visibility in the instance.
+
+  ---
+  -- Check_Actual_Type --
+  ---
+
+  procedure Check_Actual_Type (Typ : Entity_Id) is
+ Btyp : constant Entity_Id := Base_Type (Typ);
+
+  begin
+ --  The exchange is only needed if the generic is defined
+ --  within a package which is not a common ancestor of the
+ --  scope of the instance, and is not already in scope.
+
+ if Is_Private_Type (Btyp)
+   and then Scope (Btyp) = Parent_Scope
+

[COMMITTED] ada: Fix fallout of fix to handling of private views in instances

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

Check_Actual_Type incorrectly switches the view of a private type declared
in the enclosing scope of a generic unit but that has a private ancestor.

gcc/ada/

* einfo.ads (Has_Private_Ancestor): Fix inaccuracy in description.
* sem_ch12.adb (Check_Actual_Type): Do not switch the view of the
type if it has a private ancestor.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/einfo.ads| 2 +-
 gcc/ada/sem_ch12.adb | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
index a861ff787a4..b356b76f0de 100644
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -1969,7 +1969,7 @@ package Einfo is
 --   is defined for the type.
 
 --Has_Private_Ancestor
---   Applies to type extensions. True if some ancestor is derived from a
+--   Applies to derived record types. True if an ancestor is derived from a
 --   private type, making some components invisible and aggregates illegal.
 --   This flag is set at the point of derivation. The legality of the
 --   aggregate must be rechecked because it also depends on the visibility
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index a65bd0fdfb5..a8e7c909c39 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -6980,7 +6980,8 @@ package body Sem_Ch12 is
 
   procedure Check_Actual_Type (Typ : Entity_Id);
   --  If the type of the actual is a private type declared in the
-  --  enclosing scope of the generic unit, the body of the generic
+  --  enclosing scope of the generic unit, but not a derived type
+  --  of a private type declared elsewhere, the body of the generic
   --  sees the full view of the type (because it has to appear in
   --  the corresponding package body). If the type is private now,
   --  exchange views to restore the proper visibility in the instance.
@@ -6999,6 +7000,7 @@ package body Sem_Ch12 is
 
  if Is_Private_Type (Btyp)
and then Scope (Btyp) = Parent_Scope
+   and then not Has_Private_Ancestor (Btyp)
and then Ekind (Parent_Scope) in E_Package | E_Generic_Package
and then Scope (Instance) /= Parent_Scope
and then not Is_Child_Unit (Gen_Id)
-- 
2.40.0



[COMMITTED] ada: Remove references to Might_Not_Return and Always_Return

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Claire Dross 

The Might_Not_Return and Always_Return annotations for GNATprove
should now be replaced by the two more precise aspects
Exceptional_Cases and Always_Terminates.
They make it possible to specify whether a subprogram is allowed to raise
exceptions or fail to complete.

gcc/ada/

* libgnat/a-strfix.ads: Replace Might_Not_Return annotations by
Exceptional_Cases and Always_Terminates aspects.
* libgnat/a-tideio.ads: Idem.
* libgnat/a-tienio.ads: Idem.
* libgnat/a-tifiio.ads: Idem.
* libgnat/a-tiflio.ads: Idem.
* libgnat/a-tiinio.ads: Idem.
* libgnat/a-timoio.ads: Idem.
* libgnat/a-textio.ads: Idem. Also mark functions Name, Col, Line,
and Page as out of SPARK as they might raise Layout_Error.
* libgnarl/a-reatim.ads: Replace Always_Return annotations by
Always_Terminates aspects.
* libgnat/a-chahan.ads: Idem.
* libgnat/a-nbnbig.ads: Idem.
* libgnat/a-nbnbin.ads: Idem.
* libgnat/a-nbnbre.ads: Idem.
* libgnat/a-ngelfu.ads: Idem.
* libgnat/a-nlelfu.ads: Idem.
* libgnat/a-nllefu.ads: Idem.
* libgnat/a-nselfu.ads: Idem.
* libgnat/a-nuelfu.ads: Idem.
* libgnat/a-strbou.ads: Idem.
* libgnat/a-strmap.ads: Idem.
* libgnat/a-strsea.ads: Idem.
* libgnat/a-strsup.ads: Idem.
* libgnat/a-strunb.ads: Idem.
* libgnat/a-strunb__shared.ads: Idem.
* libgnat/g-souinf.ads: Idem.
* libgnat/i-c.ads: Idem.
* libgnat/interfac.ads: Idem.
* libgnat/interfac__2020.ads: Idem.
* libgnat/s-aridou.adb: Idem.
* libgnat/s-arit32.adb: Idem.
* libgnat/s-atacco.ads: Idem.
* libgnat/s-spcuop.ads: Idem.
* libgnat/s-stoele.ads: Idem.
* libgnat/s-vaispe.ads: Idem.
* libgnat/s-vauspe.ads: Idem.
* libgnat/i-cstrin.ads: Add a precondition instead of a
Might_Not_Return annotation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnarl/a-reatim.ads|   4 +-
 gcc/ada/libgnat/a-chahan.ads |   7 +-
 gcc/ada/libgnat/a-nbnbig.ads |   4 +-
 gcc/ada/libgnat/a-nbnbin.ads |   6 +-
 gcc/ada/libgnat/a-nbnbre.ads |   6 +-
 gcc/ada/libgnat/a-ngelfu.ads |   4 +-
 gcc/ada/libgnat/a-nlelfu.ads |   1 -
 gcc/ada/libgnat/a-nllefu.ads |   1 -
 gcc/ada/libgnat/a-nselfu.ads |   1 -
 gcc/ada/libgnat/a-nuelfu.ads |   1 -
 gcc/ada/libgnat/a-strbou.ads |  10 +-
 gcc/ada/libgnat/a-strfix.ads | 169 ++-
 gcc/ada/libgnat/a-strmap.ads |   7 +-
 gcc/ada/libgnat/a-strsea.ads |   6 +-
 gcc/ada/libgnat/a-strsup.ads |   6 +-
 gcc/ada/libgnat/a-strunb.ads |   4 +-
 gcc/ada/libgnat/a-strunb__shared.ads |   4 +-
 gcc/ada/libgnat/a-textio.ads | 300 +--
 gcc/ada/libgnat/a-tideio.ads |  36 ++--
 gcc/ada/libgnat/a-tienio.ads |  39 ++--
 gcc/ada/libgnat/a-tifiio.ads |  39 ++--
 gcc/ada/libgnat/a-tiflio.ads |  39 ++--
 gcc/ada/libgnat/a-tiinio.ads |  38 ++--
 gcc/ada/libgnat/a-timoio.ads |  38 ++--
 gcc/ada/libgnat/g-souinf.ads |   2 +-
 gcc/ada/libgnat/i-c.ads  |   7 +-
 gcc/ada/libgnat/i-cstrin.ads |  29 +--
 gcc/ada/libgnat/interfac.ads |   5 +-
 gcc/ada/libgnat/interfac__2020.ads   |   5 +-
 gcc/ada/libgnat/s-aridou.adb |  10 +-
 gcc/ada/libgnat/s-arit32.adb |  10 +-
 gcc/ada/libgnat/s-atacco.ads |   6 +-
 gcc/ada/libgnat/s-spcuop.ads |   2 +-
 gcc/ada/libgnat/s-stoele.ads |   6 +-
 gcc/ada/libgnat/s-vaispe.ads |   2 +-
 gcc/ada/libgnat/s-vauspe.ads |   2 +-
 36 files changed, 395 insertions(+), 461 deletions(-)

diff --git a/gcc/ada/libgnarl/a-reatim.ads b/gcc/ada/libgnarl/a-reatim.ads
index c5009d25cff..a616d579633 100644
--- a/gcc/ada/libgnarl/a-reatim.ads
+++ b/gcc/ada/libgnarl/a-reatim.ads
@@ -39,9 +39,9 @@ pragma Elaborate_All (System.Task_Primitives.Operations);
 package Ada.Real_Time with
   SPARK_Mode,
   Abstract_State => (Clock_Time with Synchronous),
-  Initializes=> Clock_Time
+  Initializes=> Clock_Time,
+  Always_Terminates
 is
-   pragma Annotate (GNATprove, Always_Return, Real_Time);
 
pragma Compile_Time_Error
  (Duration'Size /= 64,
diff --git a/gcc/ada/libgnat/a-chahan.ads b/gcc/ada/libgnat/a-chahan.ads
index 159cd70218e..89b2d68ccc2 100644
--- a/gcc/ada/libgnat/a-chahan.ads
+++ b/gcc/ada/libgnat/a-chahan.ads
@@ -40,14 +40,13 @@ pragma Assertion_Policy (Post => Ignore);
 
 with Ada.Characters.Latin_1;
 
-package Ada.Characters.Handling
-  with SPARK_Mode
+package Ada.Characters.Handling with
+  SPARK_Mode,
+  Always_Terminates
 is
pragma Pure;
--  In accordance with Ada 2005 AI-362
 
-   pragma Annotate (GNATprove, Always_Return, Handling);
-

-- Character 

[COMMITTED] ada: Update annotations in runtime for proof

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Yannick Moy 

With bump of stable SPARK used for proof of the runtime,
some annotations need to change.

gcc/ada/

* libgnat/s-aridou.adb (Scaled_Divide): Add assertions.
* libgnat/s-valuti.adb: Add Loop_Variant.
* libgnat/s-valuti.ads: Add Exceptional_Cases on No_Return
procedure.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/s-aridou.adb | 11 +++
 gcc/ada/libgnat/s-valuti.adb |  2 ++
 gcc/ada/libgnat/s-valuti.ads |  3 ++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/libgnat/s-aridou.adb b/gcc/ada/libgnat/s-aridou.adb
index 66ace8071ff..831590ce387 100644
--- a/gcc/ada/libgnat/s-aridou.adb
+++ b/gcc/ada/libgnat/s-aridou.adb
@@ -2580,8 +2580,19 @@ is
 pragma Assert
   (Big (Double_Uns (Hi (T2))) + Big (Double_Uns (Hi (T1))) =
Big (Double_Uns (D (1;
+pragma Assert
+  (Is_Mult_Decomposition (D1 => Big (Double_Uns (D (1))),
+  D2 => Big (Double_Uns (D (2))),
+  D3 => Big (Double_Uns (D (3))),
+  D4 => Big (Double_Uns (D (4);
  else
 D (1) := 0;
+
+pragma Assert
+  (Is_Mult_Decomposition (D1 => Big (Double_Uns (D (1))),
+  D2 => Big (Double_Uns (D (2))),
+  D3 => Big (Double_Uns (D (3))),
+  D4 => Big (Double_Uns (D (4);
  end if;
 
   else
diff --git a/gcc/ada/libgnat/s-valuti.adb b/gcc/ada/libgnat/s-valuti.adb
index ec6fdb03225..ee37c1a636b 100644
--- a/gcc/ada/libgnat/s-valuti.adb
+++ b/gcc/ada/libgnat/s-valuti.adb
@@ -123,6 +123,7 @@ is
   while F < L and then S (F) = ' ' loop
  pragma Loop_Invariant (F in S'First .. L - 1);
  pragma Loop_Invariant (for all J in S'First .. F => S (J) = ' ');
+ pragma Loop_Variant (Increases => F);
  F := F + 1;
   end loop;
 
@@ -139,6 +140,7 @@ is
   while S (L) = ' ' loop
  pragma Loop_Invariant (L in F + 1 .. S'Last);
  pragma Loop_Invariant (for all J in L .. S'Last => S (J) = ' ');
+ pragma Loop_Variant (Decreases => L);
  L := L - 1;
   end loop;
 
diff --git a/gcc/ada/libgnat/s-valuti.ads b/gcc/ada/libgnat/s-valuti.ads
index 1faa6471d0d..22d0612bc32 100644
--- a/gcc/ada/libgnat/s-valuti.ads
+++ b/gcc/ada/libgnat/s-valuti.ads
@@ -51,7 +51,8 @@ is
 
procedure Bad_Value (S : String)
with
- Depends => (null => S);
+ Depends => (null => S),
+ Exceptional_Cases => (others => Standard.False);
pragma No_Return (Bad_Value);
--  Raises constraint error with message: bad input for 'Value: "xxx"
 
-- 
2.40.0



[COMMITTED] ada: Fix bug in predicate checks with address clauses

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

This patch fixes a compiler bug triggered by having a type with some
defaulted components, and a predicate, and an object of that type with
an address clause. In this case, the compiler was crashing.

gcc/ada/

* sem_ch3.adb (Analyze_Object_Declaration): Remove predicate-check
generation if there is an address clause. These are unnecessary,
and cause gigi to crash.
* exp_util.ads (Following_Address_Clause): Remove obsolete "???"
comments. The suggested changes were done long ago.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_util.ads |  7 ---
 gcc/ada/sem_ch3.adb  | 11 ++-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/exp_util.ads b/gcc/ada/exp_util.ads
index 24065b6f7b6..02324d23db0 100644
--- a/gcc/ada/exp_util.ads
+++ b/gcc/ada/exp_util.ads
@@ -647,13 +647,6 @@ package Exp_Util is
--  current declarative part to look for an address clause for the object
--  being declared, and returns the clause if one is found, returns
--  Empty otherwise.
-   --
-   --  Note: this function can be costly and must be invoked with special care.
-   --  Possibly we could introduce a flag at parse time indicating the presence
-   --  of an address clause to speed this up???
-   --
-   --  Note: currently this function does not scan the private part, that seems
-   --  like a potential bug ???
 
type Force_Evaluation_Mode is (Relaxed, Strict);
 
diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
index fb63690803b..85019dfffa5 100644
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -4690,6 +4690,16 @@ package body Sem_Ch3 is
  elsif No (E) and then Is_Null_Record_Type (T) then
 null;
 
+ --  If there is an address clause for this object, do not generate a
+ --  predicate check here. It will be generated later, at the freezing
+ --  point. It would be wrong to generate references to the object
+ --  here, before the address has been determined.
+
+ elsif Has_Aspect (Id, Aspect_Address)
+   or else Present (Following_Address_Clause (N))
+ then
+null;
+
  --  Do not generate a predicate check if the initialization expression
  --  is a type conversion whose target subtype statically matches the
  --  object's subtype because the conversion has been subjected to the
@@ -4709,7 +4719,6 @@ package body Sem_Ch3 is
 declare
Check : constant Node_Id :=
  Make_Predicate_Check (T, New_Occurrence_Of (Id, Loc));
-
 begin
if No (Next_Decl) then
   Append_To (List_Containing (N), Check);
-- 
2.40.0



[COMMITTED] ada: Fix -fdiagnostics-format=json not printing all messages

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Ghjuvan Lacambre 

The previous version of this code stopped printing messages as soon as
it encountered a deleted or continuation message. This was wrong:
continuation and deleted messages can be followed by live messages that
do need to be printed.

gcc/ada/

* errout.adb (Output_Messages): Fix loop termination condition.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 1c6222b3a29..6e378a60731 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -3062,16 +3062,19 @@ package body Errout is
 
 E := Errors.Table (E).Next;
 
---  Skip deleted messages.
---  Also skip continuation messages, as they have already been
---  printed along the message they're attached to.
+while E /= No_Error_Msg loop
+
+   --  Skip deleted messages.
+   --  Also skip continuation messages, as they have already been
+   --  printed along the message they're attached to.
+
+   if not Errors.Table (E).Deleted
+ and then not Errors.Table (E).Msg_Cont
+   then
+  Write_Char (',');
+  Output_JSON_Message (E);
+   end if;
 
-while E /= No_Error_Msg
-  and then not Errors.Table (E).Deleted
-  and then not Errors.Table (E).Msg_Cont
-loop
-   Write_Char (',');
-   Output_JSON_Message (E);
E := Errors.Table (E).Next;
 end loop;
  end if;
-- 
2.40.0



[COMMITTED] ada: Introduce -gnateH switch to force reverse Bit_Order threshold to 64

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This can be helpful for legacy code that still makes use of an original
reverse Bit_Order clause, i.e. without a Scalar_Storage_Order clause.

gcc/ada/

* doc/gnat_ugn/building_executable_programs_with_gnat.rst (Compiler
Switches): Document -gnateH.
* opt.ads (Reverse_Bit_Order_Threshold): New variable.
* sem_ch13.adb (Adjust_Record_For_Reverse_Bit_Order): Use its value
if it is nonnegative instead of System_Max_Integer_Size.
* switch-c.adb (Scan_Front_End_Switches): Deal with -gnateH.
* usage.adb (Usage): Print -gnateH.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../building_executable_programs_with_gnat.rst |  8 
 gcc/ada/gnat_ugn.texi  | 14 +-
 gcc/ada/opt.ads|  5 +
 gcc/ada/sem_ch13.adb   |  4 +++-
 gcc/ada/switch-c.adb   |  6 ++
 gcc/ada/usage.adb  |  5 +
 6 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst 
b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
index 20e003d4ac7..8e479679ec1 100644
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -1612,6 +1612,14 @@ Alphabetical List of All Switches
   Save result of preprocessing in a text file.
 
 
+.. index:: -gnateH  (gcc)
+
+:switch:`-gnateH`
+  Set the threshold from which the RM 13.5.1(13.3/2) clause applies to 64.
+  This is useful only on 64-bit platforms where this threshold is 128, but
+  used to be 64 in earlier versions of the compiler.
+
+
 .. index:: -gnatei  (gcc)
 
 :switch:`-gnatei{nnn}`
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index 88123df4332..021c2672bae 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -19,7 +19,7 @@
 
 @copying
 @quotation
-GNAT User's Guide for Native Platforms , Jun 01, 2023
+GNAT User's Guide for Native Platforms , Jun 16, 2023
 
 AdaCore
 
@@ -9075,6 +9075,18 @@ information.
 Save result of preprocessing in a text file.
 @end table
 
+@geindex -gnateH (gcc)
+
+
+@table @asis
+
+@item @code{-gnateH}
+
+Set the threshold from which the RM 13.5.1(13.3/2) clause applies to 64.
+This is useful only on 64-bit platforms where this threshold is 128, but
+used to be 64 in earlier versions of the compiler.
+@end table
+
 @geindex -gnatei (gcc)
 
 
diff --git a/gcc/ada/opt.ads b/gcc/ada/opt.ads
index bcafba9e57d..87399c8a9d3 100644
--- a/gcc/ada/opt.ads
+++ b/gcc/ada/opt.ads
@@ -1342,6 +1342,11 @@ package Opt is
--  GNATPREP
--  Set to True if -C switch used.
 
+   Reverse_Bit_Order_Threshold : Int := -1;
+   --  GNAT
+   --  Set to the threshold from which the RM 13.5.1(13.3/2) clause applies,
+   --  or -1 if the size of the largest machine scalar is to be used.
+
RTS_Lib_Path_Name : String_Ptr := null;
RTS_Src_Path_Name : String_Ptr := null;
--  GNAT
diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
index 65627321ffe..c3ea8d63566 100644
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -426,7 +426,9 @@ package body Sem_Ch13 is
 
procedure Adjust_Record_For_Reverse_Bit_Order (R : Entity_Id) is
   Max_Machine_Scalar_Size : constant Uint :=
-  UI_From_Int (System_Max_Integer_Size);
+UI_From_Int (if Reverse_Bit_Order_Threshold >= 0
+ then Reverse_Bit_Order_Threshold
+ else System_Max_Integer_Size);
   --  We use this as the maximum machine scalar size
 
   SSU : constant Uint := UI_From_Int (System_Storage_Unit);
diff --git a/gcc/ada/switch-c.adb b/gcc/ada/switch-c.adb
index f6207e42f62..536903b 100644
--- a/gcc/ada/switch-c.adb
+++ b/gcc/ada/switch-c.adb
@@ -635,6 +635,12 @@ package body Switch.C is
  Generate_Processed_File := True;
  Ptr := Ptr + 1;
 
+  --  -gnateH (set reverse Bit_Order threshold to 64)
+
+  when 'H' =>
+ Reverse_Bit_Order_Threshold := 64;
+ Ptr := Ptr + 1;
+
   --  -gnatei (max number of instantiations)
 
   when 'i' =>
diff --git a/gcc/ada/usage.adb b/gcc/ada/usage.adb
index 58cfa786efa..681ece5d921 100644
--- a/gcc/ada/usage.adb
+++ b/gcc/ada/usage.adb
@@ -199,6 +199,11 @@ begin
Write_Switch_Char ("eG");
Write_Line ("Generate preprocessed source");
 
+   --  Line for -gnateH switch
+
+   Write_Switch_Char ("eH");
+   Write_Line ("Set reverse Bit_Order threshold to 64");
+
--  Line for -gnatei switch
 
Write_Switch_Char ("einn");
-- 
2.40.0



RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Li, Pan2 via Gcc-patches
Hi Jakub,

Thanks for reviewing, but I am not quite sure I fully understand how to fix
this issue. Could you please help enlighten me further about this?

Currently for RISC-V, the memset already touches out-of-range memory because
MAX_MACHINE_MODE > 256. The parts below may require adjusting.

1. streamer_mode_table.
2.  bp_unpack_machine_mode/bp_pack_machine_mode 
3.  bp_pack_value/bp_unpack_value in lto_write_mode_table.
4. unsigned char *table = ggc_cleared_vec_alloc (1 << 8) in 
lto_input_mode_table.

For 1, it is safe to extend the size to MAX_MACHINE_MODE, as the array is only
used as a Boolean, i.e. streamer_mode_table[XXXmode] = 1.
For 2 & 3, keep 1 << 8 as is, or stream out the host MAX_MACHINE_MODE value
somewhere for the consuming side to use?
For 4, one possible approach is to extend unsigned char to unsigned short, as
well as 256 to MAX_MACHINE_MODE, because the array stores the actual machine
mode.

Pan

-Original Message-
From: Li, Pan2 
Sent: Monday, June 19, 2023 9:36 PM
To: Jakub Jelinek 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; rdapp@gmail.com; 
jeffreya...@gmail.com; Wang, Yanzhang ; 
kito.ch...@gmail.com; rguent...@suse.de
Subject: RE: [PATCH] RISC-V: Fix out of range memory access of machine mode 
table

Thanks Jakub for reviewing, sorry for misleading and will have a try for PATCH 
v3.

Pan

-Original Message-
From: Jakub Jelinek  
Sent: Monday, June 19, 2023 5:17 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; rdapp@gmail.com; 
jeffreya...@gmail.com; Wang, Yanzhang ; 
kito.ch...@gmail.com; rguent...@suse.de
Subject: Re: [PATCH] RISC-V: Fix out of range memory access of machine mode 
table

On Mon, Jun 19, 2023 at 05:05:48PM +0800, pan2...@intel.com wrote:
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,7 +1985,8 @@ lto_input_mode_table (struct lto_file_decl_data 
> *file_data)
>  internal_error ("cannot read LTO mode table from %s",
>   file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc (1 << 8);
> +  unsigned char *table = ggc_cleared_vec_alloc (
> +MAX_MACHINE_MODE);

Incorrect formatting.  And, see my other mail, this is wrong.

> @@ -108,7 +108,7 @@ inline void
>  bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
>  {
>streamer_mode_table[mode] = 1;
> -  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +  bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, mode);
>  }
>  
>  inline machine_mode
> @@ -116,7 +116,8 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
>return (machine_mode)
>  ((class lto_input_block *)
> - bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> + bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode,
> + MAX_MACHINE_MODE)];
>  }

And these two are wrong as well.  The value passed to bp_pack_enum
has to match the one used on bp_unpack_enum.  But that is not the case
after your changes.  You stream out with the host MAX_MACHINE_MODE, and
stream in for normal LTO with the same value (ok), but for offloading
targets (nvptx, amdgcn) with a different MAX_MACHINE_MODE.  That will
immediately result in LTO streaming being out of sync and ICEs all around.
The reason for using 1 << 8 there was exactly to make it interoperable for
offloading.  What could be perhaps done is that you stream out the
host MAX_MACHINE_MODE value somewhere and stream it in inside of
lto_input_mode_table before you allocate the table.  But, that streamed
in host max_machine_mode has to be remembered somewhere and used e.g. in
bp_unpack_machine_mode instead of MAX_MACHINE_MODE.

Jakub



[COMMITTED] ada: Fix for quantified expressions in Exceptional_Cases

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

When detecting illegal uses of formal parameters of the current
subprogram in contract of its Exceptional_Cases, we relied on the
Current_Scope. However, quantified expressions introduce an implicit
scope, which we need to take into account.

gcc/ada/

* sem_res.adb (Resolve_Entity_Name): Ignore implicit loop scopes
introduced by quantified expressions.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index 266cf8e559e..6867c8f1275 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -8116,7 +8116,7 @@ package body Sem_Res is
 --  data from the object.
 
 if Ekind (E) in E_Out_Parameter | E_In_Out_Parameter
-  and then Scope (E) = Current_Scope
+  and then Scope (E) = Current_Scope_No_Loops
   and then Within_Exceptional_Cases_Consequence (N)
   and then not In_Attribute_Old (N)
   and then not (Nkind (Parent (N)) = N_Attribute_Reference
-- 
2.40.0



Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Jan Hubicka via Gcc-patches
> >
> >   size_type
> >   _M_check_len(size_type __n, const char* __s) const
> >   {
> > const size_type __size = size();
> > const size_type __max_size = max_size();
> >
> > if (__is_same(allocator_type, allocator<_Tp>)
> >   && __size > __max_size / 2)
> >
> 
> This check is wrong for C++17 and older standards, because max_size()
> changed value in C++20.
> 
> In C++17 it was PTRDIFF_MAX / sizeof(T) but in C++20 it's SIZE_MAX /
> sizeof(T). So on 32-bit targets using C++17, it's possible a std::vector
> could use PTRDIFF_MAX/2 bytes, and then the size <= max_size/2 assumption
> would not hold.

Can we go with this perhaps only for 64bit targets?
I am not sure how safe this idea is in the 32-bit world: I guess
one can have an OS that lets you allocate half of the address space as one
allocation.

Thanks!
Honza


[COMMITTED] ada: Add the ability to add error codes to error messages

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Yannick Moy 

Add a new character sequence [] for error codes in error messages
handled by Error_Msg procedures, to use for SPARK-related errors.
Display of additional information on the error or warning based on
the error code is delegated to GNATprove.

gcc/ada/

* err_vars.ads (Error_Msg_Code): New variable for error codes.
* errout.adb (Error_Msg_Internal): Display continuation message
when an error code was present.
(Set_Msg_Text): Handle character sequence [] for error codes.
* errout.ads: Document new insertion sequence [].
(Error_Msg_Code): New renaming.
* erroutc.adb (Prescan_Message): Detect presence of error code.
(Set_Msg_Insertion_Code): Handle new insertion sequence [].
* erroutc.ads (Has_Error_Code): New variable for prescan.
(Set_Msg_Insertion_Code): Handle new insertion sequence [].
* contracts.adb (Check_Type_Or_Object_External_Properties):
Replace reference to SPARK RM section by an error code.
* sem_elab.adb (SPARK_Processor): Same.
* sem_prag.adb (Check_Missing_Part_Of): Same.
* sem_res.adb (Resolve_Actuals, Resolve_Entity_Name): Same.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/contracts.adb |  5 +++--
 gcc/ada/err_vars.ads  |  5 +
 gcc/ada/errout.adb| 48 ---
 gcc/ada/errout.ads| 24 ++
 gcc/ada/erroutc.adb   | 46 +
 gcc/ada/erroutc.ads   | 10 +
 gcc/ada/sem_elab.adb  |  3 ++-
 gcc/ada/sem_prag.adb  |  5 +++--
 gcc/ada/sem_res.adb   |  9 
 9 files changed, 134 insertions(+), 21 deletions(-)

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
index 26bc4b39735..77578dacc18 100644
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -1040,11 +1040,12 @@ package body Contracts is
 --  appear at the library level (SPARK RM 7.1.3(3), C.6(6)).
 
 if not Is_Library_Level_Entity (Type_Or_Obj_Id) then
+   Error_Msg_Code := GEC_Volatile_At_Library_Level;
Error_Msg_N
  ("effectively volatile "
 & Decl_Kind
-& " & must be declared at library level "
-& "(SPARK RM 7.1.3(3))", Type_Or_Obj_Id);
+& " & must be declared at library level '[[]']",
+Type_Or_Obj_Id);
 
 --  An object of a discriminated type cannot be effectively
 --  volatile except for protected objects (SPARK RM 7.1.3(5)).
diff --git a/gcc/ada/err_vars.ads b/gcc/ada/err_vars.ads
index e73e9fb295a..e84efb65575 100644
--- a/gcc/ada/err_vars.ads
+++ b/gcc/ada/err_vars.ads
@@ -100,6 +100,11 @@ package Err_Vars is
Error_Msg_Uint_2 : Uint := No_Uint;
--  Uint values for ^ insertion characters in message
 
+   Error_Msg_Code_Digits : constant := 4;
+   Error_Msg_Code : Nat range 0 .. 10 ** Error_Msg_Code_Digits - 1;
+   --  Nat value for [] insertion sequence in message, where a value of zero
+   --  indicates the absence of an error code.
+
--  WARNING: There is a matching C declaration of these variables in fe.h
 
Error_Msg_Sloc : Source_Ptr;
diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 6e378a60731..adc260843ec 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -1447,6 +1447,22 @@ package body Errout is
 raise Unrecoverable_Error;
  end if;
   end if;
+
+  if Has_Error_Code then
+ declare
+Msg : constant String :=
+  "launch ""gnatprove --explain=[]"" for more information";
+ begin
+Prescan_Message (Msg);
+Has_Error_Code := False;
+Error_Msg_Internal
+  (Msg  => Msg,
+   Span => Span,
+   Opan => Opan,
+   Msg_Cont => True,
+   Node => Node);
+ end;
+  end if;
end Error_Msg_Internal;
 
-
@@ -4338,21 +4354,29 @@ package body Errout is
 
 when '[' =>
 
-   --  Switch the message from a warning to an error if the flag
-   --  -gnatwE is specified to treat run-time exception warnings
-   --  as errors.
+   --  "[]" (insertion of error code)
 
-   if Is_Warning_Msg
- and then Warning_Mode = Treat_Run_Time_Warnings_As_Errors
-   then
-  Is_Warning_Msg   := False;
-  Is_Runtime_Raise := True;
-   end if;
+   if P <= Text'Last and then Text (P) = ']' then
+  P := P + 1;
+  Set_Msg_Insertion_Code;
 
-   if Is_Warning_Msg then
-  Set_Msg_Str ("will be raised at run time");
else
-  Set_Msg_Str ("would have been raised at run time");
+  --  Switch the message from a warni

[COMMITTED] ada: Do not issue warning on postcondition in some cases

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Yannick Moy 

Warning on suspicious postcondition is not relevant if contract
Exceptional_Cases is present, or if contract Always_Terminates is
present with a non-statically True value, as in those cases the
postcondition can be used to indicate constraints on those pre-states
for which the subprogram might terminate normally.

gcc/ada/

* sem_util.adb (Check_Result_And_Post_State): Do not warn in cases
where the warning could be spurious.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_util.adb | 32 
 1 file changed, 32 insertions(+)

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index 3a64047d45c..1729a2addd8 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -4566,6 +4566,38 @@ package body Sem_Util is
 
   elsif No (Items) then
  return;
+
+  --  If the subprogram has a contract Exceptional_Cases, it is often
+  --  useful to refer only to the pre-state in the postcondition, to
+  --  indicate when the subprogram might terminate normally.
+
+  elsif Present (Get_Pragma (Subp_Id, Pragma_Exceptional_Cases)) then
+ return;
+
+  --  Same if the subprogram has a contract Always_Terminates => Cond,
+  --  where Cond is not syntactically True.
+
+  else
+ declare
+Prag : constant Node_Id :=
+  Get_Pragma (Subp_Id, Pragma_Always_Terminates);
+ begin
+if Present (Prag)
+  and then Present (Pragma_Argument_Associations (Prag))
+then
+   declare
+  Cond : constant Node_Id :=
+Get_Pragma_Arg
+  (First (Pragma_Argument_Associations (Prag)));
+   begin
+  if not Compile_Time_Known_Value (Cond)
+or else not Is_True (Expr_Value (Cond))
+  then
+ return;
+  end if;
+   end;
+end if;
+ end;
   end if;
 
   --  Examine all postconditions for attribute 'Result and a post-state
-- 
2.40.0



[COMMITTED] ada: Fix for attribute Range in Exceptional_Cases

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Attribute Range is now handled like First and Last when occurring within
the consequence of Exceptional_Cases, i.e. attribute Range is not
considered to be a read of a formal parameter that would not be allowed
in the contract.

gcc/ada/

* sem_res.adb (Resolve_Entity_Name): Handle Range like First and Last.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index 6867c8f1275..ef3b877f5db 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -8124,7 +8124,8 @@ package body Sem_Res is
 Attribute_Name (Parent (N)) in Name_Constrained
  | Name_First
  | Name_Last
- | Name_Length)
+ | Name_Length
+ | Name_Range)
   and then not Is_By_Reference_Type (Etype (E))
   and then not Is_Aliased (E)
 then
-- 
2.40.0



[PATCH][committed]AArch64 remove test comment from *mov_aarch64

2023-06-20 Thread Tamar Christina via Gcc-patches
Hi All,

I accidentally left a test comment in the final version of the patch.
This removes the comment.

Regtested on aarch64-none-linux-gnu and no issues.

Committed under the obvious rule.

Thanks,
Tamar

gcc/ChangeLog:

* config/aarch64/aarch64.md (*mov_aarch64): Drop test comment.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
97e5a59d30ca17c26c0951e9a7c62a7934af8d4f..25f7905c6a0b3b7dd860a1346b8e1d4e1b04164b
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1224,7 +1224,7 @@ (define_insn "*mov_aarch64"
  [m, r Z  ; store_4, * ] str\\t%w1, %0
  [m, w; store_4, * ] str\t%1, %0
  [r, w; neon_to_gp  , simd  ] umov\t%w0, %1.[0]
- [r, w; neon_to_gp  , nosimd] fmov\t%w0, %s1 /*foo */
+ [r, w; neon_to_gp  , nosimd] fmov\t%w0, %s1
  [w, r Z  ; neon_from_gp, simd  ] dup\t%0., %w1
  [w, r Z  ; neon_from_gp, nosimd] fmov\t%s0, %w1
  [w, w; neon_dup   , simd  ] dup\t%0, %1.[0]




-- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
97e5a59d30ca17c26c0951e9a7c62a7934af8d4f..25f7905c6a0b3b7dd860a1346b8e1d4e1b04164b
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1224,7 +1224,7 @@ (define_insn "*mov_aarch64"
  [m, r Z  ; store_4, * ] str\\t%w1, %0
  [m, w; store_4, * ] str\t%1, %0
  [r, w; neon_to_gp  , simd  ] umov\t%w0, %1.[0]
- [r, w; neon_to_gp  , nosimd] fmov\t%w0, %s1 /*foo */
+ [r, w; neon_to_gp  , nosimd] fmov\t%w0, %s1
  [w, r Z  ; neon_from_gp, simd  ] dup\t%0., %w1
  [w, r Z  ; neon_from_gp, nosimd] fmov\t%s0, %w1
  [w, w; neon_dup   , simd  ] dup\t%0, %1.[0]





[COMMITTED] ada: Fix couple of issues in documentation of overflow checking

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

There is still a mention of the defunct CHECKED mode and the Default
Settings paragraph is confusing with regard to the -gnato switch.

gcc/ada/

* doc/gnat_ugn/gnat_and_program_execution.rst (Overflows in GNAT)
: Remove obsolete paragraph about -gnato.
: Replace CHECKED with STRICT.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../gnat_ugn/gnat_and_program_execution.rst   | 23 ++
 gcc/ada/gnat_ugn.texi | 30 +++
 2 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/gnat_and_program_execution.rst 
b/gcc/ada/doc/gnat_ugn/gnat_and_program_execution.rst
index 9eb6b1c60aa..62abca24f41 100644
--- a/gcc/ada/doc/gnat_ugn/gnat_and_program_execution.rst
+++ b/gcc/ada/doc/gnat_ugn/gnat_and_program_execution.rst
@@ -2925,25 +2925,8 @@ The default mode for overflow checks is
 
   General => Strict
 
-which causes all computations both inside and outside assertions to use
-the base type.
-
-This retains compatibility with previous versions of
-GNAT which suppressed overflow checks by default and always
-used the base type for computation of intermediate results.
-
-.. Sphinx allows no emphasis within :index: role. As a workaround we
-   point the index to "switch" and use emphasis for "-gnato".
-
-The :index:`switch <-gnato (gcc)>` :switch:`-gnato` (with no digits following)
-is equivalent to
-
-  ::
-
-  General => Strict
-
-which causes overflow checking of all intermediate overflows
-both inside and outside assertions against the base type.
+which causes all computations both inside and outside assertions to use the
+base type, and is equivalent to :switch:`-gnato` (with no digits following).
 
 The pragma ``Suppress (Overflow_Check)`` disables overflow
 checking, but it has no effect on the method used for computing
@@ -2964,7 +2947,7 @@ reasonably efficient, and can be generally used. It also 
helps
 to ensure compatibility with code imported from some other
 compiler to GNAT.
 
-Setting all intermediate overflows checking (``CHECKED`` mode)
+Setting all intermediate overflows checking (``STRICT`` mode)
 makes sense if you want to
 make sure that your code is compatible with any other possible
 Ada implementation. This may be useful in ensuring portability
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index a63e1d428c3..b85711b1b5d 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -21644,30 +21644,8 @@ General => Strict
 @end example
 @end quotation
 
-which causes all computations both inside and outside assertions to use
-the base type.
-
-This retains compatibility with previous versions of
-GNAT which suppressed overflow checks by default and always
-used the base type for computation of intermediate results.
-
-@c Sphinx allows no emphasis within :index: role. As a workaround we
-@c point the index to "switch" and use emphasis for "-gnato".
-
-The 
-@geindex -gnato (gcc)
-switch @code{-gnato} (with no digits following)
-is equivalent to
-
-@quotation
-
-@example
-General => Strict
-@end example
-@end quotation
-
-which causes overflow checking of all intermediate overflows
-both inside and outside assertions against the base type.
+which causes all computations both inside and outside assertions to use the
+base type, and is equivalent to @code{-gnato} (with no digits following).
 
 The pragma @code{Suppress (Overflow_Check)} disables overflow
 checking, but it has no effect on the method used for computing
@@ -21687,7 +21665,7 @@ reasonably efficient, and can be generally used. It 
also helps
 to ensure compatibility with code imported from some other
 compiler to GNAT.
 
-Setting all intermediate overflows checking (@code{CHECKED} mode)
+Setting all intermediate overflows checking (@code{STRICT} mode)
 makes sense if you want to
 make sure that your code is compatible with any other possible
 Ada implementation. This may be useful in ensuring portability
@@ -29550,8 +29528,8 @@ to permit their use in free software.
 
 @printindex ge
 
-@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 @anchor{d1}@w{  }
+@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 
 @c %**end of body
 @bye
-- 
2.40.0



Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

> Case 1:
> void
> f (uint8_t *restrict a, uint8_t *restrict b)
> {
>   for (int i = 0; i < 100; ++i)
> {
>   a[i * 8] = b[i * 8 + 37] + 1;
>   a[i * 8 + 1] = b[i * 8 + 37] + 2;
>   a[i * 8 + 2] = b[i * 8 + 37] + 3;
>   a[i * 8 + 3] = b[i * 8 + 37] + 4;
>   a[i * 8 + 4] = b[i * 8 + 37] + 5;
>   a[i * 8 + 5] = b[i * 8 + 37] + 6;
>   a[i * 8 + 6] = b[i * 8 + 37] + 7;
>   a[i * 8 + 7] = b[i * 8 + 37] + 8;
> }
> }
> 
> We need to generate the stepped vector:
> NPATTERNS = 8.
> { 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8 }
> 
> Before this patch:
> vid.v    v4         ;; {0,1,2,3,4,5,6,7,...}
> vsrl.vi  v4,v4,3;; {0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,...}
> li   a3,8   ;; {8}
> vmul.vx  v4,v4,a3   ;; {0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}
> 
> After this patch:
> vid.v    v4                    ;; {0,1,2,3,4,5,6,7,...}
> vand.vi  v4,v4,-8(-NPATTERNS)  ;; {0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}

This is a nice improvement.  Even though we're in the SLP realm I would
still add an assert that documents that we're indeed operating with
pow2_p (NPATTERNS) and some comment as to why we can use AND.
Sure we're doing exact_log2 et al later anyway, just to make things
clearer.

> Before this patch:
> li   a6,134221824
> slli a6,a6,5
> addi a6,a6,3;; 64-bit: 0x000300020001
> vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }
> vid.v    v4         ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
> vsrl.vi  v4,v4,2;; {0, 0, 0, 0, 1, 1, 1, 1, ... }
> li   a3,4   ;; {4}
> vmul.vx  v4,v4,a3   ;; {0, 0, 0, 0, 4, 4, 4, 4, ... }
> vadd.vv  v4,v4,v6   ;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
> 12, ... }
> 
> After this patch:
> li    a3,-536875008
> slli  a3,a3,4
> addi  a3,a3,1
> slli  a3,a3,16
> vmv.v.x   v2,a3   ;; {3, 1, -1, -3, ... }
> vid.v v4  ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
> vadd.vv   v4,v4,v2;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 
> 14, 13, 12, ... }

My immediate idea would have been to fall back to the first
approach, i.e. create the "0x00030002..." constant

> li   a6,134221824
> slli a6,a6,5
> addi a6,a6,3;; 64-bit: 0x000300020001
> vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }

and then
  vid.v v4
  vand.vi v4, v4, -4
  vadd.vv v4, v4, v6

It's one more vector instruction though so possibly worse from a latency
standpoint.

Rest looks good to me.

Regards
 Robin


[PATCH] RISC-V: Fix vmul test expectation.

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi,

I forgot to check for vfmul in the multiplication tests.  Fix this.

Regards
 Robin

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c: Check for
vfmul.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c: Ditto.
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
index 1900c21121b..7d3dfade0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
index c8508bcc1f7..a549d6f7be4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
-- 
2.40.1


Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Robin Dapp via Gcc-patches
> This is a nice improvement.  Even though we're in the SLP realm I would
> still add an assert that documents that we're indeed operating with
> pow2_p (NPATTERNS) and some comment as to why we can use AND.
> Sure we're doing exact_log2 et al later anyway, just to make things
> clearer.

Actually no assert necessary, just a comment like:

/* As NPATTERNS is always a power of two, we can ...  */

Regards
 Robin



Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 20, 2023 at 07:50:00AM +, Li, Pan2 wrote:
> Hi Jakub,
> 
> Thanks for reviewing but I am not quite sure if I fully understand how to fix 
> this issue. Could you please help to enlighten me more about this ?
> 
> Currently for RISC-V, the memset has touched out of range memory already due 
> to MAX_MACHINE_MODE > 256. And we may have below parts require adjusting.
> 
> 1. streamer_mode_table.
> 2.  bp_unpack_machine_mode/bp_pack_machine_mode 
> 3.  bp_pack_value/bp_unpack_value in lto_write_mode_table.
> 4. unsigned char *table = ggc_cleared_vec_alloc (1 << 8) in 
> lto_input_mode_table.
> 
> For 1. is safe to extend the size to MAX_MACHINE_MODE as the array only used 
> as Boolean, aka streamer_mode_table[XXXmode] = 1.

Because the array is used only during stream out, that is safe.

> For 2 & 3. Keep 1 << 8 as is, or stream out the host MAX_MACHINE_MODE value 
> somewhere for underlying consuming?

You can't keep 1 << 8, otherwise you won't stream all the bits.
I think you want to use 1 << ceil_log2 (MAX_MACHINE_MODE) on the stream out
side, stream that ceil_log2 (MAX_MACHINE_MODE) value somewhere at the start
of the mode table, add some field next to mode_table in lto_input_block
which will contain that value (and make sure to initialize it to
ceil_log2 (MAX_MACHINE_MODE) in case mode table isn't streamed in and use
1 << ...->mode_bits in e.g. bp_unpack_machine_mode
Or for cases where 8 was used before use ceil_log2 (MAX_MACHINE_MODE)
or mode_bits.

> For 4, one possible approach is that extend unsigned char to unsigned short, 
> as well as 256 to MAX_MACHINE_MODE. Because it stores the actually machine 
> mode in array.

The 1 << 8 needs to be similarly 1 << ...->mode_bits or ...->num_modes (that
is also streamed out and in), it is sized by the host number of modes.
Whether it is unsigned char or unsigned short array depends on if we
want to support offloading targets with > 256 modes.  If yes, it needs to
be unsigned short, if not, we should add an assertion (e.g. on streaming
in the LTO table) that MAX_MACHINE_MODE <= 256.

Jakub



Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Jan Hubicka via Gcc-patches
> > >
> > >   size_type
> > >   _M_check_len(size_type __n, const char* __s) const
> > >   {
> > > const size_type __size = size();
> > > const size_type __max_size = max_size();
> > >
> > > if (__is_same(allocator_type, allocator<_Tp>)
> > >   && __size > __max_size / 2)
> > >
> > 
> > This check is wrong for C++17 and older standards, because max_size()
> > changed value in C++20.
> > 
> > In C++17 it was PTRDIFF_MAX / sizeof(T) but in C++20 it's SIZE_MAX /
> > sizeof(T). So on 32-bit targets using C++17, it's possible a std::vector
> > could use PTRDIFF_MAX/2 bytes, and then the size <= max_size/2 assumption
> > would not hold.
> 
> Can we go with this perhaps only for 64bit targets?
> I am not sure how completely safe this idea is in 32bit world: I guess
> one can have OS that lets you to allocate half of address space as one
> allocation.

Perhaps something like:
  size > std::min ((uint64_t)__max_size, ((uint64_t)1 << 62) / sizeof (_Tp))
is safe for all allocators and 32bit, so we won't need __is_same test
and test for 64bit?

Honza
> 
> Thanks!
> Honza


Re: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread juzhe.zh...@rivai.ai
Hi, Robin.

>> Actually no assert necessary, just a comment like:
>> /* As NPATTERNS is always a power of two, we can ..."  */
Ok.

>> My immediate idea would have been to fall back to the first
>> approach, i.e. create the "0x00030002..." constant
>>and then
>>  vid.v v4
>>  vand.vi v4, v4, -4
>>  vadd.vv v4, v4, v6

>>It's one more vector instruction though so possibly worse from a latency
>>standpoint.

>>Rest looks good to me.

I tried this solution and discussed it in depth with my colleague; it turns out
that it clearly consumes one more vector insn.
You may argue that this patch needs one more scalar insn (slli), but that is
relatively cheaper than a vector insn.
I prefer the solution in this patch.

Comments addressed: I have added the code comment and will send V2.
Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 15:55
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP
Hi Juzhe,
 
> Case 1:
> void
> f (uint8_t *restrict a, uint8_t *restrict b)
> {
>   for (int i = 0; i < 100; ++i)
> {
>   a[i * 8] = b[i * 8 + 37] + 1;
>   a[i * 8 + 1] = b[i * 8 + 37] + 2;
>   a[i * 8 + 2] = b[i * 8 + 37] + 3;
>   a[i * 8 + 3] = b[i * 8 + 37] + 4;
>   a[i * 8 + 4] = b[i * 8 + 37] + 5;
>   a[i * 8 + 5] = b[i * 8 + 37] + 6;
>   a[i * 8 + 6] = b[i * 8 + 37] + 7;
>   a[i * 8 + 7] = b[i * 8 + 37] + 8;
> }
> }
> 
> We need to generate the stepped vector:
> NPATTERNS = 8.
> { 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8 }
> 
> Before this patch:
> vid.vv4 ;; {0,1,2,3,4,5,6,7,...}
> vsrl.vi  v4,v4,3;; {0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,...}
> li   a3,8   ;; {8}
> vmul.vx  v4,v4,a3   ;; {0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}
> 
> After this patch:
> vid.vv4;; {0,1,2,3,4,5,6,7,...}
> vand.vi  v4,v4,-8(-NPATTERNS)  ;; {0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}
 
This is a nice improvement.  Even though we're in the SLP realm I would
still add an assert that documents that we're indeed operating with
pow2_p (NPATTERNS) and some comment as to why we can use AND.
Sure we're doing exact_log2 et al later anyway, just to make things
clearer.
 
> Before this patch:
> li   a6,134221824
> slli a6,a6,5
> addi a6,a6,3;; 64-bit: 0x000300020001
> vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }
> vid.vv4 ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
> vsrl.vi  v4,v4,2;; {0, 0, 0, 0, 1, 1, 1, 1, ... }
> li   a3,4   ;; {4}
> vmul.vx  v4,v4,a3   ;; {0, 0, 0, 0, 4, 4, 4, 4, ... }
> vadd.vv  v4,v4,v6   ;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
> 12, ... }
> 
> After this patch:
> li a3,-536875008
> slli a3,a3,4
> addi a3,a3,1
> slli a3,a3,16
> vmv.v.x v2,a3   ;; {3, 1, -1, -3, ... }
> vid.v v4  ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
> vadd.vv v4,v4,v2;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
> 12, ... }
 
My immediate idea would have been to fall back to the first
approach, i.e. create the "0x00030002..." constant
 
> li   a6,134221824
> slli a6,a6,5
> addi a6,a6,3;; 64-bit: 0x000300020001
> vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }
 
and then
  vid.v v4
  vand.vi v4, v4, -4
  vadd.vv v4, v4, v6
 
It's one more vector instruction though so possibly worse from a latency
standpoint.
 
Rest looks good to me.
 
Regards
Robin
 


Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 20, 2023 at 09:50:25AM +0200, Jan Hubicka wrote:
> > >
> > >   size_type
> > >   _M_check_len(size_type __n, const char* __s) const
> > >   {
> > > const size_type __size = size();
> > > const size_type __max_size = max_size();
> > >
> > > if (__is_same(allocator_type, allocator<_Tp>)
> > >   && __size > __max_size / 2)
> > >
> > 
> > This check is wrong for C++17 and older standards, because max_size()
> > changed value in C++20.
> > 
> > In C++17 it was PTRDIFF_MAX / sizeof(T) but in C++20 it's SIZE_MAX /
> > sizeof(T). So on 32-bit targets using C++17, it's possible a std::vector
> > could use PTRDIFF_MAX/2 bytes, and then the size <= max_size/2 assumption
> > would not hold.
> 
> Can we go with this perhaps only for 64bit targets?
> I am not sure how completely safe this idea is in 32bit world: I guess
> one can have OS that lets you to allocate half of address space as one
> allocation.

Is it safe even on 64bit targets?  I mean, doesn't, say, PowerPC already allow
full 64-bit virtual address space?  The assumption that one can't have
more than half of virtual address space allocations is true right now at
least on x86-64, aarch64 and others, but isn't that something that can
change with newer versions of CPUs without the need to recompile
applications (add another level or two of page tables)?
By being hardcoded in libstdc++ headers those assumptions will be hardcoded
in lots of applications.

Jakub



Re: [PATCH] RISC-V: Fix vmul test expectation.

2023-06-20 Thread juzhe.zh...@rivai.ai
Ok. No need to wait just commit.
Thanks.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 15:58
To: gcc-patches; palmer; Kito Cheng; juzhe.zh...@rivai.ai; jeffreyalaw
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Fix vmul test expectation.
Hi,
 
I forgot to check for vfmul in the multiplication tests.  Fix this.
 
Regards
Robin
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c: Check for
vfmul.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c: Ditto.
---
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c | 1 +
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c | 1 +
2 files changed, 2 insertions(+)
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
index 1900c21121b..7d3dfade0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
@@ -4,3 +4,4 @@
#include "vmul-template.h"
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
index c8508bcc1f7..a549d6f7be4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
@@ -4,3 +4,4 @@
#include "vmul-template.h"
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
-- 
2.40.1
 


Re: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread juzhe.zh...@rivai.ai
Hi, Robin. Can you give me more details of the comments:

What about:

  /* As NPATTERNS is always a power of two, we can optimize codegen of
 VLA const vector according to this feature.  */

Is this comment OK? As you know, I always struggle with writing English
comments or descriptions.

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 16:03
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP
> This is a nice improvement.  Even though we're in the SLP realm I would
> still add an assert that documents that we're indeed operating with
> pow2_p (NPATTERNS) and some comment as to why we can use AND.
> Sure we're doing exact_log2 et al later anyway, just to make things
> clearer.
 
Actually no assert necessary, just a comment like:
 
/* As NPATTERNS is always a power of two, we can ..."  */ 
 
Regards
Robin
 
 


Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Andreas Schwab via Gcc-patches
On Jun 20 2023, Jakub Jelinek via Gcc-patches wrote:

> Is it safe even on 64bit targets?  I mean, doesn't say PowerPC already allow
> full 64-bit virtual address space?  The assumption that one can't have
> more than half of virtual address space allocations is true right now at
> least on x86-64, aarch64 and others, but isn't that something that can
> change with newer versions of CPUs without the need to recompile
> applications (add another level or two of page tables)?

At least s390 can allocate more than half the address space.  That
triggered a failure in gawk.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 01:54, Patrick Palka  wrote:

> On Fri, 9 Jun 2023, Jonathan Wakely via Libstdc++ wrote:
>
> > Tested powerpc64le-linux. Pushed to trunk.
> >
> > This makes sense to backport after some soak time on trunk.
> >
> > -- >8 --
> >
> > As reported in PR libstdc++/110167, std::to_array compiles extremely
> > slowly for very large arrays. It needs to instantiate a very large
> > specialization of std::index_sequence and then create a very large
> > aggregate initializer from the pack expansion. For trivial types we can
> > simply default-initialize the std::array and then use memcpy to copy the
> > values. For non-trivial types we need to use the existing
> > implementation, despite the compilation cost.
> >
> > As also noted in the PR, using a generic lambda instead of the
> > __to_array helper compiles faster since gcc-13. It also produces
> > slightly smaller code at -O1, due to additional inlining. The code at
> > -Os, -O2 and -O3 seems to be the same. This new implementation requires
> > __cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
> > since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).
> >
> > libstdc++-v3/ChangeLog:
> >
> >   PR libstdc++/110167
> >   * include/std/array (to_array): Initialize arrays of trivial
> >   types using memcpy. For non-trivial types, use lambda
> >   expressions instead of a separate helper function.
> >   (__to_array): Remove.
> >   * testsuite/23_containers/array/creation/110167.cc: New test.
> > ---
> >  libstdc++-v3/include/std/array| 53 +--
> >  .../23_containers/array/creation/110167.cc| 14 +
> >  2 files changed, 51 insertions(+), 16 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
> >
> > diff --git a/libstdc++-v3/include/std/array
> b/libstdc++-v3/include/std/array
> > index 70280c1beeb..b791d86ddb2 100644
> > --- a/libstdc++-v3/include/std/array
> > +++ b/libstdc++-v3/include/std/array
> > @@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >return std::move(std::get<_Int>(__arr));
> >  }
> >
> > -#if __cplusplus > 201703L
> > +#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
> >  #define __cpp_lib_to_array 201907L
> > -
> > -  template
> > -constexpr array, sizeof...(_Idx)>
> > -__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
> > -{
> > -  if constexpr (_Move)
> > - return {{std::move(__a[_Idx])...}};
> > -  else
> > - return {{__a[_Idx]...}};
> > -}
> > -
> >template
> >  [[nodiscard]]
> >  constexpr array, _Nm>
> > @@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >static_assert(!is_array_v<_Tp>);
> >static_assert(is_constructible_v<_Tp, _Tp&>);
> >if constexpr (is_constructible_v<_Tp, _Tp&>)
> > - return __to_array(__a, make_index_sequence<_Nm>{});
> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
> > + {
> > +   if constexpr (is_trivial_v<_Tp> && _Nm != 0)
>
> redundant _Nm != 0 test?
>

Ah yes, I added it below to ensure we don't use memcpy with a null
__arr.data() and forgot to remove it here.


>
> > + {
> > +   array, _Nm> __arr;
> > +   if (!__is_constant_evaluated() && _Nm != 0)
> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> > +   else
> > + for (size_t __i = 0; __i < _Nm; ++__i)
> > +   __arr._M_elems[__i] = __a[__i];
> > +   return __arr;
> > + }
> > +   else
> > + return [&__a](index_sequence<_Idx...>) {
> > +   return array, _Nm>{{ __a[_Idx]... }};
> > + }(make_index_sequence<_Nm>{});
> > + }
> > +  else
> > + __builtin_unreachable(); // FIXME: see PR c++/91388
> >  }
> >
> >template
> > @@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >static_assert(!is_array_v<_Tp>);
> >static_assert(is_move_constructible_v<_Tp>);
> >if constexpr (is_move_constructible_v<_Tp>)
> > - return __to_array<1>(__a, make_index_sequence<_Nm>{});
> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
> > + {
> > +   if constexpr (is_trivial_v<_Tp>)
> > + {
> > +   array, _Nm> __arr;
> > +   if (!__is_constant_evaluated() && _Nm != 0)
> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> > +   else
> > + for (size_t __i = 0; __i < _Nm; ++__i)
> > +   __arr._M_elems[__i] = std::move(__a[__i]);
>
> IIUC this std::move is unnecessary for trivial arrays?
>

Good point, thanks.

That makes the lvalue and rvalue overloads identical for trivial types. It
seems a shame to duplicate the code, so the rvalue one could do:

  if constexpr (is_trivial_v<_Tp>)
   return std::to_array<_Tp, _Num>(__a);
 else

But that would imply an extra function call at -O0, and repeating overload
resolution. Since the duplicated cod

Re: [PATCH v3] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F

2023-06-20 Thread Hongtao Liu via Gcc-patches
On Tue, Jun 20, 2023 at 3:07 PM Jan Beulich via Gcc-patches
 wrote:
>
> There's no reason to constrain this to AVX512VL, unless instructed so by
> -mprefer-vector-width=, as the wider operation is unusable for more
> narrow operands only when the possible memory source is a non-broadcast
> one. This way even the scalar copysign3 can benefit from the
> operation being a single-insn one (leaving aside moves which the
> compiler decides to insert for unclear reasons, and leaving aside the
> fact that bcst_mem_operand() is too restrictive for broadcast to be
> embedded right into VPTERNLOG*).
>
> While there also bring *_vternlog_all's in sync with that
> of the three splitters.
>
> Along with this also request value duplication in
> ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating
> excess space allocation in .rodata.*, filled with zeros which are never
> read.
>
> gcc/
>
> * config/i386/i386-expand.cc (ix86_expand_copysign): Request
> value duplication by ix86_build_signbit_mask() when AVX512F and
> not HFmode.
> * config/i386/sse.md (*_vternlog_all): Convert to
> 2-alternative form. Adjust "mode" attribute. Add "enabled"
> attribute.
> (*_vpternlog_1): Also permit when TARGET_AVX512F
> && !TARGET_PREFER_AVX256.
> (*_vpternlog_2): Likewise.
> (*_vpternlog_3): Likewise.
>
> gcc/testsuite/
> * gcc.target/i386/avx512f-copysign.c: New test.
> ---
> I haven't been able to find documentation on the dejagnu(?) regex syntax
> (?:...). With ordinary (...) failing (producing twice as many matches),
> I could only derive this from other scan-assembler patterns.
>
> I guess the underlying pattern, going along the lines of what
> one_cmpl2 uses, can be applied elsewhere
> as well.
That should be guarded with !TARGET_PREFER_AVX256, let's handle that
in a separate patch.
>
> HFmode could use embedded broadcast too for copysign and alike, but that
> would need to be V2HF -> V8HF (for which I don't think there are any
> existing patterns).
> ---
> v3: Adjust insn conditional as well. Add testcase.
> v2: Respect -mprefer-vector-width=.
>
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -2266,7 +2266,7 @@ ix86_expand_copysign (rtx operands[])
>else
>  dest = NULL_RTX;
>op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
> -  mask = ix86_build_signbit_mask (vmode, 0, 0);
> +  mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 
> 0);
>
>if (CONST_DOUBLE_P (operands[1]))
>  {
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -12399,22 +12399,35 @@
> (set_attr "mode" "")])
>
>  (define_insn "*_vternlog_all"
> -  [(set (match_operand:V 0 "register_operand" "=v")
> +  [(set (match_operand:V 0 "register_operand" "=v,v")
> (unspec:V
> - [(match_operand:V 1 "register_operand" "0")
> -  (match_operand:V 2 "register_operand" "v")
> -  (match_operand:V 3 "bcst_vector_operand" "vmBr")
> + [(match_operand:V 1 "register_operand" "0,0")
> +  (match_operand:V 2 "register_operand" "v,v")
> +  (match_operand:V 3 "bcst_vector_operand" "vBr,m")
>(match_operand:SI 4 "const_0_to_255_operand")]
>   UNSPEC_VTERNLOG))]
> -  "TARGET_AVX512F
> +  "( == 64 || TARGET_AVX512VL
> +|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
>  /* Disallow embeded broadcast for vector HFmode since
> it's not real AVX512FP16 instruction.  */
>&& (GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
>   || GET_CODE (operands[3]) != VEC_DUPLICATE)"
> -  "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}"
> +{
> +  if (TARGET_AVX512VL)
> +return "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}";
> +  else
> +return "vpternlog\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}";
> +}
>[(set_attr "type" "sselog")
> (set_attr "prefix" "evex")
> -   (set_attr "mode" "")])
> +   (set (attr "mode")
> +(if_then_else (match_test "TARGET_AVX512VL")
> + (const_string "")
> + (const_string "XI")))
> +   (set (attr "enabled")
> +   (if_then_else (eq_attr "alternative" "1")
> + (symbol_ref " == 64 || TARGET_AVX512VL")
> + (const_string "*")))])
>
>  ;; There must be lots of other combinations like
>  ;;
> @@ -12443,7 +12456,8 @@
>   (any_logic2:V
> (match_operand:V 3 "regmem_or_bitnot_regmem_operand")
> (match_operand:V 4 "regmem_or_bitnot_regmem_operand"]
> -  "( == 64 || TARGET_AVX512VL)
> +  "( == 64 || TARGET_AVX512VL
> +|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
> && ix86_pre_reload_split ()
> && (rtx_equal_p (STRIP_UNARY (operands[1]),
> STRIP_UNARY (operands[4]))
> @@ -12527,7 +12541,8 @@
>   (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
> (match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
> 

Re: [PATCH] [vect]Use intermiediate integer type for float_expr/fix_trunc_expr when direct optab is not existed.

2023-06-20 Thread Richard Biener via Gcc-patches
On Fri, Jun 2, 2023 at 3:01 AM liuhongt via Gcc-patches
 wrote:
>
> We already use an intermediate type in the WIDEN case, but not for NONE;
> this patch extends that to the NONE case.
>
> I didn't do that in pattern recog since we need to know whether the
> stmt belongs to any slp_node to decide the vectype; the related optabs
> are checked according to vectype_in and vectype_out. For the non-SLP case,
> vec_pack/unpack are always used when the lhs has a different size from the
> rhs; for the SLP case, sometimes vec_pack/unpack is used and sometimes a
> direct conversion is used.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR target/110018
> * tree-vect-stmts.cc (vectorizable_conversion): Use
> intermiediate integer type for float_expr/fix_trunc_expr when
> direct optab is not existed.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110018-1.c: New test.
> ---
>  gcc/testsuite/gcc.target/i386/pr110018-1.c | 94 ++
>  gcc/tree-vect-stmts.cc | 56 -
>  2 files changed, 149 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110018-1.c
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr110018-1.c 
> b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> new file mode 100644
> index 000..b1baffd7af1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> @@ -0,0 +1,94 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512fp16 -mavx512vl -O2 -mavx512dq" } */
> +/* { dg-final { scan-assembler-times {(?n)vcvttp[dsh]2[dqw]} 5 } } */
> +/* { dg-final { scan-assembler-times {(?n)vcvt[dqw]*2p[dsh]} 5 } } */
> +
> +void
> +foo (double* __restrict a, char* b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +}
> +
> +void
> +foo1 (float* __restrict a, char* b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +}
> +
> +void
> +foo2 (_Float16* __restrict a, char* b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +  a[4] = b[4];
> +  a[5] = b[5];
> +  a[6] = b[6];
> +  a[7] = b[7];
> +}
> +
> +void
> +foo3 (double* __restrict a, short* b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +}
> +
> +void
> +foo4 (float* __restrict a, char* b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +}
> +
> +void
> +foo5 (double* __restrict b, char* a)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +}
> +
> +void
> +foo6 (float* __restrict b, char* a)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +}
> +
> +void
> +foo7 (_Float16* __restrict b, char* a)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +  a[4] = b[4];
> +  a[5] = b[5];
> +  a[6] = b[6];
> +  a[7] = b[7];
> +}
> +
> +void
> +foo8 (double* __restrict b, short* a)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +}
> +
> +void
> +foo9 (float* __restrict b, char* a)
> +{
> +  a[0] = b[0];
> +  a[1] = b[1];
> +  a[2] = b[2];
> +  a[3] = b[3];
> +}
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index bd3b07a3aa1..1118c89686d 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -5162,6 +5162,49 @@ vectorizable_conversion (vec_info *vinfo,
> return false;
>if (supportable_convert_operation (code, vectype_out, vectype_in, 
> &code1))
> break;

A comment would be nice here.  Like

   /* For conversions between float and smaller integer types try whether we can
  use intermediate signed integer types to support the conversion.  */

> +  if ((code == FLOAT_EXPR
> +  && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
> + || (code == FIX_TRUNC_EXPR
> + && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)))
> +   {
> + bool float_expr_p = code == FLOAT_EXPR;
> + scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
> + fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
> + code1 = float_expr_p ? code : NOP_EXPR;
> + codecvt1 = float_expr_p ? NOP_EXPR : code;
> + FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
> +   {
> + imode = rhs_mode_iter.require ();
> + if (GET_MODE_SIZE (imode) > fltsz)
> +   break;
> +
> + cvt_type
> +   = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
> + 0);
> + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
> + slp_node);
> + /* This should only happened for SLP as long as loop vectorizer
> +only supports same-sized vector.  */
> + if (cvt_type == NULL_TREE
> + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
> + || !supportable_convert_operation (code1, vectype_out,
> +cvt_type, &

[PATCHv4, rs6000] Add two peephole2 patterns for mr. insn

2023-06-20 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to "mr." instruction. These insn sequences can't be combined in
combine pass.

  Compared to last version, the empty constraint is removed and test cases
run only on powerpc Linux as AIX doesn't support "-mregnames" option.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - whether the compare
and move pattern is beneficial before RA - I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
The following two sequences can never be combined into the "mr." pattern as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator whose mode is decided by
TARGET_POWERPC64.  This mode iterator is used in the "mr." pattern and its
split pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, "mr." should compare the whole 64-bit register
with 0 rather than only the low 32 bits.

gcc/
* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
(peephole2 for move_and_compare): New.
(mode_iterator WORD): New.  Set the mode to SI/DImode by
TARGET_POWERPC64.
(*mov_internal2): Change the mode iterator from P to WORD.
(split pattern for compare_and_move): Likewise.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..2ab1e8d4c80 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
cmpi %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "int_reg_operand")
+   (const_int 0)))
+   (set (match_operand:WORD 0 "int_reg_operand")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:WORD 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:WORD 0 "int_reg_operand")
+   (match_operand:WORD 1 "int_reg_operand"))
+   (set (match_operand:CC 2 "cc_reg_operand")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..571a3112a74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-linux* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following instruction sequence is fo

[PATCH V2] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Juzhe-Zhong
V2 patch adds comment for Robin:
/* As NPATTERNS is always a power of two, we can AND -NPATTERNS
   to simplify the codegen.  */

Recently, I figured out a better approach to codegen for VLA stepped
vectors.

Here is a detailed description:

Case 1:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8] = b[i * 8 + 37] + 1;
  a[i * 8 + 1] = b[i * 8 + 37] + 2;
  a[i * 8 + 2] = b[i * 8 + 37] + 3;
  a[i * 8 + 3] = b[i * 8 + 37] + 4;
  a[i * 8 + 4] = b[i * 8 + 37] + 5;
  a[i * 8 + 5] = b[i * 8 + 37] + 6;
  a[i * 8 + 6] = b[i * 8 + 37] + 7;
  a[i * 8 + 7] = b[i * 8 + 37] + 8;
}
}

We need to generate the stepped vector:
NPATTERNS = 8.
{ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8 }

Before this patch:
vid.v    v4           ;; {0,1,2,3,4,5,6,7,...}
vsrl.vi  v4,v4,3      ;; {0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,...}
li       a3,8         ;; {8}
vmul.vx  v4,v4,a3     ;; {0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}

After this patch:
vid.v    v4           ;; {0,1,2,3,4,5,6,7,...}
vand.vi  v4,v4,-8     ;; -8 == -NPATTERNS: {0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}

Case 2:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8] = b[i * 8 + 3] + 1;
  a[i * 8 + 1] = b[i * 8 + 2] + 2;
  a[i * 8 + 2] = b[i * 8 + 1] + 3;
  a[i * 8 + 3] = b[i * 8 + 0] + 4;
  a[i * 8 + 4] = b[i * 8 + 7] + 5;
  a[i * 8 + 5] = b[i * 8 + 6] + 6;
  a[i * 8 + 6] = b[i * 8 + 5] + 7;
  a[i * 8 + 7] = b[i * 8 + 4] + 8;
}
} 

We need to generate the stepped vector:
NPATTERNS = 4.
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, ... }

Before this patch:
li   a6,134221824
slli a6,a6,5
addi a6,a6,3;; 64-bit: 0x000300020001
vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }
vid.vv4 ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
vsrl.vi  v4,v4,2;; {0, 0, 0, 0, 1, 1, 1, 1, ... }
li   a3,4   ;; {4}
vmul.vx  v4,v4,a3   ;; {0, 0, 0, 0, 4, 4, 4, 4, ... }
vadd.vv  v4,v4,v6   ;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
12, ... }

After this patch:
li  a3,-536875008
sllia3,a3,4
addia3,a3,1
sllia3,a3,16
vmv.v.x v2,a3   ;; {3, 1, -1, -3, ... }
vid.v   v4  ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
vadd.vv v4,v4,v2;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
12, ... }

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Optimize codegen.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-1.c: Adapt testcase.
* gcc.target/riscv/rvv/autovec/partial/slp-16.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-16.c: New test.

---
 gcc/config/riscv/riscv-v.cc   | 80 ---
 .../riscv/rvv/autovec/partial/slp-1.c |  2 +
 .../riscv/rvv/autovec/partial/slp-16.c| 24 ++
 .../riscv/rvv/autovec/partial/slp_run-16.c| 66 +++
 4 files changed, 127 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-16.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 79c0337327d..5d61187a848 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1128,7 +1128,7 @@ expand_const_vector (rtx target, rtx src)
builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
 }
   builder.finalize ();
-  
+
   if (CONST_VECTOR_DUPLICATE_P (src))
 {
   /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
@@ -1204,61 +1204,51 @@ expand_const_vector (rtx target, rtx src)
   if (builder.single_step_npatterns_p ())
{
  /* Describe the case by choosing NPATTERNS = 4 as an example.  */
- rtx base, step;
+ insn_code icode;
+
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }.  */
+ rtx vid = gen_reg_rtx (builder.mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder.mode ());
+ emit_vlmax_insn (icode, RVV_MISC_OP, vid_ops);
+
  if (builder.npatterns_all_equal_p ())
{
  /* Generate the variable-length vector following this rule:
 { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
   E.g. { 0, 0, 8, 8, 16, 16, ... } */
- /* Step 1: Generate base = { 0, 0, 0, 0, 0, 0, 0, ... }.  */
- base = expand_vector_broadcast (builder.mode (), builder.elt (0));
+ /* Step 2: VID AND -NPATTERNS:
+{ 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
+ */
+ rtx imm
+   = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
+ /* As NPATTERNS is always a power of two, we can AND -NPATTERNS
+to simplify the codegen. 

Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Robin Dapp via Gcc-patches


> +   /* Step 2: VID AND -NPATTERNS:
> +  { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
> +   */
Before that, just add something simple like:

We want to create a pattern where
value[ix] = floor (ix / NPATTERNS) * NPATTERNS.
As NPATTERNS is always a power of two we can rewrite this as
= ix & -NPATTERNS.

Regards
 Robin



Re: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread juzhe.zh...@rivai.ai
Ok. Just sent V2. I will adjust comment and send V3 again :)



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 16:55
To: juzhe.zh...@rivai.ai; gcc-patches
CC: rdapp.gcc; kito.cheng; Kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Optimize codegen of VLA SLP
 
> +   /* Step 2: VID AND -NPATTERNS:
> + { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
> +   */
Before that, just add something simple like:
 
We want to create a pattern where value[ix] = floor (ix / NPATTERNS).
As NPATTERNS is always a power of two we can rewrite this as
= ix & -NPATTERNS.
 
Regards
Robin
 
 


Re: [PATCH] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Robin Dapp via Gcc-patches
> Ok. Just sent V2. I will adjust comment and send V3 again :)

Sorry, was too slow.

Regards
 Robin



[PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Juzhe-Zhong
Add comments for Robin:
We want to create a pattern where
value[ix] = floor (ix / NPATTERNS) * NPATTERNS.
As NPATTERNS is always a power of two we can rewrite this as
= ix & -NPATTERNS.

Recently, I figured out a better approach to codegen for VLA stepped
vectors.

Here is a detailed description:

Case 1:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8] = b[i * 8 + 37] + 1;
  a[i * 8 + 1] = b[i * 8 + 37] + 2;
  a[i * 8 + 2] = b[i * 8 + 37] + 3;
  a[i * 8 + 3] = b[i * 8 + 37] + 4;
  a[i * 8 + 4] = b[i * 8 + 37] + 5;
  a[i * 8 + 5] = b[i * 8 + 37] + 6;
  a[i * 8 + 6] = b[i * 8 + 37] + 7;
  a[i * 8 + 7] = b[i * 8 + 37] + 8;
}
}

We need to generate the stepped vector:
NPATTERNS = 8.
{ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8 }

Before this patch:
vid.v    v4           ;; {0,1,2,3,4,5,6,7,...}
vsrl.vi  v4,v4,3      ;; {0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,...}
li       a3,8         ;; {8}
vmul.vx  v4,v4,a3     ;; {0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}

After this patch:
vid.v    v4           ;; {0,1,2,3,4,5,6,7,...}
vand.vi  v4,v4,-8     ;; -8 == -NPATTERNS: {0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,...}

Case 2:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8] = b[i * 8 + 3] + 1;
  a[i * 8 + 1] = b[i * 8 + 2] + 2;
  a[i * 8 + 2] = b[i * 8 + 1] + 3;
  a[i * 8 + 3] = b[i * 8 + 0] + 4;
  a[i * 8 + 4] = b[i * 8 + 7] + 5;
  a[i * 8 + 5] = b[i * 8 + 6] + 6;
  a[i * 8 + 6] = b[i * 8 + 5] + 7;
  a[i * 8 + 7] = b[i * 8 + 4] + 8;
}
} 

We need to generate the stepped vector:
NPATTERNS = 4.
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, ... }

Before this patch:
li   a6,134221824
slli a6,a6,5
addi a6,a6,3;; 64-bit: 0x000300020001
vmv.v.x  v6,a6  ;; {3, 2, 1, 0, ... }
vid.vv4 ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
vsrl.vi  v4,v4,2;; {0, 0, 0, 0, 1, 1, 1, 1, ... }
li   a3,4   ;; {4}
vmul.vx  v4,v4,a3   ;; {0, 0, 0, 0, 4, 4, 4, 4, ... }
vadd.vv  v4,v4,v6   ;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
12, ... }

After this patch:
li  a3,-536875008
sllia3,a3,4
addia3,a3,1
sllia3,a3,16
vmv.v.x v2,a3   ;; {3, 1, -1, -3, ... }
vid.v   v4  ;; {0, 1, 2, 3, 4, 5, 6, 7, ... }
vadd.vv v4,v4,v2;; {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 
12, ... }

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Optimize codegen.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-1.c: Adapt testcase.
* gcc.target/riscv/rvv/autovec/partial/slp-16.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-16.c: New test.

---
 gcc/config/riscv/riscv-v.cc   | 81 +--
 .../riscv/rvv/autovec/partial/slp-1.c |  2 +
 .../riscv/rvv/autovec/partial/slp-16.c| 24 ++
 .../riscv/rvv/autovec/partial/slp_run-16.c| 66 +++
 4 files changed, 128 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-16.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 79c0337327d..839a2c6ba71 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1128,7 +1128,7 @@ expand_const_vector (rtx target, rtx src)
builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
 }
   builder.finalize ();
-  
+
   if (CONST_VECTOR_DUPLICATE_P (src))
 {
   /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
@@ -1204,61 +1204,52 @@ expand_const_vector (rtx target, rtx src)
   if (builder.single_step_npatterns_p ())
{
  /* Describe the case by choosing NPATTERNS = 4 as an example.  */
- rtx base, step;
+ insn_code icode;
+
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }.  */
+ rtx vid = gen_reg_rtx (builder.mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder.mode ());
+ emit_vlmax_insn (icode, RVV_MISC_OP, vid_ops);
+
  if (builder.npatterns_all_equal_p ())
{
  /* Generate the variable-length vector following this rule:
 { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
   E.g. { 0, 0, 8, 8, 16, 16, ... } */
- /* Step 1: Generate base = { 0, 0, 0, 0, 0, 0, 0, ... }.  */
- base = expand_vector_broadcast (builder.mode (), builder.elt (0));
+ /* We want to create a pattern where value[ix] = floor (ix /
+NPATTERNS). As NPATTERNS is always a power of two we can
+rewrite this as = ix & -NPATTERNS.  */
+ /* Step 2: VID AND -NPATTERNS:
+{ 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 

Re: [PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Robin Dapp via Gcc-patches
LGTM.

Regards
 Robin



Re: [PATCH] [vect]Use intermiediate integer type for float_expr/fix_trunc_expr when direct optab is not existed.

2023-06-20 Thread Hongtao Liu via Gcc-patches
On Tue, Jun 20, 2023 at 4:41 PM Richard Biener
 wrote:
>
> On Fri, Jun 2, 2023 at 3:01 AM liuhongt via Gcc-patches
>  wrote:
> >
> > We have already use intermidate type in case WIDEN, but not for NONE,
> > this patch extended that.
> >
> > I didn't do that in pattern recog since we need to know whether the
> > stmt belongs to any slp_node to decide the vectype, the related optabs
> > are checked according to vectype_in and vectype_out. For non-slp case,
> > vec_pack/unpack are always used when lhs has different size from rhs,
> > for slp case, sometimes vec_pack/unpack is used, somethings
> > direct conversion is used.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > PR target/110018
> > * tree-vect-stmts.cc (vectorizable_conversion): Use
> > intermiediate integer type for float_expr/fix_trunc_expr when
> > direct optab is not existed.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr110018-1.c: New test.
> > ---
> >  gcc/testsuite/gcc.target/i386/pr110018-1.c | 94 ++
> >  gcc/tree-vect-stmts.cc | 56 -
> >  2 files changed, 149 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110018-1.c
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110018-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> > new file mode 100644
> > index 000..b1baffd7af1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> > @@ -0,0 +1,94 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx512fp16 -mavx512vl -O2 -mavx512dq" } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvttp[dsh]2[dqw]} 5 } } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvt[dqw]*2p[dsh]} 5 } } */
> > +
> > +void
> > +foo (double* __restrict a, char* b)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +}
> > +
> > +void
> > +foo1 (float* __restrict a, char* b)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +}
> > +
> > +void
> > +foo2 (_Float16* __restrict a, char* b)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +  a[4] = b[4];
> > +  a[5] = b[5];
> > +  a[6] = b[6];
> > +  a[7] = b[7];
> > +}
> > +
> > +void
> > +foo3 (double* __restrict a, short* b)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +}
> > +
> > +void
> > +foo4 (float* __restrict a, char* b)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +}
> > +
> > +void
> > +foo5 (double* __restrict b, char* a)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +}
> > +
> > +void
> > +foo6 (float* __restrict b, char* a)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +}
> > +
> > +void
> > +foo7 (_Float16* __restrict b, char* a)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +  a[4] = b[4];
> > +  a[5] = b[5];
> > +  a[6] = b[6];
> > +  a[7] = b[7];
> > +}
> > +
> > +void
> > +foo8 (double* __restrict b, short* a)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +}
> > +
> > +void
> > +foo9 (float* __restrict b, char* a)
> > +{
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +}
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index bd3b07a3aa1..1118c89686d 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -5162,6 +5162,49 @@ vectorizable_conversion (vec_info *vinfo,
> > return false;
> >if (supportable_convert_operation (code, vectype_out, vectype_in, 
> > &code1))
> > break;
>
> A comment would be nice here.  Like
>
>/* For conversions between float and smaller integer types try whether we 
> can
>   use intermediate signed integer types to support the conversion.  */
>
> > +  if ((code == FLOAT_EXPR
> > +  && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
> > + || (code == FIX_TRUNC_EXPR
> > + && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)))
> > +   {
> > + bool float_expr_p = code == FLOAT_EXPR;
> > + scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
> > + fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
> > + code1 = float_expr_p ? code : NOP_EXPR;
> > + codecvt1 = float_expr_p ? NOP_EXPR : code;
> > + FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
> > +   {
> > + imode = rhs_mode_iter.require ();
> > + if (GET_MODE_SIZE (imode) > fltsz)
> > +   break;
> > +
> > + cvt_type
> > +   = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
> > + 0);
> > + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
> > + slp_node);
>

Re: [PATCH v3] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F

2023-06-20 Thread Jan Beulich via Gcc-patches
On 20.06.2023 10:33, Hongtao Liu wrote:
> On Tue, Jun 20, 2023 at 3:07 PM Jan Beulich via Gcc-patches
>  wrote:
>>
>> I guess the underlying pattern, going along the lines of what
>> one_cmpl2 uses, can be applied elsewhere
>> as well.
> That should be guarded with !TARGET_PREFER_AVX256, let's handle that
> in a separate patch.

Sure, and as indicated there are more places where similar things could
be done.

>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c
>> @@ -0,0 +1,32 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mavx512f -mno-avx512vl -O2" } */
> Please explicitly add -mprefer-vector-width=512, our tester will also
> test unix{-m32 \-march=cascadelake,\ -march=cascadelake} which set the
> - mprefer-vector-width=256, -mprefer-vector-width=512 in dg-options
> can overwrite that.

Oh, I see. Will do. And I expect I then also need to adjust the newly
added avx512f-dupv2di.c from the earlier patch. I guess I could commit
that option addition there as obvious?

> Others LGTM.

May I take this as "okay with that change", or should I submit v4?

Jan


Re: [PATCHv4, rs6000] Add two peephole2 patterns for mr. insn

2023-06-20 Thread Kewen.Lin via Gcc-patches
Hi,

on 2023/6/20 16:49, HAO CHEN GUI wrote:
> Hi,
>   This patch adds two peephole2 patterns which help convert certain insn
> sequences to "mr." instruction. These insn sequences can't be combined in
> combine pass.
> 
>   Compared to last version, the empty constraint is removed and test cases
> run only on powerpc Linux as AIX doesn't support "-mregnames" option.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.


OK for trunk, thanks!

BR,
Kewen

> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> rs6000: Add two peephole patterns for "mr." insn
> 
> When investigating the issue mentioned in PR87871#c30 - if compare
> and move pattern benefits before RA, I checked the assembly generated
> for SPEC2017 and found that certain insn sequences aren't converted to
> "mr." instructions.
> Following two sequence are never to be combined to "mr." pattern as
> there is no register link between them. This patch adds two peephole2
> patterns to convert them to "mr." instructions.
> 
> cmp 0,3,0
> mr 4,3
> 
> mr 4,3
> cmp 0,3,0
> 
> The patch also creates a new mode iterator which decided by
> TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
> pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
> In this situation, the "mr." should compares the whole 64-bit register
> with 0 other than the low 32-bit one.
> 
> gcc/
>   * config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
>   (peephole2 for move_and_compare): New.
>   (mode_iterator WORD): New.  Set the mode to SI/DImode by
>   TARGET_POWERPC64.
>   (*mov_internal2): Change the mode iterator from P to WORD.
>   (split pattern for compare_and_move): Likewise.
> 
> gcc/testsuite/
>   * gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
>   * gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index b0db8ae508d..2ab1e8d4c80 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
>  ; The size of a pointer.  Also, the size of the value that a record-condition
>  ; (one with a '.') will compare; and the size used for arithmetic carries.
>  (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
> +(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI 
> "TARGET_POWERPC64")])
> 
>  ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
>  ; PTImode is GPR only)
> @@ -7879,9 +7880,9 @@ (define_split
> 
>  (define_insn "*mov_internal2"
>[(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
> - (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
> + (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r")
>   (const_int 0)))
> -   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
> +   (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
>""
>"@
> cmpi %2,%0,0
> @@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
> (set_attr "dot" "yes")
> (set_attr "length" "4,4,8")])
> 
> +(define_peephole2
> +  [(set (match_operand:CC 2 "cc_reg_operand")
> + (compare:CC (match_operand:WORD 1 "int_reg_operand")
> + (const_int 0)))
> +   (set (match_operand:WORD 0 "int_reg_operand")
> + (match_dup 1))]
> +  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
> +  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +(compare:CC (match_operand:WORD 1 "int_reg_operand" "r")
> +(const_int 0)))
> +   (set (match_operand:WORD 0 "int_reg_operand" "=r")
> +(match_dup 1))])]
> +  ""
> +)
> +
> +(define_peephole2
> +  [(set (match_operand:WORD 0 "int_reg_operand")
> + (match_operand:WORD 1 "int_reg_operand"))
> +   (set (match_operand:CC 2 "cc_reg_operand")
> + (compare:CC (match_dup 1)
> + (const_int 0)))]
> +  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
> +  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +(compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
> +(const_int 0)))
> +   (set (match_operand:WORD 0 "int_reg_operand" "=r")
> +(match_dup 1))])]
> +  ""
> +)
> +
>  (define_split
>[(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
> - (compare:CC (match_operand:P 1 "gpc_reg_operand")
> + (compare:CC (match_operand:WORD 1 "gpc_reg_operand")
>   (const_int 0)))
> -   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
> +   (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))]
>"reload_completed"
>[(set (match_dup 0) (match_dup 1))
> (set (match_dup 2)
> diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
> b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
> new file mode 100644
> index 000..571a3112a74
> --- 

Re: [PATCH] [vect]Use intermiediate integer type for float_expr/fix_trunc_expr when direct optab is not existed.

2023-06-20 Thread Richard Biener via Gcc-patches
On Tue, Jun 20, 2023 at 11:02 AM Hongtao Liu  wrote:
>
> On Tue, Jun 20, 2023 at 4:41 PM Richard Biener
>  wrote:
> >
> > On Fri, Jun 2, 2023 at 3:01 AM liuhongt via Gcc-patches
> >  wrote:
> > >
> > > > We already use an intermediate type in case WIDEN, but not for NONE;
> > > > this patch extends that.
> > >
> > > I didn't do that in pattern recog since we need to know whether the
> > > stmt belongs to any slp_node to decide the vectype, the related optabs
> > > are checked according to vectype_in and vectype_out. For non-slp case,
> > > vec_pack/unpack are always used when lhs has different size from rhs,
> > > > for slp case, sometimes vec_pack/unpack is used, sometimes
> > > > direct conversion is used.
> > >
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > Ok for trunk?
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/110018
> > > * tree-vect-stmts.cc (vectorizable_conversion): Use
> > > > intermediate integer type for float_expr/fix_trunc_expr when
> > > > no direct optab exists.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/pr110018-1.c: New test.
> > > ---
> > >  gcc/testsuite/gcc.target/i386/pr110018-1.c | 94 ++
> > >  gcc/tree-vect-stmts.cc | 56 -
> > >  2 files changed, 149 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110018-1.c
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr110018-1.c 
> > > b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> > > new file mode 100644
> > > index 000..b1baffd7af1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr110018-1.c
> > > @@ -0,0 +1,94 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2 -mavx512dq" } */
> > > +/* { dg-final { scan-assembler-times {(?n)vcvttp[dsh]2[dqw]} 5 } } */
> > > +/* { dg-final { scan-assembler-times {(?n)vcvt[dqw]*2p[dsh]} 5 } } */
> > > +
> > > +void
> > > +foo (double* __restrict a, char* b)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +}
> > > +
> > > +void
> > > +foo1 (float* __restrict a, char* b)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +}
> > > +
> > > +void
> > > +foo2 (_Float16* __restrict a, char* b)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +  a[4] = b[4];
> > > +  a[5] = b[5];
> > > +  a[6] = b[6];
> > > +  a[7] = b[7];
> > > +}
> > > +
> > > +void
> > > +foo3 (double* __restrict a, short* b)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +}
> > > +
> > > +void
> > > +foo4 (float* __restrict a, char* b)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +}
> > > +
> > > +void
> > > +foo5 (double* __restrict b, char* a)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +}
> > > +
> > > +void
> > > +foo6 (float* __restrict b, char* a)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +}
> > > +
> > > +void
> > > +foo7 (_Float16* __restrict b, char* a)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +  a[4] = b[4];
> > > +  a[5] = b[5];
> > > +  a[6] = b[6];
> > > +  a[7] = b[7];
> > > +}
> > > +
> > > +void
> > > +foo8 (double* __restrict b, short* a)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +}
> > > +
> > > +void
> > > +foo9 (float* __restrict b, char* a)
> > > +{
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +}
> > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > index bd3b07a3aa1..1118c89686d 100644
> > > --- a/gcc/tree-vect-stmts.cc
> > > +++ b/gcc/tree-vect-stmts.cc
> > > @@ -5162,6 +5162,49 @@ vectorizable_conversion (vec_info *vinfo,
> > > return false;
> > >if (supportable_convert_operation (code, vectype_out, vectype_in, 
> > > &code1))
> > > break;
> >
> > A comment would be nice here.  Like
> >
> >/* For conversions between float and smaller integer types try whether 
> > we can
> >   use intermediate signed integer types to support the conversion.  */
> >
> > > +  if ((code == FLOAT_EXPR
> > > +  && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
> > > + || (code == FIX_TRUNC_EXPR
> > > + && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)))
> > > +   {
> > > + bool float_expr_p = code == FLOAT_EXPR;
> > > + scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
> > > + fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
> > > + code1 = float_expr_p ? code : NOP_EXPR;
> > > + codecvt1 = float_expr_p ? NOP_EXPR : code;
> > > + FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
> > > +   {
> > > + imode = rhs_mode_iter.require ();
> > > +

Re: [PATCH v3] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F

2023-06-20 Thread Hongtao Liu via Gcc-patches
On Tue, Jun 20, 2023 at 5:03 PM Jan Beulich  wrote:
>
> On 20.06.2023 10:33, Hongtao Liu wrote:
> > On Tue, Jun 20, 2023 at 3:07 PM Jan Beulich via Gcc-patches
> >  wrote:
> >>
> >> I guess the underlying pattern, going along the lines of what
> >> one_cmpl2 uses, can be applied elsewhere
> >> as well.
> > That should be guarded with !TARGET_PREFER_AVX256, let's handle that
> > in a separate patch.
>
> Sure, and as indicated there are more places where similar things could
> be done.
>
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c
> >> @@ -0,0 +1,32 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-mavx512f -mno-avx512vl -O2" } */
> > Please explicitly add -mprefer-vector-width=512, our tester will also
> > test unix{-m32 \-march=cascadelake,\ -march=cascadelake} which set the
> > - mprefer-vector-width=256, -mprefer-vector-width=512 in dg-options
> > can overwrite that.
>
> Oh, I see. Will do. And I expect I then also need to adjust the newly
> added avx512f-dupv2di.c from the earlier patch. I guess I could commit
> that option addition there as obvious?
Still need to send out the patch, and commit as an obvious fix.
>
> > Others LGTM.
>
> May I take this as "okay with that change", or should I submit v4?
Okay. no need for a v4 version.
>
> Jan



-- 
BR,
Hongtao


[PATCH 2/2] [RISC-V] resolve confilct between zcmp multi push/pop and shrink-wrap-separate

2023-06-20 Thread Fei Gao
Disable zcmp multi push/pop if shrink-wrap-separate is active.

So in -Os that prefers smaller code size, by default shrink-wrap-separate
is disabled while zcmp multi push/pop is enabled.

And in -O2 and other levels that prefer speed, by default shrink-wrap-separate
is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
push/pop in this case, -fno-shrink-wrap-separate has to be explicitly given.

The following TC shows the issues in -O2 before this patch with both
shrink-wrap-separate and zcmp multi push/pop active.
1. duplicated store of s regs.
2. cm.push pushes ra, s0-s11 in the reverse order of what the normal
   prologue does, causing stack corruption and failure to restore s regs.

TC: zcmp_shrink_wrap_separate.c included in this patch.

output asm before this patch:
calc_func:
cm.push {ra, s0-s3}, -32
...
beq a5,zero,.L2
...
.L2:
...
sw  s1,20(sp) //issue here
sw  s3,12(sp) //issue here
...
sw  s2,16(sp) //issue here

output asm after this patch:
calc_func:
addi    sp,sp,-32
sw  s0,24(sp)
...
beq a5,zero,.L2
...
.L2:
...
sw  s1,20(sp)
sw  s3,12(sp)
...
sw  s2,16(sp)

Signed-off-by: Fei Gao 
Co-Authored-By: Zhangjin Liao 

gcc/ChangeLog:

* config/riscv/riscv.cc
(riscv_avoid_shrink_wrapping_separate): wrap the condition check in
  riscv_avoid_shrink_wrapping_separate.
(riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
  is active.
(riscv_get_separate_components): call 
riscv_avoid_shrink_wrapping_separate
---
 gcc/config/riscv/riscv.cc | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 26405b5978b..2cca5fbb62d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "cfgloop.h"
 #include "cfgrtl.h"
+#include "shrink-wrap.h"
 #include "sel-sched.h"
 #include "fold-const.h"
 #include "gimple-iterator.h"
@@ -389,6 +390,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   false,   /* use_divmod_expansion */
 };
 
+static bool riscv_avoid_shrink_wrapping_separate ();
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
 
@@ -5032,6 +5034,8 @@ riscv_avoid_multi_push(const struct riscv_frame_info 
*frame)
   || cfun->machine->interrupt_handler_p
   || cfun->machine->varargs_size != 0
   || crtl->args.pretend_args_size != 0
+  || (use_shrink_wrapping_separate ()
+  && !riscv_avoid_shrink_wrapping_separate ())
   || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
 return true;
 
@@ -6199,6 +6203,17 @@ riscv_epilogue_uses (unsigned int regno)
   return false;
 }
 
+static bool
+riscv_avoid_shrink_wrapping_separate ()
+{
+  if (riscv_use_save_libcall (&cfun->machine->frame)
+  || cfun->machine->interrupt_handler_p
+  || !cfun->machine->frame.gp_sp_offset.is_constant ())
+return true;
+
+  return false;
+}
+
 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
 
 static sbitmap
@@ -6208,9 +6223,7 @@ riscv_get_separate_components (void)
   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   bitmap_clear (components);
 
-  if (riscv_use_save_libcall (&cfun->machine->frame)
-  || cfun->machine->interrupt_handler_p
-  || !cfun->machine->frame.gp_sp_offset.is_constant ())
+  if (riscv_avoid_shrink_wrapping_separate ())
 return components;
 
   offset = cfun->machine->frame.gp_sp_offset.to_constant ();
-- 
2.17.1



[PATCH 0/2] resolve confilct between RISC-V zcmp and shrink-wrap-separate

2023-06-20 Thread Fei Gao
These 2 patches resolve the conflict between zcmp multi push/pop and
shrink-wrap-separate.

As per Kito's review comment
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg310564.html,
I split the original patch into two parts: the RISC-V part and
the rest (shrink-wrap.h / shrink-wrap.cc).

Fei Gao (2):
  allow target to check shrink-wrap-separate enabled or not
  [RISC-V] resolve confilct between zcmp multi push/pop and
shrink-wrap-separate

 gcc/config/riscv/riscv.cc | 19 ---
 gcc/shrink-wrap.cc| 25 +
 gcc/shrink-wrap.h |  1 +
 3 files changed, 34 insertions(+), 11 deletions(-)

-- 
2.17.1



[PATCH 1/2] allow target to check shrink-wrap-separate enabled or not

2023-06-20 Thread Fei Gao
gcc/ChangeLog:

* shrink-wrap.cc (try_shrink_wrapping_separate): Call
  use_shrink_wrapping_separate.
(use_shrink_wrapping_separate): New function wrapping the condition
  check formerly in try_shrink_wrapping_separate.
* shrink-wrap.h (use_shrink_wrapping_separate): Declare as extern.
---
 gcc/shrink-wrap.cc | 25 +
 gcc/shrink-wrap.h  |  1 +
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/shrink-wrap.cc b/gcc/shrink-wrap.cc
index b8d7b557130..d534964321a 100644
--- a/gcc/shrink-wrap.cc
+++ b/gcc/shrink-wrap.cc
@@ -1776,16 +1776,14 @@ insert_prologue_epilogue_for_components (sbitmap 
components)
   commit_edge_insertions ();
 }
 
-/* The main entry point to this subpass.  FIRST_BB is where the prologue
-   would be normally put.  */
-void
-try_shrink_wrapping_separate (basic_block first_bb)
+bool
+use_shrink_wrapping_separate (void)
 {
   if (!(SHRINK_WRAPPING_ENABLED
-   && flag_shrink_wrap_separate
-   && optimize_function_for_speed_p (cfun)
-   && targetm.shrink_wrap.get_separate_components))
-return;
+&& flag_shrink_wrap_separate
+&& optimize_function_for_speed_p (cfun)
+&& targetm.shrink_wrap.get_separate_components))
+return false;
 
   /* We don't handle "strange" functions.  */
   if (cfun->calls_alloca
@@ -1794,6 +1792,17 @@ try_shrink_wrapping_separate (basic_block first_bb)
   || crtl->calls_eh_return
   || crtl->has_nonlocal_goto
   || crtl->saves_all_registers)
+return false;
+
+  return true;
+}
+
+/* The main entry point to this subpass.  FIRST_BB is where the prologue
+   would be normally put.  */
+void
+try_shrink_wrapping_separate (basic_block first_bb)
+{
+  if (!use_shrink_wrapping_separate ())
 return;
 
   /* Ask the target what components there are.  If it returns NULL, don't
diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
index 161647711a3..82386c2b712 100644
--- a/gcc/shrink-wrap.h
+++ b/gcc/shrink-wrap.h
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
 extern void try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq);
 extern void try_shrink_wrapping_separate (basic_block first_bb);
+extern bool use_shrink_wrapping_separate (void);
 #define SHRINK_WRAPPING_ENABLED \
   (flag_shrink_wrap && targetm.have_simple_return ())
 
-- 
2.17.1



[PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Lehua Ding
Hi,

This little patch fixes a compile warning issue that my previous patch 
introduced, sorry for introducing this issue.

Best,
Lehua

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_arg_has_vector): Add default branch.

---
 gcc/config/riscv/riscv.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 6eb63a9d4de7..9558e28de3fc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3820,8 +3820,8 @@ riscv_arg_has_vector (const_tree type)
   switch (TREE_CODE (type))
 {
 case RECORD_TYPE:
-  /* If it is a record, it is further determined whether its fileds have
- vector type.  */
+  /* If it is a record, it is further determined whether its fields have
+vector type.  */
   for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
if (TREE_CODE (f) == FIELD_DECL)
  {
@@ -3835,6 +3835,8 @@ riscv_arg_has_vector (const_tree type)
   break;
 case ARRAY_TYPE:
   return riscv_arg_has_vector (TREE_TYPE (type));
+default:
+  break;
 }
 
   return false;
-- 
2.36.3



Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread juzhe.zh...@rivai.ai
Ok.



juzhe.zh...@rivai.ai
 
From: Lehua Ding
Date: 2023-06-20 17:45
To: gcc-patches
CC: juzhe.zhong; rdapp.gcc; kito.cheng; palmer; jeffreyalaw; pan2.li
Subject: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector
Hi,
 
This little patch fixes a compile warning issue that my previous patch 
introduced, sorry for introducing this issue.
 
Best,
Lehua
 
gcc/ChangeLog:
 
* config/riscv/riscv.cc (riscv_arg_has_vector): Add default branch.
 
---
gcc/config/riscv/riscv.cc | 6 --
1 file changed, 4 insertions(+), 2 deletions(-)
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 6eb63a9d4de7..9558e28de3fc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3820,8 +3820,8 @@ riscv_arg_has_vector (const_tree type)
   switch (TREE_CODE (type))
 {
 case RECORD_TYPE:
-  /* If it is a record, it is further determined whether its fileds have
- vector type.  */
+  /* If it is a record, it is further determined whether its fields have
+ vector type.  */
   for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
if (TREE_CODE (f) == FIELD_DECL)
  {
@@ -3835,6 +3835,8 @@ riscv_arg_has_vector (const_tree type)
   break;
 case ARRAY_TYPE:
   return riscv_arg_has_vector (TREE_TYPE (type));
+default:
+  break;
 }
   return false;
-- 
2.36.3
 


[SVE][match.pd] Fix ICE observed in PR110280

2023-06-20 Thread Prathamesh Kulkarni via Gcc-patches
Hi Richard,
For the following reduced test-case taken from PR:

#include "arm_sve.h"
svuint32_t l() {
  alignas(16) const unsigned int lanes[4] = {0, 0, 0, 0};
  return svld1rq_u32(svptrue_b8(), lanes);
}

compiling with -O3 -mcpu=generic+sve results in the following ICE:
during GIMPLE pass: fre
pr110280.c: In function 'l':
pr110280.c:5:1: internal compiler error: in eliminate_stmt, at
tree-ssa-sccvn.cc:6890
5 | }
  | ^
0x865fb1 eliminate_dom_walker::eliminate_stmt(basic_block_def*,
gimple_stmt_iterator*)
../../gcc/gcc/tree-ssa-sccvn.cc:6890
0x120bf4d eliminate_dom_walker::before_dom_children(basic_block_def*)
../../gcc/gcc/tree-ssa-sccvn.cc:7324
0x120bf4d eliminate_dom_walker::before_dom_children(basic_block_def*)
../../gcc/gcc/tree-ssa-sccvn.cc:7257
0x1aeec77 dom_walker::walk(basic_block_def*)
../../gcc/gcc/domwalk.cc:311
0x11fd924 eliminate_with_rpo_vn(bitmap_head*)
../../gcc/gcc/tree-ssa-sccvn.cc:7504
0x1214664 do_rpo_vn_1
../../gcc/gcc/tree-ssa-sccvn.cc:8616
0x1215ba5 execute
../../gcc/gcc/tree-ssa-sccvn.cc:8702

cc1 simplifies:
  lanes[0] = 0;
  lanes[1] = 0;
  lanes[2] = 0;
  lanes[3] = 0;
  _1 = { -1, ... };
  _7 = svld1rq_u32 (_1, &lanes);

to:
  _9 = MEM  [(unsigned int * {ref-all})&lanes];
  _7 = VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }>;

and then fre1 dump shows:
Applying pattern match.pd:8675, generic-match-5.cc:9025
Match-and-simplified VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }> to {
0, 0, 0, 0 }
RHS VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }> simplified to { 0, 0, 0, 0 }

The issue seems to be with the following pattern:
(simplify
 (vec_perm vec_same_elem_p@0 @0 @1)
 @0)

which simplifies above VEC_PERM_EXPR to:
_7 = {0, 0, 0, 0}
which is incorrect since _9 and mask have different vector lengths.

The attached patch amends the pattern to simplify above VEC_PERM_EXPR
only if operand and mask have same number of elements, which seems to fix
the issue, and we're left with the following in .optimized dump:
   [local count: 1073741824]:
  _2 = VEC_PERM_EXPR <{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 1, 2, 3, ... }>;
  return _2;

code-gen:
l:
mov z0.b, #0
ret

Patch is bootstrapped+tested on aarch64-linux-gnu.
OK to commit ?

Thanks,
Prathamesh
[SVE][match.pd] Fix ICE observed in PR110280.

gcc/ChangeLog:
PR tree-optimization/110280
* match.pd (vec_perm_expr(v, v, mask) -> v): Simplify the pattern
only if operand and mask of VEC_PERM_EXPR have same number of
elements.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pr110280.c: New test.

diff --git a/gcc/match.pd b/gcc/match.pd
index 2dd23826034..0eb5f8f0af6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8669,10 +8669,11 @@ and,
  @0
  (if (uniform_vector_p (@0
 
-
 (simplify
  (vec_perm vec_same_elem_p@0 @0 @1)
- @0)
+ (if (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)),
+   TYPE_VECTOR_SUBPARTS (TREE_TYPE (@1
+  @0))
 
 /* Push VEC_PERM earlier if that may help FMA perception (PR101895).  */
 (simplify
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr110280.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr110280.c
new file mode 100644
index 000..453c9cbcf9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr110280.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_sve.h"
+
+svuint32_t l()
+{
+  _Alignas(16) const unsigned int lanes[4] = {0, 0, 0, 0};
+  return svld1rq_u32(svptrue_b8(), lanes);
+}


[PATCH][committed] aarch64: Optimise ADDP with same source operands

2023-06-20 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

We've been asked to optimise the testcase in this patch of a 64-bit ADDP with
the low and high halves of the same 128-bit vector. This can be done by a
single .4s ADDP followed by just reading the bottom 64 bits. A splitter for
this is quite straightforward now that all the vec_concat stuff is collapsed
by simplify-rtx.

With this patch we generate a single:
addp    v0.4s, v0.4s, v0.4s
instead of:
dup d31, v0.d[1]
addp    v0.2s, v0.2s, v31.2s
ret

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (*aarch64_addp_same_reg):
New define_insn_and_split.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/addp-same-low_1.c: New test.


addp-q.patch
Description: addp-q.patch


Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 09:21, Andreas Schwab wrote:

> On Jun 20 2023, Jakub Jelinek via Gcc-patches wrote:
>
> > Is it safe even on 64bit targets?  I mean, doesn't say PowerPC already
> allow
> > full 64-bit virtual address space?  The assumption that one can't have
> > more than half of virtual address space allocations is true right now at
> > least on x86-64, aarch64 and others, but isn't that something that can
> > change with newer versions of CPUs without the need to recompile
> > applications (add another level or two of page tables)?
>
> At least s390 can allocate more than half the address space.  That
> triggered a failure in gawk.
>

Is PTRDIFF_MAX large enough to represent the difference between any two
pointers?

What we're considering for libstdc++ is treating PTRDIFF_MAX as an upper
limit on allocation size. If there are targets that can really allocate a
2^63 byte array, they won't be able to represent the difference between the
first element and the last element unless ptrdiff_t is wider than 64 bits.


Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Robin Dapp via Gcc-patches
> This little patch fixes a compile warning issue that my previous
> patch introduced, sorry for introducing this issue.

OK and obvious enough to push directly.

Regards
 Robin


[Patch] Fortran's gfc_match_char: %S to match symbol with host_assoc

2023-06-20 Thread Tobias Burnus

When just matching a symbol, one can use 'gfc_match_symbol (&sym, host_assoc)'
and has the option to match with and without host association.

However, when matching something more complex via 'gfc_match' like
"something ( %s ) , " the match uses host_assoc = false.
While it can be combined ("something (" + symbol + " ) ,"), this requires
keeping track of the previous location and resetting it.

It seems to be much simpler to add a new flag supporting host_assoc = true,
which this patch does (using '%S'). The advantage is also that when looking
at the comment or at the "%s" implementation, it is clear that there are two
variants, making it less likely to choose the wrong matching.

OK for mainline?

Tobias

PS: I will use it in an upcoming OpenMP patch to parse 'uses_allocators'.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Fortran's gfc_match_char: %S to match symbol with host_assoc

gfc_match ("... %s ...", ...) matches a gfc_symbol but with
host_assoc = 0. This commit adds '%S' as variant which matches
with host_assoc = 1

gcc/fortran/ChangeLog:

	* match.cc (gfc_match_char): Match with '%S' a symbol
	with host_assoc = 1.

diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index e7be7fddc64..a350ebf754e 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -1084,7 +1084,8 @@ gfc_match_char (char c, bool gobble_ws)
 
%%  Literal percent sign
%e  Expression, pointer to a pointer is set
-   %s  Symbol, pointer to the symbol is set
+   %s  Symbol, pointer to the symbol is set (host_assoc = 0)
+   %S  Symbol, pointer to the symbol is set (host_assoc = 1)
%n  Name, character buffer is set to name
%t  Matches end of statement.
%o  Matches an intrinsic operator, returned as an INTRINSIC enum.
@@ -1151,8 +1152,9 @@ loop:
 	  goto loop;
 
 	case 's':
+	case 'S':
 	  vp = va_arg (argp, void **);
-	  n = gfc_match_symbol ((gfc_symbol **) vp, 0);
+	  n = gfc_match_symbol ((gfc_symbol **) vp, c == 'S');
 	  if (n != MATCH_YES)
 	{
 	  m = n;



[PATCH] Update virtual SSA form manually where easily possible in phiprop

2023-06-20 Thread Richard Biener via Gcc-patches
This keeps virtual SSA form up-to-date in phiprop when easily possible.
Only when we deal with aggregate copies the work would be too
heavy-handed in general.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-phiprop.cc (phiprop_insert_phi): For simple loads
keep the virtual SSA form up-to-date.
---
 gcc/tree-ssa-phiprop.cc | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-phiprop.cc b/gcc/tree-ssa-phiprop.cc
index 5dc505df420..21a349a25e2 100644
--- a/gcc/tree-ssa-phiprop.cc
+++ b/gcc/tree-ssa-phiprop.cc
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "stor-layout.h"
 #include "tree-ssa-loop.h"
+#include "tree-cfg.h"
 
 /* This pass propagates indirect loads through the PHI node for its
address to make the load source possibly non-addressable and to
@@ -153,6 +154,8 @@ phiprop_insert_phi (basic_block bb, gphi *phi, gimple 
*use_stmt,
   print_gimple_stmt (dump_file, use_stmt, 0);
 }
 
+  gphi *vphi = get_virtual_phi (bb);
+
   /* Add PHI arguments for each edge inserting loads of the
  addressable operands.  */
   FOR_EACH_EDGE (e, ei, bb->preds)
@@ -190,9 +193,20 @@ phiprop_insert_phi (basic_block bb, gphi *phi, gimple 
*use_stmt,
{
  tree rhs = gimple_assign_rhs1 (use_stmt);
  gcc_assert (TREE_CODE (old_arg) == ADDR_EXPR);
+ tree vuse = NULL_TREE;
  if (TREE_CODE (res) == SSA_NAME)
-   new_var = make_ssa_name (TREE_TYPE (rhs));
+   {
+ new_var = make_ssa_name (TREE_TYPE (rhs));
+ if (vphi)
+   vuse = PHI_ARG_DEF_FROM_EDGE (vphi, e);
+ else
+   vuse = gimple_vuse (use_stmt);
+   }
  else
+   /* For the aggregate copy case updating virtual operands
+  we'd have to possibly insert a virtual PHI and we have
+  to split the existing VUSE lifetime.  Leave that to
+  the generic SSA updating.  */
new_var = unshare_expr (res);
  if (!is_gimple_min_invariant (old_arg))
old_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
@@ -203,6 +217,8 @@ phiprop_insert_phi (basic_block bb, gphi *phi, gimple 
*use_stmt,
  old_arg,
  TREE_OPERAND (rhs, 1)));
  gimple_set_location (tmp, locus);
+ if (vuse)
+   gimple_set_vuse (tmp, vuse);
 
  gsi_insert_on_edge (e, tmp);
  update_stmt (tmp);
-- 
2.35.3


Re: [libstdc++] Improve M_check_len

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 11:45, Jonathan Wakely  wrote:

> On Tue, 20 Jun 2023 at 09:21, Andreas Schwab wrote:
>
>> On Jun 20 2023, Jakub Jelinek via Gcc-patches wrote:
>>
>> > Is it safe even on 64bit targets?  I mean, doesn't say PowerPC already
>> allow
>> > full 64-bit virtual address space?  The assumption that one can't have
>> > more than half of virtual address space allocations is true right now at
>> > least on x86-64, aarch64 and others, but isn't that something that can
>> > change with newer versions of CPUs without the need to recompile
>> > applications (add another level or two of page tables)?
>>
>> At least s390 can allocate more than half the address space.  That
>> triggered a failure in gawk.
>>
>
> Is PTRDIFF_MAX large enough to represent the difference between any two
> pointers?
>
> What we're considering for libstdc++ is treating PTRDIFF_MAX as an upper
> limit on allocation size. If there are targets that can really allocate a
> 2^63 byte array, they won't be able to represent the difference between the
> first element and the last element unless ptrdiff_t is wider than 64 bits.
>

Of course if we're talking about 32-bit targets then you only need a 64-bit
ptrdiff_t to allow arrays larger than 2^31 bytes. In any case, PTRDIFF_MAX
is still an upper limit (although for a 64-bit ptrdiff_t and a 32-bit
address space, it's not a useful limit, because it's much much larger than
the real limit).


Re: Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread juzhe.zh...@rivai.ai
Could you merge it ?
By the way, could Lehua get the write access?

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 18:47
To: Lehua Ding; gcc-patches
CC: rdapp.gcc; juzhe.zhong; kito.cheng; palmer; jeffreyalaw; pan2.li
Subject: Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector
> This little patch fixes a compile warning issue that my previous
> patch introduced, sorry for introducing this issue.
 
OK and obvious enough to push directly.
 
Regards
Robin
 


Re: [Patch, fortran] PR107900 Select type with intrinsic type inside associate causes ICE / Segmenation fault

2023-06-20 Thread Paul Richard Thomas via Gcc-patches
Hi Harald,

Fixing the original testcase in this PR turned out to be slightly more
involved than I expected. However, it resulted in an open door to fix
some other PRs and the attached much larger patch.

This time, I did remember to include the testcases in the .diff :-)

I believe that, between the ChangeLogs and the comments, it is
reasonably self-explanatory.

OK for trunk?

Regards

Paul

Fortran: Fix some bugs in associate [PR87477]

2023-06-20  Paul Thomas  

gcc/fortran
PR fortran/87477
PR fortran/88688
PR fortran/94380
PR fortran/107900
PR fortran/110224
* decl.cc (char_len_param_value): Fix memory leak.
(resolve_block_construct): Remove unnecessary static decls.
* expr.cc (gfc_is_ptr_fcn): New function.
(gfc_check_vardef_context): Use it to permit pointer function
result selectors to be used for associate names in variable
definition context.
* gfortran.h: Prototype for gfc_is_ptr_fcn.
* match.cc (build_associate_name): New function.
(gfc_match_select_type): Use the new function to replace inline
version and to build a new associate name for the case where
the supplied associate name is already used for that purpose.
* resolve.cc (resolve_assoc_var): Call gfc_is_ptr_fcn to allow
associate names with pointer function targets to be used in
variable definition context.
* trans-decl.cc (gfc_get_symbol_decl): Unlimited polymorphic
variables need deferred initialisation of the vptr.
(gfc_trans_deferred_vars): Do the vptr initialisation.
* trans-stmt.cc (trans_associate_var): Ensure that a pointer
associate name points to the target of the selector and not
the selector itself.

gcc/testsuite/
PR fortran/87477
PR fortran/107900
* gfortran.dg/pr107900.f90 : New test

PR fortran/110224
* gfortran.dg/pr110224.f90 : New test

PR fortran/88688
* gfortran.dg/pr88688.f90 : New test

PR fortran/94380
* gfortran.dg/pr94380.f90 : New test

PR fortran/95398
* gfortran.dg/pr95398.f90 : Set -std=f2008, bump the line
numbers in the error tests by two and change the text in two.
diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index d09c8bc97d9..844345df77e 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -1086,6 +1086,8 @@ char_len_param_value (gfc_expr **expr, bool *deferred)
   p = gfc_copy_expr (*expr);
   if (gfc_is_constant_expr (p) && gfc_simplify_expr (p, 1))
 gfc_replace_expr (*expr, p);
+  else
+gfc_free_expr (p);

   if ((*expr)->expr_type == EXPR_FUNCTION)
 {
diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index d5cfbe0cc55..c960dfeabd9 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -812,6 +812,16 @@ gfc_has_vector_index (gfc_expr *e)
 }


+bool
+gfc_is_ptr_fcn (gfc_expr *e)
+{
+  return e != NULL && e->expr_type == EXPR_FUNCTION
+	  && (gfc_expr_attr (e).pointer
+		  || (e->ts.type == BT_CLASS
+		  && CLASS_DATA (e)->attr.class_pointer));
+}
+
+
 /* Copy a shape array.  */

 mpz_t *
@@ -6470,6 +6480,22 @@ gfc_check_vardef_context (gfc_expr* e, bool pointer, bool alloc_obj,
 	}
 	  return false;
 	}
+  else if (context && gfc_is_ptr_fcn (assoc->target))
+	{
+	  if (!gfc_notify_std (GFC_STD_F2018, "%qs at %L associated to "
+			   "pointer function target being used in a "
+			   "variable definition context (%s)", name,
+			   &e->where, context))
+	return false;
+	  else if (gfc_has_vector_index (e))
+	{
+	  gfc_error ("%qs at %L associated to vector-indexed target"
+			 " cannot be used in a variable definition"
+			 " context (%s)",
+			 name, &e->where, context);
+	  return false;
+	}
+	}

   /* Target must be allowed to appear in a variable definition context.  */
   if (!gfc_check_vardef_context (assoc->target, pointer, false, false, NULL))
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index a58c60e9828..30631abd788 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3659,6 +3659,7 @@ bool gfc_is_constant_expr (gfc_expr *);
 bool gfc_simplify_expr (gfc_expr *, int);
 bool gfc_try_simplify_expr (gfc_expr *, int);
 bool gfc_has_vector_index (gfc_expr *);
+bool gfc_is_ptr_fcn (gfc_expr *);

 gfc_expr *gfc_get_expr (void);
 gfc_expr *gfc_get_array_expr (bt type, int kind, locus *);
diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index e7be7fddc64..0e4b5440393 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -6377,6 +6377,39 @@ build_class_sym:
 }


+/* Build the associate name  */
+static int
+build_associate_name (const char *name, gfc_expr **e1, gfc_expr **e2)
+{
+  gfc_expr *expr1 = *e1;
+  gfc_expr *expr2 = *e2;
+  gfc_symbol *sym;
+
+  /* For the case where the associate name is already an associate name.  */
+  if (!expr2)
+expr2 = expr1;
+  expr1 = gfc_get_expr ();
+  expr1->expr_type = EXPR_VARIABLE;
+  expr1->where = expr2->where;
+  if (gfc_get_sym_tree (name, NULL, &expr1->symtree, false))
+return 1;
+
+  sym = expr1->symtree->n.sym;
+  if (expr2->ts.type == BT_UNKNOWN)
+  sym->attr.untyped = 1;
+  else
+  copy_ts_from_

Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Robin Dapp via Gcc-patches
> Could you merge it ?
> By the way, could Lehua get the write access?

IMHO nothing stands in the way but I'll defer to Jeff to have
the "official seal" :)
Once he ACKs Lehua needs to go the usual way of requesting
sourceware access via https://sourceware.org/cgi-bin/pdw/ps_form.cgi.

Regards
 Robin



Re: [Patch] Fortran's gfc_match_char: %S to match symbol with host_assoc

2023-06-20 Thread Paul Richard Thomas via Gcc-patches
Hi Tobias,

This looks good to me. I'm interested to see it in use :-)

OK for trunk

Paul

On Tue, 20 Jun 2023 at 11:50, Tobias Burnus  wrote:
>
> When just matching a symbol, one can use 'gfc_match_symbol (&sym, host_assoc)'
> and has the option to match with and without host association.
>
> However, when matching something more complex via 'gfc_match' like
> "something ( %s ) , " the match uses host_assoc = false.
> While it can be combined ("something (" + symbol + " ) ,"), this requires
> keeping track of the previous location and resetting it.
>
> It seems to be much simpler to add a new flag supporting host_assoc = true,
> which this patch does (using '%S'). The advantage is also that when looking
> at the comment or at the "%s" implementation, it is clear that there are two
> variants, making it less likely to choose the wrong matching.
>
> OK for mainline?
>
> Tobias
>
> PS: I will use it in an upcoming OpenMP patch to parse 'uses_allocators'.
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [SVE][match.pd] Fix ICE observed in PR110280

2023-06-20 Thread Richard Biener via Gcc-patches
On Tue, Jun 20, 2023 at 11:56 AM Prathamesh Kulkarni via Gcc-patches
 wrote:
>
> Hi Richard,
> For the following reduced test-case taken from PR:
>
> #include "arm_sve.h"
> svuint32_t l() {
>   alignas(16) const unsigned int lanes[4] = {0, 0, 0, 0};
>   return svld1rq_u32(svptrue_b8(), lanes);
> }
>
> compiling with -O3 -mcpu=generic+sve results in following ICE:
> during GIMPLE pass: fre
> pr110280.c: In function 'l':
> pr110280.c:5:1: internal compiler error: in eliminate_stmt, at
> tree-ssa-sccvn.cc:6890
> 5 | }
>   | ^
> 0x865fb1 eliminate_dom_walker::eliminate_stmt(basic_block_def*,
> gimple_stmt_iterator*)
> ../../gcc/gcc/tree-ssa-sccvn.cc:6890
> 0x120bf4d eliminate_dom_walker::before_dom_children(basic_block_def*)
> ../../gcc/gcc/tree-ssa-sccvn.cc:7324
> 0x120bf4d eliminate_dom_walker::before_dom_children(basic_block_def*)
> ../../gcc/gcc/tree-ssa-sccvn.cc:7257
> 0x1aeec77 dom_walker::walk(basic_block_def*)
> ../../gcc/gcc/domwalk.cc:311
> 0x11fd924 eliminate_with_rpo_vn(bitmap_head*)
> ../../gcc/gcc/tree-ssa-sccvn.cc:7504
> 0x1214664 do_rpo_vn_1
> ../../gcc/gcc/tree-ssa-sccvn.cc:8616
> 0x1215ba5 execute
> ../../gcc/gcc/tree-ssa-sccvn.cc:8702
>
> cc1 simplifies:
>   lanes[0] = 0;
>   lanes[1] = 0;
>   lanes[2] = 0;
>   lanes[3] = 0;
>   _1 = { -1, ... };
>   _7 = svld1rq_u32 (_1, &lanes);
>
> to:
>   _9 = MEM  [(unsigned int * {ref-all})&lanes];
>   _7 = VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }>;
>
> and then fre1 dump shows:
> Applying pattern match.pd:8675, generic-match-5.cc:9025
> Match-and-simplified VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }> to {
> 0, 0, 0, 0 }
> RHS VEC_PERM_EXPR <_9, _9, { 0, 1, 2, 3, ... }> simplified to { 0, 0, 0, 0 }
>
> The issue seems to be with the following pattern:
> (simplify
>  (vec_perm vec_same_elem_p@0 @0 @1)
>  @0)
>
> which simplifies above VEC_PERM_EXPR to:
> _7 = {0, 0, 0, 0}
> which is incorrect since _9 and mask have different vector lengths.
>
> The attached patch amends the pattern to simplify above VEC_PERM_EXPR
> only if operand and mask have same number of elements, which seems to fix
> the issue, and we're left with the following in .optimized dump:
>[local count: 1073741824]:
>   _2 = VEC_PERM_EXPR <{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 1, 2, 3, ... }>;

it would be nice to have this optimized.

-
 (simplify
  (vec_perm vec_same_elem_p@0 @0 @1)
- @0)
+ (if (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)),
+   TYPE_VECTOR_SUBPARTS (TREE_TYPE (@1)))
+  @0))

that looks good I think.  Maybe even better use 'type' instead of TREE_TYPE (@1)
since that's more obviously the return type in which case

  (if (types_match (type, TREE_TYPE (@0))

would be more to the point.

But to simplify this in the !known_eq case, can't you do a simple

  { build_vector_from_val (type, the-element); }

?  The 'vec_same_elem_p' predicate doesn't get you at the element,

 (with { tree el = uniform_vector_p (@0); }
  (if (el)
   { build_vector_from_val (type, el); })))

would be the cheapest workaround.

>   return _2;
>
> code-gen:
> l:
> mov z0.b, #0
> ret
>
> Patch is bootstrapped+tested on aarch64-linux-gnu.
> OK to commit ?
>
> Thanks,
> Prathamesh


[COMMITTED] ada: Remove outdated comment

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

The Preelaborate pragma the removed comment was referring to was
indeed present in AI 167, as well as in clause 5.3 of the rationale
for Ada 2012, but it never made it into the 2012 version of the
reference manual.

gcc/ada/

* libgnarl/s-mudido.ads: Remove outdated comment.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnarl/s-mudido.ads | 4 
 1 file changed, 4 deletions(-)

diff --git a/gcc/ada/libgnarl/s-mudido.ads b/gcc/ada/libgnarl/s-mudido.ads
index 06e48bd1b9c..cc97463077b 100644
--- a/gcc/ada/libgnarl/s-mudido.ads
+++ b/gcc/ada/libgnarl/s-mudido.ads
@@ -20,10 +20,6 @@ with Ada.Task_Identification;
 private with System.Tasking;
 
 package System.Multiprocessors.Dispatching_Domains is
-   --  pragma Preelaborate (Dispatching_Domains);
-   --  ??? According to AI 167 this unit should be preelaborate, but it cannot
-   --  be preelaborate because it depends on Ada.Real_Time which is not
-   --  preelaborate.
 
Dispatching_Domain_Error : exception;
 
-- 
2.40.0



[COMMITTED] ada: Fix crash on inlining in GNATprove

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Yannick Moy 

After the recent change on detection of non-inlining, calls inside
the iterator part of a quantified expression were not considered
as preventing inlining anymore, leading to a crash later on inside
GNATprove. Now fixed.

gcc/ada/

* sem_res.adb (Resolve_Call): Fix change that replaced test for
quantified expressions by the test for potentially unevaluated
contexts. Both should be performed.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index f4dfc041cd6..2c8efec524b 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -7300,6 +7300,15 @@ package body Sem_Res is
   ("cannot inline & (in potentially unevaluated context)?",
N, Nam_UA);
 
+--  Calls are not inlined inside the loop_parameter_specification
+--  or iterator_specification of the quantified expression, as they
+--  are only preanalyzed. Calls in the predicate part are handled
+--  by the previous test on potentially unevaluated expressions.
+
+elsif In_Quantified_Expression (N) then
+   Cannot_Inline
+ ("cannot inline & (in quantified expression)?", N, Nam_UA);
+
 --  Inlining should not be performed during preanalysis
 
 elsif Full_Analysis then
-- 
2.40.0



[COMMITTED] ada: Minor tweaks

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : Pass
the NULL_TREE explicitly and test imported_p in lieu of
Is_Imported. : Remove public_flag local variable and
make extern_flag local variable a constant.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index b2b77787bc0..494b24e2111 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -1162,7 +1162,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
gnu_expr = build_unary_op (ADDR_EXPR, gnu_type, gnu_expr);
 
-   create_var_decl (gnu_entity_name, gnu_ext_name,
+   create_var_decl (gnu_entity_name, NULL_TREE,
 TREE_TYPE (gnu_expr), gnu_expr,
 const_flag, Is_Public (gnat_entity),
 imported_p, static_flag, volatile_flag,
@@ -1533,7 +1533,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
/* If this name is external or a name was specified, use it, but don't
   use the Interface_Name with an address clause (see cd30005).  */
-   if ((Is_Public (gnat_entity) && !Is_Imported (gnat_entity))
+   if ((Is_Public (gnat_entity) && !imported_p)
|| (Present (Interface_Name (gnat_entity))
&& No (Address_Clause (gnat_entity))))
  gnu_ext_name = create_concat_name (gnat_entity, NULL);
@@ -3977,10 +3977,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
  = gnu_ext_name_for_subprog (gnat_entity, gnu_entity_name);
const enum inline_status_t inline_status
  = inline_status_for_subprog (gnat_entity);
-   bool public_flag = Is_Public (gnat_entity) || imported_p;
/* Subprograms marked both Intrinsic and Always_Inline need not
   have a body of their own.  */
-   bool extern_flag
+   const bool extern_flag
  = ((Is_Public (gnat_entity) && !definition)
 || imported_p
 || (Is_Intrinsic_Subprogram (gnat_entity)
@@ -4135,10 +4134,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
else
  gnu_decl
= create_subprog_decl (gnu_entity_name, gnu_ext_name,
-  gnu_type, gnu_param_list,
-  inline_status, public_flag,
-  extern_flag, artificial_p,
-  debug_info_p,
+  gnu_type, gnu_param_list, inline_status,
+  Is_Public (gnat_entity) || imported_p,
+  extern_flag, artificial_p, debug_info_p,
   definition && imported_p, attr_list,
   gnat_entity);
  }
-- 
2.40.0



[COMMITTED] ada: Further fixes to handling of private views in instances

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This removes more bypasses for private views in instances that are present
in type predicates (Conforming_Types, Covers, Specific_Type and Wrong_Type),
which in exchange requires additional work in Sem_Ch12 to restore the proper
view of types during the instantiation of generic bodies.

The main mechanism for this is the Has_Private_View flag, but it comes with
the limitations that 1) there must be a direct reference to the global type
in the generic construct (either a reference to a global object of this type
or the explicit declaration of a local object of this type), which is not
always the case e.g. for loop parameters and 2) it can deal with a single
type at a time, e.g. it cannot deal with an array type and its component
type if their respective views are not the same in the instance.

To overcome the second limitation, a new Has_Secondary_Private_View flag
is introduced to deal with a secondary type, which as of this writing is
either the component type of an array type or the designated type of an
access type (together they make up the vast majority of the problematic
cases for the Has_Private_View flag alone). This new mechanism subsumes
a specific treatment for them that was added in Copy_Generic_Node a few
years ago, although a specific treatment still needs to be preserved for
comparison and equality operators in a narrower case.

Additional handling is also introduced to overcome the first limitation
for loop parameters in Copy_Generic_Node, and a relaxed condition is used
in Exp_Ch7.Convert_View to generate an unchecked conversion between views.

gcc/ada/

* exp_ch7.adb (Convert_View): Detect more cases of mismatches for
private types and use Implementation_Base_Type as main criterion.
* gen_il-fields.ads (Opt_Field_Enum): Add
Has_Secondary_Private_View
* gen_il-gen-gen_nodes.adb (N_Expanded_Name): Likewise.
(N_Direct_Name): Likewise.
(N_Op): Likewise.
* sem_ch12.ads (Check_Private_View): Document the usage of second
flag Has_Secondary_Private_View.
* sem_ch12.adb (Get_Associated_Entity): New function to retrieve
the ultimate associated entity, if any.
(Check_Private_View): Implement Has_Secondary_Private_View
support.
(Copy_Generic_Node): Remove specific treatment for Component_Type
of an array type and Designated_Type of an access type. Add
specific treatment for comparison and equality operators, as well
as iterator and loop parameter specifications.
(Instantiate_Type): Implement Has_Secondary_Private_View support.
(Requires_Delayed_Save): Call Get_Associated_Entity.
(Set_Global_Type): Implement Has_Secondary_Private_View support.
* sem_ch6.adb (Conforming_Types): Remove bypass for private views
in instances.
* sem_type.adb (Covers): Return true if Is_Subtype_Of does so.
Remove bypass for private views in instances.
(Specific_Type): Likewise.
* sem_util.adb (Wrong_Type): Likewise.
* sinfo.ads (Has_Secondary_Private_View): Document new flag.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb  |  12 +-
 gcc/ada/gen_il-fields.ads|   1 +
 gcc/ada/gen_il-gen-gen_nodes.adb |   7 +-
 gcc/ada/sem_ch12.adb | 295 ---
 gcc/ada/sem_ch12.ads |   4 +-
 gcc/ada/sem_ch6.adb  |  17 +-
 gcc/ada/sem_type.adb |  31 ++--
 gcc/ada/sem_util.adb |  50 --
 gcc/ada/sinfo.ads|  39 ++--
 9 files changed, 219 insertions(+), 237 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index f82301c0acd..1b16839ddf3 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -4413,11 +4413,13 @@ package body Exp_Ch7 is
   if Is_Abstract_Subprogram (Proc) and then Is_Tagged_Type (Ftyp) then
  return Unchecked_Convert_To (Class_Wide_Type (Ftyp), Arg);
 
-  elsif Ftyp /= Atyp
-and then Present (Atyp)
-and then (Is_Private_Type (Ftyp) or else Is_Private_Type (Atyp))
-and then Base_Type (Underlying_Type (Atyp)) =
- Base_Type (Underlying_Type (Ftyp))
+  elsif Present (Atyp)
+and then Atyp /= Ftyp
+and then (Is_Private_Type (Ftyp)
+   or else Is_Private_Type (Atyp)
+   or else Is_Private_Type (Base_Type (Atyp)))
+and then Implementation_Base_Type (Atyp) =
+ Implementation_Base_Type (Ftyp)
   then
  return Unchecked_Convert_To (Ftyp, Arg);
 
diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads
index c62523d9075..a017f45d9a6 100644
--- a/gcc/ada/gen_il-fields.ads
+++ b/gcc/ada/gen_il-fields.ads
@@ -210,6 +210,7 @@ package Gen_IL.Fields is
   Has_Pragma_Suppress_All,
   Has_Private_View,
   Has_Relative_Deadline_Pragma,
+  Has_Secondary_Private_View,
   

[committed] Fortran: Fix parse-dump-tree for OpenMP ALLOCATE clause

2023-06-20 Thread Tobias Burnus

A rather obvious fix. The allocator(...) bit showed up with -fdump-parse-tree.

Committed as Rev. r14-1988-g99e3214f582b08

Side remark:

Regarding the example inside the commit log: OpenMP 5.1 permitted derived-type
components. Due to global wording changes, OpenMP 5.2 disabled it for both
the new 'allocators' and for the old 'allocate' directive. This turned out to be
an accidental change and it will be permitted in a future OpenMP version again
(for 'allocators' as the executable form of the 'allocate' directive was 
deprecated
and then removed; in GCC, we will have to support it for both for legacy support
reasons). – While n->expr is now supported for the dump, it is currently 
unreachable
as dt-component parsing is disabled for both allocate and allocators. (To be 
changed.)

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 99e3214f582b08b69b11b53eb3fc73b0919ef4f1
Author: Tobias Burnus 
Date:   Tue Jun 20 13:46:11 2023 +0200

Fortran: Fix parse-dump-tree for OpenMP ALLOCATE clause

Commit r14-1301-gd64e8e1224708e added u2.allocator to gfc_omp_namelist
for better readability and to permit to use namelist->expr for code
like the following:
  !$omp allocators allocate(align(32) : dt%alloc_comp)
allocate (dt%alloc_comp(5))
  !$omp allocate(dt%alloc_comp2) align(64)
allocate (dt%alloc_comp2(10))
However, for the parse-tree dump the change was incomplete.

gcc/fortran/ChangeLog:

* dump-parse-tree.cc (show_omp_namelist): Fix dump of the allocator
modifier of OMP_LIST_ALLOCATE.

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 99c8bdaadce..effcebe9325 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -1370,31 +1370,34 @@ show_omp_namelist (int list_type, gfc_omp_namelist *n)
 		  fputc (list_type == OMP_LIST_AFFINITY ? ':' : ',', dumpfile);
 		}
 	}
 	  ns_iter = n->u2.ns;
 	}
   if (list_type == OMP_LIST_ALLOCATE)
 	{
-	  if (n->expr)
+	  if (n->u2.allocator)
 	{
 	  fputs ("allocator(", dumpfile);
 	  show_expr (n->u2.allocator);
 	  fputc (')', dumpfile);
 	}
 	  if (n->expr && n->u.align)
 	fputc (',', dumpfile);
 	  if (n->u.align)
 	{
 	  fputs ("align(", dumpfile);
 	  show_expr (n->u.align);
 	  fputc (')', dumpfile);
 	}
-	  if (n->expr || n->u.align)
+	  if (n->u2.allocator || n->u.align)
 	fputc (':', dumpfile);
-	  fputs (n->sym->name, dumpfile);
+	  if (n->expr)
+	show_expr (n->expr);
+	  else
+	fputs (n->sym->name, dumpfile);
 	  if (n->next)
 	fputs (") ALLOCATE(", dumpfile);
 	  continue;
 	}
   if (list_type == OMP_LIST_REDUCTION)
 	switch (n->u.reduction_op)
 	  {


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> Committed, thanks Jeff.

The vec_set/vec_extract tests FAIL since this commit.  I'm going to
commit the attached as obvious.

Lehua, would they not show up in your test runs?  You fixed several
other tests but these somehow not?

Regards
 Robin

Subject: [PATCH] RISC-V: testsuite: Add -Wno-psabi to vec_set/vec_extract
 testcases.

This fixes some fallout from the recent psabi changes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Add
-Wno-psabi.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c:
Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Dito.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-run.c: Dito.
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-run.c| 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
index 1a6e6dd83ee..34efd5f700a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
index 884c38e0bd8..5f3168a320a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
index 844ad392df0..7210327a4ff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
index 04c234e7d2d..c5cb56a88c7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
index dd22dae5eb9..43110c0bb8d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "-std=c99 -Wno-pedantic" } */
+/* { dg-additional-options "-std=c99 -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index 4fb4e822b93..28f11150f8f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.

Re: [PATCH] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread Richard Biener via Gcc-patches
On Tue, 20 Jun 2023, juzhe.zh...@rivai.ai wrote:

> From: Ju-Zhe Zhong 
> 
> This patch applies LEN_MASK_{LOAD,STORE} to the vectorizer.
> I refactored the gimple IR build to make the code look cleaner.
> 
> gcc/ChangeLog:
> 
> * internal-fn.cc (expand_partial_store_optab_fn): Add 
> LEN_MASK_{LOAD,STORE} vectorizer support.
> (internal_load_fn_p): Ditto.
> (internal_store_fn_p): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> (internal_len_load_store_bias): Ditto.
> * optabs-query.cc (can_vec_mask_load_store_p): Ditto.
> (get_len_load_store_mode): Ditto.
> * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
> (get_all_ones_mask): New function.
> (vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
> (vectorizable_load): Ditto.
> 
> ---
>  gcc/internal-fn.cc |  35 +-
>  gcc/optabs-query.cc|  25 +++-
>  gcc/tree-vect-stmts.cc | 259 +
>  3 files changed, 213 insertions(+), 106 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c911ae790cb..e10c21de5f1 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>   * OPTAB.  */
>  
>  static void
> -expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> +expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
> optab)
>  {
>class expand_operand ops[5];
>tree type, lhs, rhs, maskt, biast;
> @@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>insn_code icode;
>  
>maskt = gimple_call_arg (stmt, 2);
> -  rhs = gimple_call_arg (stmt, 3);
> +  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
>type = TREE_TYPE (rhs);
>lhs = expand_call_mem_ref (type, stmt, 0);
>  
> @@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
>  case IFN_GATHER_LOAD:
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_LEN_LOAD:
> +case IFN_LEN_MASK_LOAD:
>return true;
>  
>  default:
> @@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
>  case IFN_SCATTER_STORE:
>  case IFN_MASK_SCATTER_STORE:
>  case IFN_LEN_STORE:
> +case IFN_LEN_MASK_STORE:
>return true;
>  
>  default:
> @@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
>  case IFN_MASK_STORE_LANES:
>return 2;
>  
> +case IFN_LEN_MASK_LOAD:
> +case IFN_LEN_MASK_STORE:
> +  return 3;
> +
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_MASK_SCATTER_STORE:
>return 4;
> @@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
>  case IFN_LEN_STORE:
>return 3;
>  
> +case IFN_LEN_MASK_STORE:
> +  return 4;
> +
>  default:
>return -1;
>  }
> @@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
> machine_mode mode)
>  {
>optab optab = direct_internal_fn_optab (ifn);
>insn_code icode = direct_optab_handler (optab, mode);
> +  int bias_argno = 3;
> +  if (icode == CODE_FOR_nothing)
> +{
> +  machine_mode mask_mode
> + = targetm.vectorize.get_mask_mode (mode).require ();
> +  if (ifn == IFN_LEN_LOAD)
> + {
> +   /* Try LEN_MASK_LOAD.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
> + }
> +  else
> + {
> +   /* Try LEN_MASK_STORE.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
> + }
> +  icode = convert_optab_handler (optab, mode, mask_mode);
> +  bias_argno = 4;
> +}
>  
>if (icode != CODE_FOR_nothing)
>  {
>/* For now we only support biases of 0 or -1.  Try both of them.  */
> -  if (insn_operand_matches (icode, 3, GEN_INT (0)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
>   return 0;
> -  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
>   return -1;
>  }
>  
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 276f8408dd7..4394d391200 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
>  bool is_load)
>  {
>optab op = is_load ? maskload_optab : maskstore_optab;
> +  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
>machine_mode vmode;
>  
>/* If mode is vector mode, check it directly.  */
>if (VECTOR_MODE_P (mode))
> -return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
> +return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
> +|| convert_optab_handler (len_op, mode, mask_mode)
> + != CODE_FOR_nothing;
>  
>/* Otherwise, return true if there is som

Re: [PATCH][RFC] c-family: Implement __has_feature and __has_extension [PR60512]

2023-06-20 Thread Alex Coplan via Gcc-patches
Hi Iain,

On 14/05/2023 17:05, Iain Sandoe wrote:
> Hi Alex,
> 
> thanks for working on this.
> 
> I’ve applied this patch and evaluated on a few Darwin versions (which is the
> target currently most affected, I believe):
> 
> > On 9 May 2023, at 13:07, Alex Coplan  wrote:
> 
> > This patch implements clang's __has_feature and __has_extension in GCC.
> 
> Thanks, this blocks consuming Darwin SDK headers “properly” (PR 90709 as
> linked to  60512) (which is why I had a WIP patch too).
> 
> So I am very keen to see this land in GCC-14, but have some  issues to deal 
> with and would be looking for ideas about how to handle them by extending or
> amending the patch.
> 
> The main concern I have at the moment is that it seems to me that we need
> more flexible and general predicates for declaring feature/ext support:
> 
>   a) on target (see below for examples)
>   b) on potentially multiple flags and language version at the same time (see 
> below)
>   c) what about features that exist for a closed range of language versions?
> 
> As mentioned by Jakub in a conversation about this on irc (months ago!) the
> current identifiers potentially clash with user symbols.
> 
> IFF we add feature designations (which IMO we should, since this approach does
> help simplify testcases and configurations) we should add them into the
> implementation namespace:
> 
> e.g. ( for C) 
> _GNU_nested_functions or __nested_functions
> 
> > Currently the patch aims to implement all documented features (and some
> > undocumented ones) following the documentation at
> > https://clang.llvm.org/docs/LanguageExtensions.html
> 
> TL;DR 
> without guards or target-specific opt out this breaks bootstrap on Darwin.

Thanks for trying out the patch and pointing this out, this blocker has
now been addressed by relaxing the C++ parser as per
g:b106f11dc6adb8df15cc5c268896d314c76ca35f.

The patch can now survive bootstrap on Darwin (it looks like we'll need
to adjust some Objective-C++ tests in light of the new pedwarn, but that
looks to be straightforward).



> (one reason to allow target opt-in/out of specific features)
> 
> > with the following omissions:
> 
> > - Objective-C-specific features.
> 
> I can clearly append the objective-c(++) cases to the end of the respective
> lists, but then we need to make them conditional on language, version and
> dialect (some will not be appropriate to GNU runtime).
> 
> this is why I think we need more flexible predicates on declaring features
> and extensions.

Would it help mitigate these concerns if I implemented some Objective-C
features as part of this patch (say, those implemented by your WIP
patch)?

My feeling is that the vast majority of extensions / features have
similar logic, so we should exploit that redundancy to keep things terse
in the encoding for the general case. Where we need more flexible
predicates (e.g. for objc_nonfragile_abi in your WIP patch), those can
be handled on a case-by-case basis by adding a new enumerator and logic
to handle that specially.

What do you think, does that sound OK to you?

> 
> 
> 
> index 2b4c82facf7..5b8429244b2 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> 
> +struct hf_feature_info
> 
> +  { "enumerator_attributes",   0, 0 },
> +  { "tls", 0, 0 },
> 
> Do all GCC targets support tls?

This is a good point. In clang, the features tls, c_thread_local, and
cxx_thread_local are all gated on whether the target supports TLS.
But in clang, it is a hard error to use TLS variables on a target which
doesn't support TLS. So it seems the features are used to check whether
code can make use of TLS constructs.

In GCC, AFAICT, TLS variables never get rejected, since GCC just uses
emulated TLS in the case that the target doesn't support TLS for real.

This then begs the question of how these features should be interpreted.
For c{,xx}_thread_local I'd expect that we want them to return true
whenever the language-level constructs are useable (even if we end up
using emutls).

I think it's defensible to take the position that GCC "always supports
TLS" since (AFAIK) you can make use of thread-local variables regardless
of whether the target really supports TLS (since you just get emutls if
it doesn't). So it's not clear which interpretation we should use for
the "tls" feature.

> 
> What about things like this:
> 
>  attribute_availability_tvos, 
>  attribute_availability_watchos, 
>  attribute_availability_driverkit, 

FWIW, clang looks to define these unconditionally, so restricting these
to a given target would be deviating from its precedent.

However, I don't think it would be hard to extend the implementation in
this patch to support target-specific features if required. I think
perhaps a langhook that targets can call to add their own features would
be a reasonable approach.

> ?
> 
> Even if they are implemented centrally, it is unlikely that all targets would 
> want
> to claim support (although note that the availabili

Re: [PATCH] RISC-V: Fix vmul test expectation.

2023-06-20 Thread Robin Dapp via Gcc-patches
I just noticed there is also a -ffast-math missing in vadd-run.c
as well as one redundant in vrem-rv32gcv.c and added it to the
patch.

Going to commit the attached as obvious.

Regards
 Robin

Subject: [PATCH] RISC-V: testsuite: Fix vmul test expectation and fix 
-ffast-math.

I forgot to check for vfmul in the multiplication tests as well as
some -ffast-math arguments.  Fix this.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vadd-run.c: Add
-ffast-math.
* gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c: Remove
-ffast-math.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c: Check for
vfmul.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c: Ditto.
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c | 2 +-
 5 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
index 5db0a3c79be..12fb952118e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
 
 #include "vadd-template.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
index 1a11fe0fb30..30b467f50c7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
 
 #include "vadd-template.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
index 1900c21121b..7d3dfade0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
index c8508bcc1f7..a549d6f7be4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
index c6fe79e37b8..86607d03777 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
@@ -1,4 +1,4 @@
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv 
-mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv 
-mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
 
 #include "vrem-template.h"
 
-- 
2.40.1



[PATCH] RISC-V: Implement autovec copysign.

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi,

this adds vector copysign, ncopysign and xorsign as well as the
accompanying tests.

In order to easily match the ncopysign patterns I changed the
builtin implementation slightly.  Juzhe might want to comment
on that.  For now I kept the <nx> attribute's name even though
it doesn't emit an "n" anymore.

Regards
 Robin


gcc/ChangeLog:

* config/riscv/autovec.md (copysign3): Add expander.
(xorsign3): Ditto.
* config/riscv/riscv-vector-builtins-bases.cc (class vfsgnjn):
New class.
* config/riscv/vector-iterators.md (copysign): Remove ncopysign.
(xorsign): Ditto.
(n): Ditto.
(x): Ditto.
* config/riscv/vector.md (@pred_ncopysign): Split off.
(@pred_ncopysign_scalar): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/copysign-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-template.h: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c: New test.
---
 gcc/config/riscv/autovec.md   | 43 +
 .../riscv/riscv-vector-builtins-bases.cc  | 18 +++-
 gcc/config/riscv/vector-iterators.md  |  9 +-
 gcc/config/riscv/vector.md| 43 +
 .../riscv/rvv/autovec/binop/copysign-run.c| 89 +++
 .../rvv/autovec/binop/copysign-rv32gcv.c  |  8 ++
 .../rvv/autovec/binop/copysign-rv64gcv.c  |  8 ++
 .../rvv/autovec/binop/copysign-template.h | 78 
 .../rvv/autovec/binop/copysign-zvfh-run.c | 83 +
 9 files changed, 371 insertions(+), 8 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-run.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-template.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1641d7e1ea..f2e69aaf102 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -804,3 +804,46 @@ (define_expand "3"
 riscv_vector::RVV_BINOP, operands);
   DONE;
 })
+
+;; 
---
+;;  [FP] Sign copying
+;; 
---
+;; Includes:
+;; - vfsgnj.vv/vfsgnjn.vv
+;; - vfsgnj.vf/vfsgnjn.vf
+;; 
---
+
+;; Leave the pattern like this as to still allow combine to match
+;; a negated copysign (see vector.md) before adding the UNSPEC_VPREDICATE 
later.
+(define_insn_and_split "copysign3"
+  [(set (match_operand:VF 0 "register_operand"  "=vd, vd, vr, vr")
+(unspec:VF
+ [(match_operand:VF 1 "register_operand"" vr, vr, vr, vr")
+ (match_operand:VF 2 "register_operand" " vr, vr, vr, vr")] 
UNSPEC_VCOPYSIGN))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VCOPYSIGN, mode),
+riscv_vector::RVV_BINOP, operands);
+  DONE;
+}
+  [(set_attr "type" "vfsgnj")
+   (set_attr "mode" "")])
+
+;; 
---
+;; Includes:
+;; - vfsgnjx.vv
+;; - vfsgnjx.vf
+;; 
---
+(define_expand "xorsign3"
+  [(match_operand:VF_AUTO 0 "register_operand")
+(match_operand:VF_AUTO 1 "register_operand")
+(match_operand:VF_AUTO 2 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VXORSIGN, mode),
+riscv_vector::RVV_BINOP, operands);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index c6c53dc13a5..0313986f6b9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1212,7 +1212,7 @@ public:
   }
 };
 
-/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjn/vfsgnjx.  */
+/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjx.  */
 template
 class float_misc : public function_base
 {
@@ -1227,6 +1227,20 @@ public:
   }
 };
 
+/* Implements vfsgnjn.  */
+class vfsgnjn : public function_base
+{
+public:
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_exact_insn (code_for_pred_ncopysign_scalar (e.vector_mode 
()));
+if (e.op_info->op == OP_TYPE_vv)
+  r

Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> Lehua, would they not show up in your test runs?  You fixed several
> other tests but these somehow not?


Oh, I think I know why. Your test cases were added yesterday, whereas I
submitted my patch the day before; Pan then helped me merge it yesterday,
after your cases had already gone in. Sorry for introducing this issue,
I'll submit a new fix patch.


Best,
Lehua

Re: [COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Alex Coplan via Gcc-patches
Hi,

On 20/06/2023 09:47, Marc Poulhiès via Gcc-patches wrote:
> From: Daniel King 
> 
> The package Interfaces.CHERI provides intrinsic bindings and
> helper functions to allow software to query, create, and
> manipulate CHERI capabilities.

I'm curious what the motivation for these intrinsic wrappers is, given that
GCC trunk doesn't currently support them. Out of interest, can you share what
the use case for these is?

Thanks,
Alex

> 
> gcc/ada/
> 
>   * libgnat/i-cheri.ads: Add CHERI intrinsics and helper functions.
>   * libgnat/i-cheri.adb: Likewise
> 
> Tested on x86_64-pc-linux-gnu, committed on master.
> 
> ---
>  gcc/ada/libgnat/i-cheri.adb |  75 ++
>  gcc/ada/libgnat/i-cheri.ads | 470 
>  2 files changed, 545 insertions(+)
>  create mode 100644 gcc/ada/libgnat/i-cheri.adb
>  create mode 100644 gcc/ada/libgnat/i-cheri.ads


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> Oh, I should know why. These cases of yours were added yesterday,
> while I submitted the patch the day before, and then yesterday by Pan
> to help me merge in after your cases. Sorry for introducing this issue,
> I'll submit a new fix patch.

Actually they are already in for a bit :)
51795b910737 (Robin Dapp 2023-06-01 14:18:57 +0200  1) /* { dg-do compile } */

I thought something was special about them that they somehow didn't run
on your machine or so.

But no need for a new patch, thanks.  I already have it and will commit
it soon.

Regards
 Robin


Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Robin Dapp via Gcc-patches
> Could you merge it ?

Committed.

Regards
 Robin


Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 04:56, Robin Dapp wrote:

Could you merge it ?
By the way, could Lehua get the write access?


IMHO nothing stands in the way but I'll defer to Jeff to have
the "official seal" :)
Once he ACKs Lehua needs to go the usual way of requesting
sourceware access via https://sourceware.org/cgi-bin/pdw/ps_form.cgi.
Lehua fills out that form.  List me as the approver and the process will 
run from there.  Takes a day or two for everything to get into place.


jeff

ps.  If Lehua has already filled out the form with Robin as the 
approver, that's fine too.  Might take a bit longer as I suspect the IT 
folks may not recognize Robin.


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> Actually they are already in for a bit :)
> 51795b910737 (Robin Dapp 2023-06-01 14:18:57 +0200  1) /* { dg-do 
compile } */
>I thought something is special about them that they somehow didn't run
> on your machine or so.


The time I mentioned is your commit time as shown at this link:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=51795b91073798c718df6fafb01303861641a5af.


authorRobin Dapp 

Re: [PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 03:01, Robin Dapp wrote:

LGTM.

Likewise -- that V2/V3 is a nice improvement over the original V1 approach.

jeff


Re: [PATCH v7 0/6] c++, libstdc++: get std::is_object to dispatch to new built-in traits

2023-06-20 Thread Ken Matsui via Gcc-patches
Just a quick update, the benchmark code link has been updated and can
now be accessed at
https://github.com/ken-matsui/gcc-benches/blob/main/is_object.cc. I
have also created a report file which can be found at
https://github.com/ken-matsui/gcc-benches/blob/main/is_object.md.

On Thu, Jun 15, 2023 at 3:49 AM Ken Matsui  wrote:
>
> Hi,
>
> For those curious about the performance improvements of this patch, I
> conducted a benchmark that instantiates 256k specializations of
> is_object_v based on Patrick's code. You can find the benchmark code
> at this link:
>
> https://github.com/ken-matsui/gcc-benches/blob/main/is_object_benchmark.cc
>
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 64% less time and used 44-47% less
> memory compared to not using it.
>
> Sincerely,
> Ken Matsui
>
> On Mon, Jun 12, 2023 at 3:49 PM Ken Matsui  wrote:
> >
> > Hi,
> >
> > This patch series gets std::is_object to dispatch to built-in traits and
> > implements the following built-in traits, on which std::is_object depends.
> >
> > * __is_reference
> > * __is_function
> > * __is_void
> >
> > std::is_object was depending on them with disjunction and negation.
> >
> > __not_<__or_<is_function<_Tp>, is_reference<_Tp>, is_void<_Tp>>>::type
> >
> > Therefore, this patch uses them directly instead of implementing an 
> > additional
> > built-in trait __is_object, which makes the compiler slightly bigger and
> > slower.
> >
> > __bool_constant<!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp))>
> >
> > This would instantiate only __bool_constant<true> and
> > __bool_constant<false>,
> > which can be mostly shared. That is, the purpose of built-in traits is
> > considered as achieved.
> >
> > Changes in v7
> >
> > * Removed an unnecessary new line.
> >
> > Ken Matsui (6):
> >   c++: implement __is_reference built-in trait
> >   libstdc++: use new built-in trait __is_reference for std::is_reference
> >   c++: implement __is_function built-in trait
> >   libstdc++: use new built-in trait __is_function for std::is_function
> >   c++, libstdc++: implement __is_void built-in trait
> >   libstdc++: make std::is_object dispatch to new built-in traits
> >
> >  gcc/cp/constraint.cc  |  9 +++
> >  gcc/cp/cp-trait.def   |  3 +
> >  gcc/cp/semantics.cc   | 12 
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  9 +++
> >  gcc/testsuite/g++.dg/ext/is_function.C| 58 +++
> >  gcc/testsuite/g++.dg/ext/is_reference.C   | 34 +++
> >  gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
> >  gcc/testsuite/g++.dg/tm/pr46567.C |  6 +-
> >  libstdc++-v3/include/bits/cpp_type_traits.h   | 15 -
> >  libstdc++-v3/include/debug/helper_functions.h |  5 +-
> >  libstdc++-v3/include/std/type_traits  | 51 
> >  11 files changed, 216 insertions(+), 21 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C
> >
> > --
> > 2.41.0
> >


Re: [PATCH 1/2] c++: implement __remove_pointer built-in trait

2023-06-20 Thread Ken Matsui via Gcc-patches
Just a quick update, the benchmark code link has been updated and can
now be accessed at
https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer.cc.
I have also created a report file which can be found at
https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer.md.

On Sat, Jun 17, 2023 at 5:35 AM Ken Matsui  wrote:
>
> Hi,
>
> I conducted a benchmark for remove_pointer as well as is_object. Just
> like the is_object benchmark, here is the benchmark code:
>
> https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer_benchmark.cc
>
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 8.7% less time and used 4.3-4.9%
> less memory on average compared to not using it. Although the
> performance improvement was not as significant as with is_object, the
> benchmark demonstrated that the compilation was consistently more
> efficient.
>
> Sincerely,
> Ken Matsui
>
> On Thu, Jun 15, 2023 at 5:22 AM Ken Matsui  wrote:
> >
> > This patch implements built-in trait for std::remove_pointer.
> >
> > gcc/cp/ChangeLog:
> >
> > * cp-trait.def: Define __remove_pointer.
> > * semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer.
> > * g++.dg/ext/remove_pointer.C: New test.
> >
> > Signed-off-by: Ken Matsui 
> > ---
> >  gcc/cp/cp-trait.def   |  1 +
> >  gcc/cp/semantics.cc   |  4 ++
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
> >  gcc/testsuite/g++.dg/ext/remove_pointer.C | 51 +++
> >  4 files changed, 59 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/remove_pointer.C
> >
> > diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> > index 8b7fece0cc8..07823e55579 100644
> > --- a/gcc/cp/cp-trait.def
> > +++ b/gcc/cp/cp-trait.def
> > @@ -90,6 +90,7 @@ DEFTRAIT_EXPR (IS_DEDUCIBLE, "__is_deducible ", 2)
> >  DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
> >  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
> >  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
> > +DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1)
> >  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
> >  DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1)
> >
> > diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> > index 8fb47fd179e..885c7a6fb64 100644
> > --- a/gcc/cp/semantics.cc
> > +++ b/gcc/cp/semantics.cc
> > @@ -12373,6 +12373,10 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
> > tree type2,
> >if (TYPE_REF_P (type1))
> > type1 = TREE_TYPE (type1);
> >return cv_unqualified (type1);
> > +case CPTK_REMOVE_POINTER:
> > +  if (TYPE_PTR_P (type1))
> > +type1 = TREE_TYPE (type1);
> > +  return type1;
> >
> >  case CPTK_TYPE_PACK_ELEMENT:
> >return finish_type_pack_element (type1, type2, complain);
> > diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> > b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > index f343e153e56..e21e0a95509 100644
> > --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > @@ -146,3 +146,6 @@
> >  #if !__has_builtin (__remove_cvref)
> >  # error "__has_builtin (__remove_cvref) failed"
> >  #endif
> > +#if !__has_builtin (__remove_pointer)
> > +# error "__has_builtin (__remove_pointer) failed"
> > +#endif
> > diff --git a/gcc/testsuite/g++.dg/ext/remove_pointer.C 
> > b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > new file mode 100644
> > index 000..7b13db93950
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > @@ -0,0 +1,51 @@
> > +// { dg-do compile { target c++11 } }
> > +
> > +#define SA(X) static_assert((X),#X)
> > +
> > +SA(__is_same(__remove_pointer(int), int));
> > +SA(__is_same(__remove_pointer(int*), int));
> > +SA(__is_same(__remove_pointer(int**), int*));
> > +
> > +SA(__is_same(__remove_pointer(const int*), const int));
> > +SA(__is_same(__remove_pointer(const int**), const int*));
> > +SA(__is_same(__remove_pointer(int* const), int));
> > +SA(__is_same(__remove_pointer(int** const), int*));
> > +SA(__is_same(__remove_pointer(int* const* const), int* const));
> > +
> > +SA(__is_same(__remove_pointer(volatile int*), volatile int));
> > +SA(__is_same(__remove_pointer(volatile int**), volatile int*));
> > +SA(__is_same(__remove_pointer(int* volatile), int));
> > +SA(__is_same(__remove_pointer(int** volatile), int*));
> > +SA(__is_same(__remove_pointer(int* volatile* volatile), int* volatile));
> > +
> > +SA(__is_same(__remove_pointer(const volatile int*), const volatile int));
> > +SA(__is_same(__remove_pointer(const volatile int**), const volatile int*));
> > +SA(__is_same(__remove_pointer(const int* volatile), const int));
> > +SA(__is_same(__remove_pointer(volatile int* const), volatile int));
> > +SA(__is_same(__remove_pointer(int* con

Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Lehua Ding
> Lehua fills out that form.  List me as the approver and the process 
will
> run from there.  Takes a day or two for everything to get into place.


I just followed this step to submit the form, thanks to Robin, Jeff and Juzhe.


Best,
Lehua

Re: [PATCH] Improve DSE to handle stores before __builtin_unreachable ()

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 00:59, Richard Biener via Gcc-patches wrote:

DSE isn't good at identifying program points that end lifetime
of variables that are not associated with virtual operands.  But
at least for those that end basic-blocks we can handle the simple
case where this ending is in the same basic-block as the definition
we want to elide.  That should catch quite some common cases already.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

As you can see from the testcase I had to adjust, this can possibly
lead to more severe issues when one forgets a return (the C++ frontend
places builtin_unreachable () there).  I'm still planning to push
this improvement unless I hear objections.

Thanks,
Richard.

* tree-ssa-dse.cc (dse_classify_store): When we found
no defs and the basic-block with the original definition
ends in __builtin_unreachable[_trap] the store is dead.

* gcc.dg/tree-ssa/ssa-dse-47.c: New testcase.
* c-c++-common/asan/pr106558.c: Avoid undefined behavior
due to missing return.
I thought during the introduction of erroneous path isolation that we 
concluded stores, calls and such had observable side effects that must 
be preserved, even when we hit a block that leads to __builtin_unreachable.


Don't get me wrong, I'm all for removing the memory references if it's 
safe to do so.


Jeff


[PATCH] libstdc++: Use RAII in std::vector::_M_realloc_insert

2023-06-20 Thread Jonathan Wakely via Gcc-patches
I intend to push this to trunk once testing finishes.

I generated the diff with -b so the whitespace changes aren't shown,
because there was some re-indenting that makes the diff look larger than
it really is.

Honza, I don't think this is likely to make much difference for the PR
110287 testcases, but I think it simplifies the code and so is an
improvement in terms of maintenance and readability.

-- >8 --

Replace the try-block with RAII types for deallocating storage and
destroying elements.

libstdc++-v3/ChangeLog:

* include/bits/vector.tcc (_M_realloc_insert): Replace try-block
with RAII types.
---
 libstdc++-v3/include/bits/vector.tcc | 142 +--
 1 file changed, 89 insertions(+), 53 deletions(-)

diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index acd11e2dc68..cda52fbbc4a 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -458,73 +458,109 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 _M_realloc_insert(iterator __position, const _Tp& __x)
 #endif
 {
-  const size_type __len =
-   _M_check_len(size_type(1), "vector::_M_realloc_insert");
+  const size_type __len = _M_check_len(1u, "vector::_M_realloc_insert");
   pointer __old_start = this->_M_impl._M_start;
   pointer __old_finish = this->_M_impl._M_finish;
   const size_type __elems_before = __position - begin();
   pointer __new_start(this->_M_allocate(__len));
   pointer __new_finish(__new_start);
-  __try
+
+  // RAII guard for allocated storage.
+  struct _Guard
+  {
+   pointer _M_storage; // Storage to deallocate
+   size_type _M_len;
+   _Tp_alloc_type& _M_alloc;
+
+   _GLIBCXX20_CONSTEXPR
+   _Guard(pointer __s, size_type __l, _Tp_alloc_type& __a)
+   : _M_storage(__s), _M_len(__l), _M_alloc(__a)
+   { }
+
+   _GLIBCXX20_CONSTEXPR
+   ~_Guard()
{
- // The order of the three operations is dictated by the C++11
- // case, where the moves could alter a new element belonging
- // to the existing vector.  This is an issue only for callers
- // taking the element by lvalue ref (see last bullet of C++11
- // [res.on.arguments]).
- _Alloc_traits::construct(this->_M_impl,
-  __new_start + __elems_before,
+ if (_M_storage)
+   __gnu_cxx::__alloc_traits<_Tp_alloc_type>::
+ deallocate(_M_alloc, _M_storage, _M_len);
+   }
+
+  private:
+   _Guard(const _Guard&);
+  };
+  _Guard __guard(__new_start, __len, _M_impl);
+
+  // The order of the three operations is dictated by the C++11
+  // case, where the moves could alter a new element belonging
+  // to the existing vector.  This is an issue only for callers
+  // taking the element by lvalue ref (see last bullet of C++11
+  // [res.on.arguments]).
+
+  // If this throws, the existing elements are unchanged.
 #if __cplusplus >= 201103L
-  std::forward<_Args>(__args)...);
+  _Alloc_traits::construct(this->_M_impl,
+  std::__to_address(__new_start + __elems_before),
+  std::forward<_Args>(__args)...);
 #else
-  __x);
+  _Alloc_traits::construct(this->_M_impl,
+  __new_start + __elems_before,
+  __x);
 #endif
- __new_finish = pointer();
 
 #if __cplusplus >= 201103L
- if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
-   {
- __new_finish = _S_relocate(__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
-
- ++__new_finish;
-
- __new_finish = _S_relocate(__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
- else
-#endif
-   {
- __new_finish
-   = std::__uninitialized_move_if_noexcept_a
-   (__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
-
- ++__new_finish;
-
- __new_finish
-   = std::__uninitialized_move_if_noexcept_a
-   (__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
-   }
-  __catch(...)
+  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
{
- if (!__new_finish)
-   _Alloc_traits::destroy(this->_M_impl,
-  __new_start + __elems_before);
- else
-   std::_Destroy(__new_start, __new_finish, _M_get_Tp_allocator());
- _M_deallocate(__new_start, __len);
- __throw_exception_again;
+ // Relocation cannot throw.
+ __new_finish = _S_relocate(__old_start, __position.base(),
+

[PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* internal-fn.cc (expand_partial_store_optab_fn): Add 
LEN_MASK_{LOAD,STORE} vectorizer support.
(internal_load_fn_p): Ditto.
(internal_store_fn_p): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_len_load_store_bias): Ditto.
* optabs-query.cc (can_vec_mask_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(get_all_ones_mask): New function.
(vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
(vectorizable_load): Ditto.

---
 gcc/internal-fn.cc |  35 +-
 gcc/optabs-query.cc|  25 -
 gcc/tree-vect-stmts.cc | 234 ++---
 3 files changed, 227 insertions(+), 67 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c911ae790cb..e10c21de5f1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
  * OPTAB.  */
 
 static void
-expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
 {
   class expand_operand ops[5];
   tree type, lhs, rhs, maskt, biast;
@@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
 
@@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
 case IFN_GATHER_LOAD:
 case IFN_MASK_GATHER_LOAD:
 case IFN_LEN_LOAD:
+case IFN_LEN_MASK_LOAD:
   return true;
 
 default:
@@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
+case IFN_LEN_MASK_STORE:
   return true;
 
 default:
@@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_STORE_LANES:
   return 2;
 
+case IFN_LEN_MASK_LOAD:
+case IFN_LEN_MASK_STORE:
+  return 3;
+
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_SCATTER_STORE:
   return 4;
@@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
 case IFN_LEN_STORE:
   return 3;
 
+case IFN_LEN_MASK_STORE:
+  return 4;
+
 default:
   return -1;
 }
@@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
machine_mode mode)
 {
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
+  int bias_argno = 3;
+  if (icode == CODE_FOR_nothing)
+{
+  machine_mode mask_mode
+   = targetm.vectorize.get_mask_mode (mode).require ();
+  if (ifn == IFN_LEN_LOAD)
+   {
+ /* Try LEN_MASK_LOAD.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
+   }
+  else
+   {
+ /* Try LEN_MASK_STORE.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
+   }
+  icode = convert_optab_handler (optab, mode, mask_mode);
+  bias_argno = 4;
+}
 
   if (icode != CODE_FOR_nothing)
 {
   /* For now we only support biases of 0 or -1.  Try both of them.  */
-  if (insn_operand_matches (icode, 3, GEN_INT (0)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
return 0;
-  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
return -1;
 }
 
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..4394d391200 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
   bool is_load)
 {
   optab op = is_load ? maskload_optab : maskstore_optab;
+  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
   machine_mode vmode;
 
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
+  || convert_optab_handler (len_op, mode, mask_mode)
+   != CODE_FOR_nothing;
 
   /* Otherwise, return true if there is some vector mode with
  the mask load/store supported.  */
@@ -584,7 +587,9 @@ can_vec_mask_load_store_p (machine_mode mode,
   vmode = targetm.vectorize.preferred_simd_mode (smode);
   if (VECTOR_MODE_P (vmode)
   && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
-  && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+  && (convert_optab_handler (op, vmode, mask_mode) != CODE_

Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> -/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
> +/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" 
} */

By the way, shouldn't these cases have the `-mabi=lp64d` option added,
otherwise I get the following failure message when I run tests on RV32 GCC.


  FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c -std=c99 -O3 
-ftree-vectorize --param riscv-autovec-preference=fixed-vlmax (test for excess 
errors)
  Excess errors.
  cc1: error: ABI requires '-march=rv32'



Best,
Lehua


-- Original --
From:  "Robin Dapp"

Re: [PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread juzhe.zh...@rivai.ai
Hi, Richi. I am not sure why I failed to send this patch to your @suse email the 
first time.
Now I have cc'd your gmail address.

This is the V2 patch addressing your comments for V1.

Thanks.


juzhe.zh...@rivai.ai
 
From: juzhe.zhong
Date: 2023-06-20 21:42
To: gcc-patches
CC: richard.sandiford; rguenther; Ju-Zhe Zhong
Subject: [PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer
From: Ju-Zhe Zhong 
 
gcc/ChangeLog:
 
* internal-fn.cc (expand_partial_store_optab_fn): Add 
LEN_MASK_{LOAD,STORE} vectorizer support.
(internal_load_fn_p): Ditto.
(internal_store_fn_p): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_len_load_store_bias): Ditto.
* optabs-query.cc (can_vec_mask_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(get_all_ones_mask): New function.
(vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
(vectorizable_load): Ditto.
 
---
gcc/internal-fn.cc |  35 +-
gcc/optabs-query.cc|  25 -
gcc/tree-vect-stmts.cc | 234 ++---
3 files changed, 227 insertions(+), 67 deletions(-)
 
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c911ae790cb..e10c21de5f1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
  * OPTAB.  */
static void
-expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
{
   class expand_operand ops[5];
   tree type, lhs, rhs, maskt, biast;
@@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
@@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
 case IFN_GATHER_LOAD:
 case IFN_MASK_GATHER_LOAD:
 case IFN_LEN_LOAD:
+case IFN_LEN_MASK_LOAD:
   return true;
 default:
@@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
+case IFN_LEN_MASK_STORE:
   return true;
 default:
@@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_STORE_LANES:
   return 2;
+case IFN_LEN_MASK_LOAD:
+case IFN_LEN_MASK_STORE:
+  return 3;
+
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_SCATTER_STORE:
   return 4;
@@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
 case IFN_LEN_STORE:
   return 3;
+case IFN_LEN_MASK_STORE:
+  return 4;
+
 default:
   return -1;
 }
@@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
machine_mode mode)
{
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
+  int bias_argno = 3;
+  if (icode == CODE_FOR_nothing)
+{
+  machine_mode mask_mode
+ = targetm.vectorize.get_mask_mode (mode).require ();
+  if (ifn == IFN_LEN_LOAD)
+ {
+   /* Try LEN_MASK_LOAD.  */
+   optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
+ }
+  else
+ {
+   /* Try LEN_MASK_STORE.  */
+   optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
+ }
+  icode = convert_optab_handler (optab, mode, mask_mode);
+  bias_argno = 4;
+}
   if (icode != CODE_FOR_nothing)
 {
   /* For now we only support biases of 0 or -1.  Try both of them.  */
-  if (insn_operand_matches (icode, 3, GEN_INT (0)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
return 0;
-  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
return -1;
 }
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..4394d391200 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
   bool is_load)
{
   optab op = is_load ? maskload_optab : maskstore_optab;
+  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
   machine_mode vmode;
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
+|| convert_optab_handler (len_op, mode, mask_mode)
+ != CODE_FOR_nothing;
   /* Otherwise, return true if there is some vector mode with
  the mask load/store supported.  */
@@ -584,7 +587,9 @@ can_vec_mask_load_store_p (machine_mode mode,
   vmode = targetm.vectorize.preferred_simd_mode (smode);
   

Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> By the way, shouldn't these cases have the `-mabi=lp64d` option added,
> otherwise I get the following failure message when I run tests on RV32 GCC.
> 
>   FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c -std=c99 -O3 
> -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax (test for 
> excess errors)
>   Excess errors.
>   cc1: error: ABI requires '-march=rv32'

Arg, yes definitely, sorry.  I keep forgetting this... Will fix.

Regards
 Robin



Re: [COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Marc Poulhiès via Gcc-patches
Hi,

>> The package Interfaces.CHERI provides intrinsic bindings and
>> helper functions to allow software to query, create, and
>> manipulate CHERI capabilities.
>
> I'm curious what the motivation for these intrinsic wrappers is, given that
> GCC trunk doesn't currently support them. Out of interest, can you share what
> the use case for these is?

We share the same Ada frontend with different GCC compilers and
contribute it in GCC's master branch.

You're correct that this particular change is not useful (yet) with
master, but we are testing/using it with a CHERI-aware GCC.

Does that answer your question?

Marc


RE: [PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Li, Pan2 via Gcc-patches
Committed, thanks Robin and Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 20, 2023 9:18 PM
To: Robin Dapp ; Juzhe-Zhong ; 
gcc-patches@gcc.gnu.org
Cc: kito.ch...@gmail.com; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com
Subject: Re: [PATCH V3] RISC-V: Optimize codegen of VLA SLP



On 6/20/23 03:01, Robin Dapp wrote:
> LGTM.
Likewise -- that V2/V3 is a nice improvement over the original V1 approach.

jeff


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi,

I'm going to commit the attached.  Thanks Lehua for reporting.

Regards
 Robin


>From 1a4dfe90f251e38e27104f2fa11feecd3b04c4c1 Mon Sep 17 00:00:00 2001
From: Robin Dapp 
Date: Tue, 20 Jun 2023 15:52:16 +0200
Subject: [PATCH] RISC-V: testsuite: Add missing -mabi=lp64d.

This fixes more cases of missing -mabi=lp64d.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: Add
-mabi=lp64d.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c  | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index c32c31ecd69..9ed7c4f1205 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b 
-fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
 
 #include 
 #include 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
index 34efd5f700a..9cb167a8cdc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
index 5f3168a320a..2837ff58e2d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
index 7210327a4ff..47f30ed79f1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
index c5cb56a88c7..f7169f07506 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index 28f11150f8f..3d60e635869 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls

Re: [PATCH][RFC] c-family: Implement __has_feature and __has_extension [PR60512]

2023-06-20 Thread Iain Sandoe
Hi Alex

again, thanks for working on this and for fixing the SDK blocker.

> On 20 Jun 2023, at 13:30, Alex Coplan  wrote:
> 

> The patch can now survive bootstrap on Darwin (it looks like we'll need
> to adjust some Objective-C++ tests in light of the new pedwarn, but that
> looks to be straightforward).

Yes, I’ll deal with that soon (I was trying to decide whether to fix the
header we have copied from GNUStep, or whether to mark it as a system
header).

>> (one reason to allow target opt-in/out of specific features)
>> 
>>> with the following omissions:
>> 
>>> - Objective-C-specific features.
>> 
>> I can clearly append the objective-c(++) cases to the end of the respective
>> lists, but then we need to make them conditional on language, version and
>> dialect (some will not be appropriate to GNU runtime).
>> 
>> this is why I think we need more flexible predicates on declaring features
>> and extensions.
> 
> Would it help mitigate these concerns if I implemented some Objective-C
> features as part of this patch (say, those implemented by your WIP
> patch)?
> 
> My feeling is that the vast majority of extensions / features have
> similar logic, so we should exploit that redundancy to keep things terse
> in the encoding for the general case. Where we need more flexible
> predicates (e.g. for objc_nonfragile_abi in your WIP patch), those can
> be handled on a case-by-case basis by adding a new enumerator and logic
> to handle that specially.
> 
> What do you think, does that sound OK to you?

Sketching out what you have in mind using one or two examples would be
helpful.  Again, the fact that some of the answers are target-dependent, is
what makes me think of needing a little more generality.

>> What about things like this:
>> 
>> attribute_availability_tvos, 
>> attribute_availability_watchos, 
>> attribute_availability_driverkit, 
> 
> FWIW, clang looks to define these unconditionally, so restricting these
> to a given target would be deviating from its precedent.

Hmm... I did not check that, although (for the sake of keeping target-specific
code localised) my current availability attribute implementation is Darwin-
specific.

Having said that, interoperability with clang is also a very useful goal - for
Darwin, the SDK headers have only been (fully) tested with clang up to
now and I am sure we will find more gotchas as we expand what we can
parse.

> However, I don't think it would be hard to extend the implementation in
> this patch to support target-specific features if required. I think
> perhaps a langhook that targets can call to add their own features would
> be a reasonable approach.

Indeed, that could work if the result is needed later than pre-processing.

In my patch, IIRC, I added another entry to the libcpp callbacks to handle
target-specific __has_ queries.

cheers
Iain




Re: [PATCH v2] RISC-V: Set the natural size of constant vector mask modes to one RVV data vector.

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 00:47, juzhe.zh...@rivai.ai wrote:

LGTM. Thanks!

OK for the trunk, of course.
jeff


RE: [PATCH v2] RISC-V: Set the natural size of constant vector mask modes to one RVV data vector.

2023-06-20 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff and Juzhe.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 20, 2023 10:12 PM
To: juzhe.zh...@rivai.ai; Li Xu ; gcc-patches 

Cc: kito.cheng ; palmer 
Subject: Re: [PATCH v2] RISC-V: Set the natural size of constant vector mask 
modes to one RVV data vector.



On 6/20/23 00:47, juzhe.zh...@rivai.ai wrote:
> LGTM. Thanks!
OK for the trunk, of course.
jeff


RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Li, Pan2 via Gcc-patches
Thanks Jakub for the explanation. I gave it a try with the patch below, but I am 
not quite sure whether it is what you expected, or where I should put the assertion.

> If yes, it needs to
> be unsigned short, if not, we should add an assertion (e.g. on streaming
> in the LTO table) that MAX_MACHINE_MODE <= 256.

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..93ef97ec5d3 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
 internal_error ("cannot read LTO mode table from %s",
file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc (1 << 8);
-  file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
 = (const struct lto_simple_header_with_strings *) data;
   int string_offset;
@@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
   string_offset = sizeof (*header) + header->main_size;
 
   lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
+  unsigned char *table = ggc_cleared_vec_alloc (
+1 << ib.mode_bits);
+  file_data->mode_table = table;
   data_in = lto_data_in_create (file_data, data + string_offset,
header->string_size, vNULL);
   bitpack_d bp = streamer_read_bitpack (&ib);
@@ -2001,13 +2002,13 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
   table[VOIDmode] = VOIDmode;
   table[BLKmode] = BLKmode;
   unsigned int m;
-  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
+  while ((m = bp_unpack_value (&bp, ib.mode_bits)) != VOIDmode)
 {
   enum mode_class mclass
= bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
   poly_uint16 size = bp_unpack_poly_value (&bp, 16);
   poly_uint16 prec = bp_unpack_poly_value (&bp, 16);
-  machine_mode inner = (machine_mode) bp_unpack_value (&bp, 8);
+  machine_mode inner = (machine_mode) bp_unpack_value (&bp, ib.mode_bits);
   poly_uint16 nunits = bp_unpack_poly_value (&bp, 16);
   unsigned int ibit = 0, fbit = 0;
   unsigned int real_fmt_len = 0;
@@ -2018,8 +2019,8 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
case MODE_UFRACT:
case MODE_ACCUM:
case MODE_UACCUM:
- ibit = bp_unpack_value (&bp, 8);
- fbit = bp_unpack_value (&bp, 8);
+ ibit = bp_unpack_value (&bp, ib.mode_bits);
+ fbit = bp_unpack_value (&bp, ib.mode_bits);
  break;
case MODE_FLOAT:
case MODE_DECIMAL_FLOAT:
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index fc7133d07ba..f1d826d59e4 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -352,6 +352,8 @@ public:
 
   const char *data;
   const unsigned char *mode_table;
+  /* Indicates how many bits of one machine mode will have.  */
+  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;
   unsigned int p;
   unsigned int len;
 };
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
During streaming in, we translate the on the disk mode using this
table.  For normal LTO it is set to identity, for ACCEL_COMPILER
depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..9aa248cd2f5 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
 hashval_t, unsigned *);
@@ -108,15 +108,18 @@ inline void
 bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
 {
   streamer_mode_table[mode] = 1;
-  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
+  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
+
+  bp_pack_enum (bp, machine_mode, last, mode);
 }
 
 inline machine_mode
 bp_unpack_machine_mode (struct bitpack_d *bp)
 {
-  return (machine_mode)
-  ((class lto_input_block *)
-   bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
+  lto_input_block *input_block =  (class lto_input_block *)bp->stream;
+  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
+
+  return (machine_mode)input_block->mode_table[index];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */

Pan

-Or

  1   2   >